This file is a merged representation of a subset of the codebase, containing files not matching ignore patterns, combined into a single document by Repomix.
The content has been compressed: code blocks are separated by the ⋮---- delimiter.

<file_summary>
This section contains a summary of this file.

<purpose>
This file contains a packed representation of a subset of the repository's contents that is considered the most important context.
It is designed to be easily consumable by AI systems for analysis, code review,
or other automated processes.
</purpose>

<file_format>
The content is organized as follows:
1. This summary section
2. User-provided header
3. Directory structure
4. Repository files, as multiple file entries, each consisting of:
  - File path as an attribute
  - Full contents of the file
</file_format>

<usage_guidelines>
- This file should be treated as read-only. Any changes should be made to the
  original repository files, not this packed version.
- When processing this file, use the file path to distinguish
  between different files in the repository.
- Be aware that this file may contain sensitive information. Handle it with
  the same level of security as you would the original repository.
- Pay special attention to the Repository Description. It contains important context and guidelines specific to this project.
</usage_guidelines>

<notes>
- Some files may have been excluded based on .gitignore rules and Repomix's configuration
- Binary files are not included in this packed representation. Please refer to the Directory Structure section for a complete list of file paths, including binary files
- Files matching these patterns are excluded: .*/**, !.github, !.github/**, data/**, **/data/**, **/*.csv, logs/**, build/**, dist/**, *.egg-info/**, __pycache__/**, **/*.pyc, .pytest_cache/**, .mypy_cache/**, .ruff_cache/**, venv/**, env/**, .env, **/*.json, llms.txt, llms-compressed.txt, llms-128k.txt, llms-no-tests.txt, llms-no-tests-compressed.txt, llms-no-tests-no-examples.txt, llms-no-tests-no-examples-compressed.txt, file-list.txt, file-list-updated.txt, examples/data/**, examples/docqa/docs/**, examples/logs/**, tests/cache/**, tests/logs/**, **/*.pkl, **/*.pickle, **/*.db, **/*.sqlite, **/*.log, **/node_modules/**, **/*.min.js, **/*.map, coverage/**, htmlcov/**, .coverage, *.orig, *.tmp, *.bak, *.swp, *.swo, **/docker-compose*.yml, visual_log.sh, **/*_converted.md, **/page_*.md, **/page-*.md, tests/main/dummy-pages/**, tests/main/data/**/*.txt, **/*.pb2.py, **/*.pb2_grpc.py
- Files matching patterns in .gitignore are excluded
- Files matching default ignore patterns are excluded
- Content has been compressed - code blocks are separated by ⋮---- delimiter
- Files are sorted by Git change count (files with more changes are at the bottom)
</notes>

</file_summary>

<user_provided_header>
Langroid Repository Export for LLM Analysis
</user_provided_header>

<directory_structure>
ai-instructions/
  claude-repomix-instructions.md
ai-notes/
  handler-parameter-analysis-notes.md
  Langroid-repo-docs.md
  repomix-plan.md
docs/
  blog/
    posts/
      chat-completion.md
      langroid-architecture.md
      langroid-intro.md
      langroid-knowledge-graph.md
      langroid-lancedb.md
      local-llm-formatting.md
      local-llm.md
      malade.md
      multi-agent-debate.md
      test.md
    .authors.yml
    index.md
  demos/
    targeting/
      audience-targeting.md
  examples/
    agent-tree.md
    guide.md
  javascripts/
    mathjax.js
  notes/
    async-streaming.md
    azure-openai-models.md
    chunking.md
    code-injection-protection.md
    crawl4ai.md
    custom-azure-client.md
    enriching-for-retrieval.md
    file-input.md
    gemini.md
    glhf-chat.md
    handle-llm-no-tool.md
    html-logger.md
    knowledge-graphs.md
    langdb.md
    large-tool-results.md
    litellm-proxy.md
    llama-cpp-embeddings.md
    llm-pdf-parser.md
    marker-pdf.md
    markitdown.md
    mcp-tools.md
    message-routing.md
    openai-client-caching.md
    openai-http-client.md
    overview.md
    pgvector.md
    pinecone.md
    portkey.md
    pydantic-v2-migration.md
    qdrant-resource-cleanup.md
    quiet-mode.md
    reasoning-content.md
    seltz_search.md
    structured-output.md
    task-termination.md
    task-tool.md
    tavily_search.md
    tool-message-handler.md
    url_loader.md
    weaviate.md
    xml-tools.md
  overrides/
    partials/
      comments.html
  quick-start/
    chat-agent-docs.md
    chat-agent-tool.md
    chat-agent.md
    index.md
    llm-interaction.md
    multi-agent-task-delegation.md
    setup.md
    three-agent-chat-num-router.md
    three-agent-chat-num.md
    two-agent-chat-num.md
  stylesheets/
    extra.css
  tutorials/
    langroid-tour.md
    llm-usage-options.md
    local-llm-setup.md
    non-openai-llms.md
    postgresql-agent.md
    supported-models.md
  auto_docstring.py
  FAQ.md
  index.md
issues/
  pydantic-v2-migration/
    examples-errors.md
    migration-checking-log.md
    pr-pydantic-v2-fixes.md
    PYDANTIC_V2_MIGRATION_TASK_SPECIFICATION.md
    pydantic-migration-checking-instructions.md
    PYRANTIC-V2-MIGRATION-PLAN.md
  20251010-concurrent-rag-status.md
  20251010-concurrent-rag.md
  20251011-cross-encoder-race-bug.md
  20251011-pr-926-description.md
  20251107-fix-mcp-dectorator.md
  20251123-new-model-support-gpt51-gemini30.md
  898-implementation.md
  html-logger-implementation.md
  html-logger.md
  issue-919-llamacpp-embeddings.md
  llm-client-caching-phase1-summary.md
  llm-client-caching-phase2-summary.md
  llm-client-caching-test-summary.md
  llm-client-caching.md
  pr-882-cached-tokens-improvements.md
  pr-openai-client-caching.md
  pr-qdrant-lock-fix.md
  qdrant-lock-issue-spec-changes.md
langroid/
  agent/
    callbacks/
      chainlit.py
    special/
      arangodb/
        arangodb_agent.py
        system_messages.py
        tools.py
        utils.py
      lance_rag/
        __init__.py
        critic_agent.py
        lance_rag_task.py
        query_planner_agent.py
      neo4j/
        csv_kg_chat.py
        neo4j_chat_agent.py
        system_messages.py
        tools.py
      sql/
        utils/
          __init__.py
          description_extractors.py
          populate_metadata.py
          system_message.py
          tools.py
        __init__.py
        sql_chat_agent.py
      __init__.py
      doc_chat_agent.py
      lance_doc_chat_agent.py
      lance_tools.py
      relevance_extractor_agent.py
      retriever_agent.py
      table_chat_agent.py
    tools/
      mcp/
        __init__.py
        decorators.py
        fastmcp_client.py
      __init__.py
      duckduckgo_search_tool.py
      exa_search_tool.py
      file_tools.py
      google_search_tool.py
      metaphor_search_tool.py
      orchestration.py
      recipient_tool.py
      retrieval_tool.py
      rewind_tool.py
      segment_extract_tool.py
      seltz_search_tool.py
      task_tool.py
      tavily_search_tool.py
    __init__.py
    base.py
    batch.py
    chat_agent.py
    chat_document.py
    done_sequence_parser.py
    openai_assistant.py
    task.py
    tool_message.py
    xml_tool_message.py
  cachedb/
    __init__.py
    base.py
    redis_cachedb.py
  embedding_models/
    protoc/
      embeddings_pb2_grpc.py
      embeddings_pb2.py
      embeddings_pb2.pyi
      embeddings.proto
    __init__.py
    base.py
    models.py
    remote_embeds.py
  language_models/
    prompt_formatter/
      __init__.py
      base.py
      hf_formatter.py
      llama2_formatter.py
    __init__.py
    azure_openai.py
    base.py
    client_cache.py
    config.py
    mock_lm.py
    model_info.py
    openai_gpt.py
    provider_params.py
    utils.py
  parsing/
    __init__.py
    agent_chats.py
    code_parser.py
    document_parser.py
    file_attachment.py
    md_parser.py
    para_sentence_split.py
    parse_json.py
    parser.py
    pdf_utils.py
    repo_loader.py
    routing.py
    search.py
    spider.py
    table_loader.py
    url_loader.py
    urls.py
    utils.py
    web_search.py
  prompts/
    __init__.py
    dialog.py
    prompts_config.py
    templates.py
  pydantic_v1/
    __init__.py
    main.py
  utils/
    algorithms/
      __init__.py
      graph.py
    output/
      __init__.py
      citations.py
      printing.py
      status.py
    __init__.py
    configuration.py
    constants.py
    git_utils.py
    globals.py
    html_logger.py
    logging.py
    object_registry.py
    pandas_utils.py
    pydantic_utils.py
    system.py
    types.py
  vector_store/
    __init__.py
    base.py
    chromadb.py
    lancedb.py
    meilisearch.py
    pineconedb.py
    postgres.py
    qdrantdb.py
    weaviatedb.py
  __init__.py
  exceptions.py
  mytypes.py
plugins/
  langroid/
    skills/
      add-pattern/
        SKILL.md
      patterns/
        agent-handler-validation-with-state.md
        agent-tool-handler-with-state.md
        done-sequences-specific-tool.md
        mcp-tool-integration.md
        quiet-mode.md
        run-batch-tasks.md
        SKILL.md
        task-return-tool.md
release-notes/
  v0-56-0-task-tool.md
  v0-56-11-openai-client-caching.md
  v0-56-12-cached-tokens-support.md
  v0-56-13-done-sequences-parent-chain-fixes.md
  v0-56-15-response-sequence-tracking.md
  v0-56-2-table-chat-fix.md
  v0-56-4-handler-params.md
  v0-56-6-doc-chat-refactor.md
  v0-56-7-doc-chat-deprecation-fix.md
  v0-56-8-task-tool-spawn-example.md
  v0-56-9-rrf-crossencoder-fixes.md
  v0-58-0-crawl4ai-integration.md
  v0.57.0-html-logger.md
scripts/
  fix-pydantic-imports.sh
.blackignore
.coveragerc
.env-template
.gitignore
.pre-commit-config.yaml
bump_version.sh
chainlit.md
CLAUDE.md
CODE_OF_CONDUCT.md
CONTRIBUTING.md
Dockerfile
LICENSE
Makefile
mkdocs.yml
PR_954_REVIEW.md
PR_REVIEW_975.md
pyproject.toml
pytest.ini
README.md
SECURITY.md
setup.cfg
</directory_structure>

<files>
This section contains the contents of the repository's files.

<file path="ai-instructions/claude-repomix-instructions.md">
# AI Instructions for Setting Up Repomix

## Task Overview
Set up [repomix](https://github.com/yamadashy/repomix) to generate LLM-friendly repository exports. This creates text files that can be uploaded to AI models for code analysis.

## Steps to Complete

### 1. Install Repomix
```bash
npm install -g repomix
```

### 2. Create repomix.config.json
Create a configuration file in the repository root with:
- **Include patterns**: Source code files (*.py, *.js, *.md, *.yaml, *.yml, *.toml)
- **Exclude patterns**: Data directories, logs, node_modules, JSON files, generated files
- **Security check**: Enable to prevent sensitive data inclusion

### 3. Configure Include/Exclude Patterns
- Include only source code directories and documentation
- Exclude data/, logs/, build artifacts, dependencies
- Add `llms*.txt` to exclusions to prevent recursive inclusion

### 4. Test Configuration (Optional)
```bash
# Generate file list only for inspection
repomix --no-files -o file-list.txt
```
This allows you to review which files will be included before generating the full output.

### 5. Generate Output Versions

Use the Makefile targets to generate repomix files:

```bash
# Generate all variants (recommended)
make repomix-all

# Or generate specific versions:
make repomix                      # llms.txt and llms-compressed.txt (includes tests)
make repomix-no-tests             # llms-no-tests.txt and llms-no-tests-compressed.txt
make repomix-no-tests-no-examples # llms-no-tests-no-examples.txt and compressed version
```

All commands use `git ls-files` to ensure only git-tracked files are included.

### 6. Verify Results
- Check file sizes and token counts in repomix output
- Ensure no sensitive data is included
- Confirm only relevant source files are packaged

## Expected Outcome
Six text files optimized for different LLM contexts:
- `llms.txt`: Full version with tests and examples (870K tokens)
- `llms-compressed.txt`: Compressed version with tests and examples (513K tokens)
- `llms-no-tests.txt`: Full version without tests (677K tokens)
- `llms-no-tests-compressed.txt`: Compressed version without tests (433K tokens)
- `llms-no-tests-no-examples.txt`: Core library code only (no tests/examples)
- `llms-no-tests-no-examples-compressed.txt`: Compressed core library code (285K tokens)

The files contain only git-tracked source code with proper exclusions for clean, focused LLM consumption.
</file>

<file path="ai-notes/handler-parameter-analysis-notes.md">
# Handler Parameter Analysis Notes

## Overview

This document summarizes the investigation into how Langroid analyzes handler method parameters in `langroid/agent/base.py`, specifically focusing on the `_analyze_handler_params` method and its role in creating handler wrappers.

## Key Methods and Call Chain

### Call Chain
1. `_get_tool_list()` - Registers tool messages and their handlers
2. `_create_handler_wrapper()` - Creates wrapper functions for handlers
3. `_analyze_handler_params()` - Analyzes handler method signatures

## How _analyze_handler_params Works

The `_analyze_handler_params` method (lines 253-313 in agent/base.py) analyzes a handler method's signature to identify:
- Whether it has type annotations
- Which parameter is the agent parameter
- Which parameter is the chat_doc parameter

### Analysis Process (Updated Implementation)
1. **Type Annotation Check**: First checks if parameters have type annotations
   - **Direct Class Checking** (NEW): For simple class annotations like `Agent` or `ChatAgent`:
     - Uses `inspect.isclass(param.annotation) and issubclass(param.annotation, Agent)`
     - This works because Python stores the actual class object in the annotation
   - **Direct Identity Check** (NEW): For ChatDocument:
     - Uses `param.annotation is ChatDocument` for exact match
   - **String-based Fallback**: For complex type hints like `Optional[Agent]`:
     - Falls back to checking if "Agent" is in the string representation
     - Necessary because complex generic types aren't simple class objects

2. **Fallback to Parameter Names**: If no annotations found
   - Looks for parameter named `agent`
   - Looks for parameter named `chat_doc`

### Key Insight: Type Annotations Are Objects
The crucial realization is that Python's type annotation system stores actual class references when possible:
- `def handler(agent: Agent):` → `param.annotation` contains the actual `Agent` class object
- `def handler(agent: Optional[Agent]):` → `param.annotation` contains a complex type object that requires string inspection
- This allows direct `issubclass()` checks for simple annotations, making the analysis more accurate and robust
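
A minimal, self-contained sketch of this idea (using stand-in classes rather than Langroid's actual `Agent`/`ChatDocument`) shows how the annotation objects can be inspected directly, with a string fallback for complex hints:

```python
import inspect
from typing import Optional


class Agent: ...
class ChatAgent(Agent): ...
class ChatDocument: ...


def handler(self, agent: ChatAgent, chat_doc: Optional[ChatDocument]) -> str:
    return "handled"


# Drop 'self' (the first parameter), as the updated implementation does.
params = list(inspect.signature(handler).parameters.values())[1:]
for p in params:
    ann = p.annotation
    if inspect.isclass(ann) and issubclass(ann, Agent):
        print(f"{p.name}: agent parameter (direct class check)")
    elif ann is ChatDocument:
        print(f"{p.name}: chat_doc parameter (identity check)")
    elif "Agent" in str(ann) or "ChatDocument" in str(ann):
        print(f"{p.name}: matched via string fallback for complex hint {ann!r}")
```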

## How _create_handler_wrapper Works

Based on the analysis from `_analyze_handler_params`, the wrapper creates different function signatures:
- No parameters → `wrapper(obj)`
- Both agent and chat_doc → `wrapper(obj, chat_doc)` with correct parameter order
- Only agent → `wrapper(obj)` passing agent internally
- Only chat_doc → `wrapper(obj, chat_doc)`
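
A simplified sketch of the wrapper idea (not the actual Langroid implementation) is shown below; it assumes the agent is captured when the wrapper is created, so the wrapper's outward signature only needs to expose `(obj,)` or `(obj, chat_doc)`:

```python
# Simplified sketch: `obj` is the tool-message instance, `handler` is its
# handle method, and `agent` is captured when the wrapper is created.
# The real code also respects the order in which the handler declares
# its agent/chat_doc parameters.
def make_wrapper(handler, agent, wants_agent: bool, wants_chat_doc: bool):
    if wants_agent and wants_chat_doc:
        return lambda obj, chat_doc: handler(obj, agent, chat_doc)
    if wants_agent:
        return lambda obj: handler(obj, agent)
    if wants_chat_doc:
        return lambda obj, chat_doc: handler(obj, chat_doc)
    return lambda obj: handler(obj)
```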

## Why Direct Type Checking Works (Clarification)

Initially, we believed runtime type checking wasn't feasible because we confused two different concepts:

### The Misconception
We thought we needed runtime values to check parameter types, but this was incorrect. The confusion arose from:
1. Thinking we needed actual parameter values to determine their types
2. Not realizing that type annotations are stored as Python objects in the function signature

### The Reality: Static Analysis of Type Annotations
1. **Type annotations are available at definition time**: When Python parses `def handler(agent: Agent):`, it stores the `Agent` class object in the function's signature
2. **No runtime values needed**: We're checking the type annotations themselves, not the runtime values
3. **Direct class comparison is possible**: For simple type hints, `param.annotation` contains the actual class object, allowing `issubclass()` checks

### Why This Approach Works
1. **Setup Time Analysis**: We analyze the handler signature when tools are registered, using the stored annotation objects
2. **Direct Type Checking**: For simple annotations like `Agent`, we can use `issubclass(param.annotation, Agent)`
3. **Fallback for Complex Types**: For generic types like `Optional[Agent]`, we fall back to string matching
4. **Performance**: Still analyzes once at setup, no runtime overhead

## Current Design Benefits
- Analyzes handler signatures once at setup time
- Creates wrappers with exact signatures needed
- No runtime ambiguity about parameter arrangement
- Clear error messages if handler signatures don't match expectations

## Implementation Changes Summary

### Recent Updates to _analyze_handler_params
The method was enhanced to support direct type checking of handler parameters:

1. **Direct Class Checking for Agent Types**:
   ```python
   if inspect.isclass(param.annotation) and issubclass(param.annotation, Agent):
   ```
   - Checks if the annotation is a direct class reference to Agent or its subclasses
   - More accurate than string matching alone

2. **Direct Identity Check for ChatDocument**:
   ```python
   if param.annotation is ChatDocument:
   ```
   - Uses identity comparison for exact ChatDocument type matching

3. **Improved Parameter Extraction**:
   - Changed from `[p for p in params if p.name != "self"]` to `params[1:]`
   - More reliable for removing the 'self' parameter

4. **Fallback Strategy**:
   - Still uses string matching for complex type hints like `Optional[Agent]`
   - Maintains backward compatibility while improving accuracy

## Related PR
This investigation was prompted by PR #861 "MCP updates" which made changes to how `FastMCPServer` forwards image context and resources, and added optional persistence for MCP server connections. The handler parameter analysis improvements were made to support more robust type checking for MCP tool handlers.
</file>

<file path="ai-notes/Langroid-repo-docs.md">
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Commands

### Development
- Install core dependencies: `pip install -e .`
- Install dev dependencies: `pip install -e ".[dev]"`
- Install specific feature groups:
  - Document chat features: `pip install -e ".[doc-chat]"`
  - Database features: `pip install -e ".[db]"`
  - HuggingFace embeddings: `pip install -e ".[hf-embeddings]"`
  - All features: `pip install -e ".[all]"`
- Run linting and type checking: `make check`
- Format code: `make lint`

### Testing
- Run all tests: `pytest tests/`
- Run specific test: `pytest tests/main/test_file.py::test_function`
- Run tests with coverage: `pytest --cov=langroid tests/`
- Run only main tests: `make tests` (uses `pytest tests/main`)

### Linting and Type Checking
- Lint code: `make check` (runs black, ruff check, mypy)
- Format only: `make lint` (runs black and ruff fix)
- Type check only: `make type-check`
- Always use `make check` to run lints + mypy before trying to commit changes

### Version and Release Management
- Bump version: `./bump_version.sh [patch|minor|major]`
- Or use make commands:
  - `make all-patch` - Bump patch version, build, push, release
  - `make all-minor` - Bump minor version, build, push, release
  - `make all-major` - Bump major version, build, push, release

## Architecture

Langroid is a framework for building LLM-powered agents that can use tools and collaborate with each other.

### Core Components:

1. **Agents** (`langroid/agent/`):
   - `chat_agent.py` - Base ChatAgent that can converse and use tools
   - `task.py` - Handles execution flow for agents
   - `special/` - Domain-specific agents (doc chat, table chat, SQL chat, etc.)
   - `openai_assistant.py` - Integration with OpenAI Assistant API

2. **Tools** (`langroid/agent/tools/`):
   - Tool system for agents to interact with external systems
   - `tool_message.py` - Protocol for tool messages
   - Various search tools (Google, DuckDuckGo, Tavily, Exa, etc.)

3. **Language Models** (`langroid/language_models/`):
   - Abstract interfaces for different LLM providers
   - Implementations for OpenAI, Azure, local models, etc.
   - Support for hundreds of LLMs via LiteLLM

4. **Vector Stores** (`langroid/vector_store/`):
   - Abstract interface and implementations for different vector databases
   - Includes support for Qdrant, Chroma, LanceDB, Pinecone, PGVector, Weaviate

5. **Document Processing** (`langroid/parsing/`):
   - Parse and process documents from various formats
   - Chunk text for embedding and retrieval
   - Support for PDF, DOCX, images, and more

6. **Embedding Models** (`langroid/embedding_models/`):
   - Abstract interface for embedding generation
   - Support for OpenAI, HuggingFace, and custom embeddings

### Key Multi-Agent Patterns:

- **Task Delegation**: Agents can delegate tasks to other agents through hierarchical task structures
- **Message Passing**: Agents communicate by transforming and passing messages
- **Collaboration**: Multiple agents can work together on complex tasks

### Key Security Features:

- The `full_eval` flag in both `TableChatAgentConfig` and `VectorStoreConfig` controls code injection protection
- Defaults to `False` for security, set to `True` only in trusted environments

## Documentation

- Main documentation is in the `docs/` directory
- Examples in the `examples/` directory demonstrate usage patterns
- Quick start examples available in `examples/quick-start/`

## MCP (Model Context Protocol) Tools Integration

Langroid provides comprehensive support for MCP tools through the `langroid.agent.tools.mcp` module. Here are the key patterns and approaches:

### MCP Tool Creation Methods

#### 1. Using the `@mcp_tool` Decorator (Module Level)
```python
from langroid.agent.tools.mcp import mcp_tool
from fastmcp.client.transports import StdioTransport

transport = StdioTransport(command="...", args=[...])

@mcp_tool(transport, "tool_name")
class MyTool(lr.ToolMessage):
    async def handle_async(self):
        result = await self.call_tool_async()
        # custom processing
        return result
```

**Important**: The decorator creates the transport connection at module import time, so it must be used at module level (not inside async functions).

#### 2. Using `get_tool_async` (Inside Async Functions)
```python
from langroid.agent.tools.mcp.fastmcp_client import get_tool_async

async def main():
    transport = StdioTransport(command="...", args=[...])
    BaseTool = await get_tool_async(transport, "tool_name")
    
    class MyTool(BaseTool):
        async def handle_async(self):
            result = await self.call_tool_async()
            # custom processing
            return result
```

**Use this approach when**:
- Creating tools inside async functions
- Need to avoid event loop conflicts
- Want to delay transport creation until runtime

### Transport Types and Event Loop Considerations

- **StdioTransport**: Creates subprocess immediately, can cause "event loop closed" errors if created at module level in certain contexts
- **SSETransport**: HTTP-based, generally safer for module-level creation
- **Best Practice**: Create transports inside async functions when possible, use `asyncio.run()` wrapper for Fire CLI integration

### Tool Message Request Field and Agent Handlers

When you get an MCP tool named "my_tool", Langroid automatically:

1. **Sets the `request` field**: The dynamically created ToolMessage subclass has `request = "my_tool"`
2. **Enables custom agent handlers**: Agents can define these methods:
   - `my_tool()` - synchronous handler
   - `my_tool_async()` - async handler

The agent's message routing system automatically calls these handlers when the tool is used.
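
For example, a hedged sketch of an agent defining such a handler (the tool name `my_tool` and the handler signature follow the convention described above; the exact parameter types may vary):

```python
import langroid as lr


class MyAgent(lr.ChatAgent):
    async def my_tool_async(self, msg: lr.ToolMessage) -> str:
        # Called automatically when the LLM emits the "my_tool" tool message;
        # `msg` is the dynamically created ToolMessage instance.
        result = await msg.call_tool_async()
        return f"Result: {result}"
```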

### Custom `handle_async` Method Override

Both decorator and non-decorator approaches support overriding `handle_async`:

```python
class MyTool(BaseTool):  # or use @mcp_tool decorator
    async def handle_async(self):
        # Get raw result from MCP server
        result = await self.call_tool_async()
        
        # Option 1: Return processed result to LLM (continues conversation)
        return f"<ProcessedResult>{result}</ProcessedResult>"
        
        # Option 2: Return ResultTool to terminate task
        return MyResultTool(answer=result)
```

### Common Async Issues and Solutions

**Problem**: "RuntimeError: asyncio.run() cannot be called from a running event loop"
**Solution**: Use `get_tool_async` instead of `@mcp_tool` decorator when already in async context

**Problem**: "RuntimeError: Event loop is closed"
**Solution**: 
- Move transport creation inside async functions
- Use `asyncio.run()` wrapper for Fire CLI integration:
```python
if __name__ == "__main__":
    import asyncio
    def run_main(**kwargs):
        asyncio.run(main(**kwargs))
    Fire(run_main)
```

### MCP Tool Integration Examples

See `examples/mcp/` for working examples:
- `gitmcp.py` - HTTP-based SSE transport
- `pyodide_code_executor.py` - Subprocess-based stdio transport with proper async handling

## Testing and Tool Message Patterns

### MockLM for Testing Tool Generation
- Use `MockLM` with `response_dict` to simulate LLM responses that include tool messages
- Set `tools=[ToolClass]` or `enable_message=[ToolClass]` on the agent to enable tool handling
- The `try_get_tool_messages()` method can extract tool messages from LLM responses with `all_tools=True`

### Task Termination Control
- `TaskConfig` has `done_if_tool` parameter to terminate tasks when any tool is generated
- `Task.done()` method checks `result.agent_response` for tool content when this flag is set
- Useful for workflows where tool generation signals task completion

### Testing Tool-Based Task Flows
```python
# Example: Test task termination on tool generation
config = TaskConfig(done_if_tool=True)
task = Task(agent, config=config)
response_dict = {"content": '{"request": "my_tool", "param": "value"}'}
```
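
A hedged sketch of wiring these pieces together, continuing the snippet above (`MyTool` is a hypothetical `ToolMessage` subclass, and the exact `MockLMConfig` fields may differ from the current API):

```python
import langroid as lr
from langroid.language_models.mock_lm import MockLMConfig

config = lr.TaskConfig(done_if_tool=True)
agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        llm=MockLMConfig(response_dict=response_dict),  # canned "LLM" responses
    )
)
agent.enable_message([MyTool])  # MyTool: hypothetical ToolMessage subclass
task = lr.Task(agent, interactive=False, config=config)
result = task.run("go")  # terminates once the mocked LLM generates the tool
```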

## Multi-Agent System Development

### Important Patterns and Best Practices

#### 1. Pydantic Imports
**ALWAYS import Pydantic classes from `langroid.pydantic_v1`**, not from `pydantic` directly:
```python
# CORRECT
from langroid.pydantic_v1 import Field, BaseModel

# WRONG - will cause issues
from pydantic import Field, BaseModel
```

#### 2. Tool Name References in System Messages
When referencing tool names in f-strings within system messages, use the `.name()` method:
```python
system_message: str = f"""
Use {MyTool.name()} to perform the action.
"""
```
This works at module level in configs, but be aware that complex initialization at module level can sometimes cause issues.

#### 3. Agent Configuration with LLM
Always specify the LLM configuration explicitly in agent configs:
```python
class MyAgentConfig(lr.ChatAgentConfig):
    name: str = "MyAgent"
    llm: lm.OpenAIGPTConfig = lm.OpenAIGPTConfig(
        chat_model="gpt-4",  # or "gpt-4.1" etc.
    )
    system_message: str = "..."
```

#### 4. Tool Organization in Multi-Agent Systems
When tools delegate to agents:
- Define agent configs and agents BEFORE the tools that use them
- Tools can directly instantiate agents in their `handle()` methods:
```python
class MyTool(lr.ToolMessage):
    def handle(self) -> str:
        agent = MyAgent(MyAgentConfig())
        task = lr.Task(agent, interactive=False)
        result = task.run(prompt)
        return result.content
```

#### 5. Task Termination with Done Sequences
Use `done_sequences` for precise task termination control:
```python
# For a task that should complete after: Tool -> Agent handles -> LLM responds
task = lr.Task(
    agent,
    interactive=False,
    config=lr.TaskConfig(done_sequences=["T,A,L"]),
)
```

Common patterns:
- `"T,A"` - Tool used and handled by agent
- `"T,A,L"` - Tool used, handled, then LLM responds
- `"T[specific_tool],A"` - Specific tool used and handled

See `docs/notes/task-termination.md` for comprehensive documentation.

#### 6. Handling Non-Tool LLM Responses
Use `handle_llm_no_tool` in agent configs to handle cases where the LLM forgets to use a tool:
```python
class MyAgentConfig(lr.ChatAgentConfig):
    handle_llm_no_tool: str = "You FORGOT to use one of your TOOLs!"
```

#### 7. Agent Method Parameters
Note that `ChatAgentConfig` does not have a `use_tools` parameter. Instead, enable tools on the agent after creation:
```python
agent = MyAgent(config)
agent.enable_message([Tool1, Tool2, Tool3])  # Pass list of tool classes
```

## Commit and Pull Request Guidelines

- Never include "co-authored by Claude Code" or "created by Claude" in commit messages or pull request descriptions

## Codecov Badge Fix (June 2025)

- Fixed broken Codecov badge in README by removing the token parameter from the URL
- Changed from `https://codecov.io/gh/langroid/langroid/branch/main/graph/badge.svg?token=H94BX5F0TE` to `https://codecov.io/gh/langroid/langroid/graph/badge.svg`
- Tokens are not needed for public repositories and can cause GitHub rendering issues
</file>

<file path="ai-notes/repomix-plan.md">
## Plan to Add llms-no-tests.txt

### Overview
Create a third version of the repomix output that excludes all test files from the `tests/` directory. This will provide a more concise version focused only on source code without test implementations.

### Steps:

1. **Create ai-scratchpads directory and save this plan** ✓
   - Create directory: `mkdir -p ai-scratchpads`
   - Save this plan to `ai-scratchpads/repomix-plan.md`

2. **Create temporary repomix configuration**
   - Copy existing `repomix.config.json` to `repomix-no-tests.config.json`
   - Add `"tests/**"` to the `customPatterns` array in the `ignore` section
   - Add `"llms-no-tests.txt"` to the ignore patterns to prevent recursive inclusion

3. **Generate the new output file**
   - Run: `repomix --config repomix-no-tests.config.json -o llms-no-tests.txt`
   - This will create a new file excluding all test files

4. **Clean up and update documentation**
   - Remove the temporary `repomix-no-tests.config.json` file
   - Update `ai-instructions/claude-repomix-instructions.md` to mention the third variant
   - Add a note about generating the no-tests version with the command:
     ```bash
     # No-tests version (excludes tests directory)
     repomix --config repomix-no-tests.config.json -o llms-no-tests.txt
     ```

### Expected Result
- A new file `llms-no-tests.txt` that contains all source code except test files
- This will be smaller than the standard `llms.txt` but larger than `llms-compressed.txt`
- Useful for LLM analysis when test implementations are not needed

### File Size Expectations
Based on the current setup:
- `llms.txt`: ~3.3 MB (782K tokens)
- `llms-compressed.txt`: ~1.6 MB (434K tokens)
- `llms-no-tests.txt`: Expected to be between these sizes, excluding test code

## Results and Conclusions

### Actual Token Counts
After generating all variants, here are the actual token counts:
- `llms.txt`: 782K tokens (standard version with tests)
- `llms-compressed.txt`: 434K tokens (compressed version with tests)
- `llms-no-tests.txt`: 652K tokens (no tests version)
- `llms-no-tests-compressed.txt`: 400K tokens (compressed no-tests version)

### Key Observations
1. **Limited Impact of Excluding Tests**: Removing test files only reduced tokens by ~130K (17% reduction), suggesting that test files don't constitute a major portion of the codebase.

2. **Compression More Effective**: The compression feature provides a much more significant reduction (~45-50% reduction) compared to just excluding tests.

3. **Minimal Benefit of Combined Approach**: The compressed no-tests version (400K) is only marginally smaller than the compressed version with tests (434K) - a difference of just 34K tokens or ~8%.

### Recommendations
- For most use cases, the standard `llms-compressed.txt` (434K tokens) is likely sufficient
- The no-tests variants might be useful for specific scenarios where test implementation details would confuse the LLM or are explicitly not needed
- The marginal benefit of excluding tests doesn't justify maintaining multiple variants unless there's a specific need

### Files Created
- `repomix-no-tests.config.json` - Permanent config file for generating no-tests versions
- `llms-no-tests.txt` - Full version without tests (652K tokens)
- `llms-no-tests-compressed.txt` - Compressed version without tests (400K tokens)
</file>

<file path="docs/blog/posts/chat-completion.md">
---
title: 'Language Models: Completion and Chat-Completion'
draft: false
date: 2023-09-19
authors: 
  - pchalasani
categories:
  - langroid
  - llm
  - local-llm
  - chat
comments: true
---

Transformer-based language models are fundamentally next-token predictors, so 
naturally all LLM APIs today at least provide a completion endpoint. 
If an LLM is a next-token predictor, how could it possibly be used to 
generate a response to a question or instruction, or to engage in a conversation with 
a human user? This is where the idea of "chat-completion" comes in.
This post is a refresher on the distinction between completion and chat-completion,
and some interesting details on how chat-completion is implemented in practice.

<!-- more -->

## Language Models as Next-token Predictors

A Language Model is essentially a "next-token prediction" model,
and so all LLMs today provide a "completion" endpoint, typically something like:
`/completions` under the base URL.

The endpoint simply takes a prompt and returns a completion (i.e. a continuation).

A typical prompt sent to a completion endpoint might look like this:
```
The capital of Belgium is 
```
and the LLM will return a completion like this:
```
Brussels.
```
OpenAI's GPT3 is an example of a pure completion LLM.
But interacting with a completion LLM is not very natural or useful:
you cannot give instructions or ask questions; instead you would always need to 
formulate your input as a prompt whose natural continuation is your desired output.
For example, if you wanted the LLM to highlight all proper nouns in a sentence,
you would format it as the following prompt:

**Chat-To-Prompt Example:** Chat/Instruction converted to a completion prompt.

```
User: here is a sentence, the Assistant's task is to identify all proper nouns.
     Jack lives in Bosnia, and Jill lives in Belgium.
Assistant:    
```
The natural continuation of this prompt would be a response listing the proper nouns,
something like:
```
Jack, Bosnia, Jill, Belgium are all proper nouns.
```

This _seems_ sensible in theory, but a "base" LLM that performs well on completions
may _not_ perform well on these kinds of prompts. The reason is that during its training, it may not
have been exposed to very many examples of this type of prompt-response pair.
So how can an LLM be improved to perform well on these kinds of prompts?

## Instruction-tuned, Aligned LLMs 

This brings us to the heart of the innovation behind the wildly popular ChatGPT:
it uses an enhancement of GPT3 that (besides having a lot more parameters),
was _explicitly_ fine-tuned on instructions (and dialogs more generally) -- this is referred to
as **instruction-fine-tuning** or IFT for short. In addition to fine-tuning on instructions/dialogs,
the models behind ChatGPT (i.e., GPT-3.5-Turbo and GPT-4) are further tuned to produce
responses that _align_ with human preferences (i.e. produce responses that are more helpful and safe),
using a procedure called Reinforcement Learning with Human Feedback (RLHF).
See this [OpenAI InstructGPT Paper](https://arxiv.org/pdf/2203.02155.pdf) for details on these techniques and references to the 
original papers that introduced these ideas. Another recommended read is Sebastian 
Raschka's post on [RLHF and related techniques](https://magazine.sebastianraschka.com/p/llm-training-rlhf-and-its-alternatives). 

For convenience, we refer to the combination of IFT and RLHF as **chat-tuning**.
A chat-tuned LLM can be expected to perform well on prompts such as the one in 
the Chat-To-Prompt Example above. These types of prompts are still unnatural, however, 
so as a convenience, chat-tuned LLM API servers also provide a "chat-completion" 
endpoint (typically `/chat/completions` under the base URL), which allows the user
to interact with them in a natural dialog, which might look like this
(the portions in square brackets are indicators of who is generating the text):

```
[User] What is the capital of Belgium?
[Assistant] The capital of Belgium is Brussels.
```
or
```
[User] In the text below, find all proper nouns:
    Jack lives in Bosnia, and Jill lives in Belgium.
[Assistant] Jack, Bosnia, Jill, Belgium are all proper nouns.
[User] Where does Jack live?
[Assistant] Jack lives in Bosnia.
```

## Chat Completion Endpoints: under the hood

How could this work, given that LLMs are fundamentally next-token predictors?
This is a convenience provided by the LLM API service (e.g. from OpenAI or
local model server libraries):
when a user invokes the chat-completion endpoint (typically
at `/chat/completions` under the base URL), under the hood, the server converts the
instructions and multi-turn chat history into a single string, with annotations indicating
user and assistant turns, and ending with something like "Assistant:"
as in the Chat-To-Prompt Example above.
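
As a rough illustration (not Langroid code), such a conversion might look like this:

```python
# Naive chat-to-prompt conversion of the kind a chat-completion endpoint
# performs under the hood: flatten the dialog into one string and end with
# an "Assistant:" cue so the model continues in the assistant's voice.
def chat_to_prompt(system: str, history: list[tuple[str, str]]) -> str:
    lines = [system]
    for role, text in history:  # role is "User" or "Assistant"
        lines.append(f"{role}: {text}")
    lines.append("Assistant:")
    return "\n".join(lines)


print(chat_to_prompt(
    "You are a helpful assistant.",
    [("User", "What is the capital of Belgium?")],
))
```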

Now the subtle detail to note here is this:

>It matters _how_ the
dialog (instructions plus chat history) is converted into a single prompt string.
Converting to a single prompt by simply concatenating the
instructions and chat history using an "intuitive" format (e.g. indicating
user, assistant turns using "User", "Assistant:", etc.) _can_ work,
however most local LLMs are trained on a _specific_ prompt format.
So if we format chats in a different way, we may get odd/inferior results.

## Converting Chats to Prompts: Formatting Rules

For example, the llama2 models are trained on a format where the user's input is bracketed within special strings `[INST]`
and `[/INST]`. There are other requirements that we don't go into here, but
interested readers can refer to these links:

- A reddit thread on the [llama2 formats](https://www.reddit.com/r/LocalLLaMA/comments/155po2p/get_llama_2_prompt_format_right/)
- Facebook's [llama2 code](https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L44)
- Langroid's [llama2 formatting code](https://github.com/langroid/langroid/blob/main/langroid/language_models/prompt_formatter/llama2_formatter.py)

A dialog fed to a Llama2 model in its expected prompt format would look like this:

```
<s>[INST] <<SYS>>
You are a helpful assistant.
<</SYS>>

Hi there! 
[/INST] 
Hello! How can I help you today? </s>
<s>[INST] In the text below, find all proper nouns:
    Jack lives in Bosnia, and Jill lives in Belgium.
 [/INST] 
Jack, Bosnia, Jill, Belgium are all proper nouns. </s><s> 
[INST] Where does Jack live? [/INST] 
Jack lives in Bosnia. </s><s>
[INST] And Jill? [/INST]
Jill lives in Belgium. </s><s>
[INST] Which are its neighboring countries? [/INST]
```
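
In code, a simplified, illustrative formatter producing this kind of prompt might look like the sketch below; Langroid's actual `Llama2Formatter` handles more details:

```python
from typing import Optional


def format_llama2(system: str, turns: list[tuple[str, Optional[str]]]) -> str:
    """`turns` is a list of (user_msg, assistant_msg) pairs; assistant_msg is
    None for the final turn, since that is what the model must generate."""
    prompt = f"<s>[INST] <<SYS>>\n{system}\n<</SYS>>\n\n"
    for i, (user_msg, assistant_msg) in enumerate(turns):
        if i > 0:
            prompt += f"<s>[INST] {user_msg} [/INST] "
        else:
            prompt += f"{user_msg} [/INST] "
        if assistant_msg is not None:
            prompt += f"{assistant_msg} </s>"
    return prompt
```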

This means that if an LLM server library wants to provide a chat-completion endpoint for
a local model, it needs to provide a way to convert chat history to a single prompt
using the specific formatting rules of the model.
For example the [`oobabooga/text-generation-webui`](https://github.com/oobabooga/text-generation-webui) 
library has an extensive set of chat formatting [templates](https://github.com/oobabooga/text-generation-webui/tree/main/instruction-templates)
for a variety of models, and their model server auto-detects the
format template from the model name.

!!! note "Chat completion model names: look for 'chat' or 'instruct' in the name"
    You can search for a variety of models on the [HuggingFace model hub](https://huggingface.co/models).
    For example if you see a name `Llama-2-70B-chat-GGUF` you know it is chat-tuned.
    Another example of a chat-tuned model is `Llama-2-7B-32K-Instruct` 
    
A user of these local LLM server libraries thus has two options when using a 
local model in chat mode:

- use the _chat-completion_ endpoint, and let the underlying library handle the chat-to-prompt formatting, or
- first format the chat history according to the model's requirements, and then use the
  _completion_ endpoint

## Using Local Models in Langroid

Local models can be used in Langroid by defining a `LocalModelConfig` object.
More details are in this [tutorial](https://langroid.github.io/langroid/blog/2023/09/14/using-langroid-with-local-llms/), 
but here we briefly discuss prompt-formatting in this context.
Langroid provides a built-in [formatter for LLama2 models](https://github.com/langroid/langroid/blob/main/langroid/language_models/prompt_formatter/llama2_formatter.py), 
so users looking to use llama2 models with langroid can try either of these options, by setting the
`use_completion_for_chat` flag in the `LocalModelConfig` object
(See the local-LLM [tutorial](https://langroid.github.io/langroid/blog/2023/09/14/using-langroid-with-local-llms/) for details).

When this flag is set to `True`, the chat history is formatted using the built-in 
Langroid llama2 formatter and the completion endpoint is used. When the flag is set to `False`, the chat 
history is sent directly to the chat-completion endpoint, which internally converts the 
chat history to a prompt in the expected llama2 format.

For local models other than Llama2, users can either:

- write their own formatters by writing a class similar to `Llama2Formatter` and 
then setting the `use_completion_for_chat` flag to `True` in the `LocalModelConfig` object, or
- use an LLM server library (such as the `oobabooga` library mentioned above) that provides a chat-completion endpoint, 
_and converts chats to single prompts under the hood,_ and set the
  `use_completion_for_chat` flag to `False` in the `LocalModelConfig` object.

You can use a similar approach if you are using an LLM application framework other than Langroid.


<iframe src="https://langroid.substack.com/embed" width="480" height="320" style="border:1px solid #EEE; background:white;" frameborder="0" scrolling="no"></iframe>
</file>

<file path="docs/blog/posts/langroid-architecture.md">
---
title: "Overview of Langroid's Multi-Agent Architecture (prelim)"
draft: false
date: 2024-08-15
authors:
- pchalasani
- nils
- jihye
- someshjha
categories:
- langroid
- multi-agent
- llm
comments: true
---


## Agent, as an intelligent message transformer

A natural and convenient abstraction in designing a complex
LLM-powered system is the notion of an *agent* that is instructed to be responsible for a specific aspect of the 
overall task. In terms of code, an *Agent* is essentially a class representing an intelligent entity that can 
respond to *messages*, i.e., an agent is simply a *message transformer*.
An agent typically encapsulates an (interface to an) LLM, and may also be equipped with so-called *tools* (as 
described below) and *external documents/data* (e.g., via a vector database, as described below).
Much like a team of humans, agents interact by exchanging messages, in a manner reminiscent of the 
[*actor framework*](https://en.wikipedia.org/wiki/Actor_model) in programming languages.
An *orchestration mechanism* is needed to manage the flow of messages between agents, to ensure that progress is 
made towards completion of the task, and to handle the inevitable cases where an agent deviates from instructions.
Langroid is founded on this *multi-agent programming* paradigm, where agents are 
first-class citizens that act as message transformers and communicate by exchanging messages.

<!-- more -->

To build useful applications with LLMs, we need to endow them with the ability to
trigger actions (such as API calls, computations, database queries, etc) or send structured messages to other agents 
or downstream processes. *Tools* provide these capabilities, described next.

## Tools, also known as functions

An LLM is essentially a text transformer; i.e.,  in response to some input text, 
it produces a text response. Free-form text responses are ideal when we want to generate a description, answer, or summary for human consumption, or even a question for another agent to answer.
However, in some cases, we would like the responses to be more structured, for example 
to trigger external *actions* (such as an API call, code execution, or a database query),
or for unambiguous/deterministic handling by a downstream process or another agent. 
In such cases, we would instruct the LLM to produce a *structured* output, typically in JSON format, with various 
pre-specified fields, such as code, an SQL query, parameters of an API call, and so on. These structured responses 
have come to be known as *tools*, and the LLM is said to *use* a tool when it produces a structured response 
corresponding to a specific tool. To elicit a tool response from an LLM, it needs to be instructed on the expected tool format and the conditions under which it should use the tool.
To actually use a tool emitted by an LLM, a *tool handler* method must be defined as well.
The tool handler for a given tool is triggered when it is recognized in the LLM's response.

### Tool Use: Example

As a simple example, a SQL query tool can be specified as a JSON structure with a `sql` 
field (containing the SQL query) and a `db` field (containing the name of the database).
The LLM may be instructed with a system prompt of the form:
> When the user asks a question about employees, use the SQLTool described in the below schema,
> and the results of this tool will be sent back to you, and you can use these to respond to
> the user's question, or correct your SQL query if there is a syntax error.

The tool handler would detect this specific tool in the LLM's response, parse this JSON structure, 
extract the `sql` and `db` fields, run the query on the specified database, 
and return the result if the query ran successfully, otherwise return an error message.
Depending on how the multi-agent system is organized, the query result or error message may be handled by the same agent
(i.e., its LLM), which may either summarize the results in narrative form, or revise the query if the error message 
indicates a syntax error.
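
In Langroid, such a tool could be sketched roughly as follows (the field names mirror the description above; `run_query` is a hypothetical helper standing in for actual database access):

```python
import langroid as lr


class SQLTool(lr.ToolMessage):
    request: str = "sql_query"
    purpose: str = "Run the SQL query <sql> against the database <db>."
    sql: str
    db: str

    def handle(self) -> str:
        # Tool handler: run the query and return the results, or the error
        # message so the LLM can correct its query.
        try:
            rows = run_query(self.db, self.sql)  # hypothetical helper
            return str(rows)
        except Exception as e:
            return f"SQL error: {e}"
```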

## Agent-oriented programming: Function-Signatures

If we view an LLM as a function with signature `string -> string`,
it is possible to express the concept of an agent, tool, and other constructs
in terms of derived function signatures, as shown in the table below.
Adding `tool` (or function calling) capability to an LLM requires a parser (that recognizes 
that the LLM has generated a tool) and a callback that performs arbitrary computation and returns a string.
The serialized instances of tools `T` correspond to a language `L`; 
since, by assumption, the LLM is capable of producing outputs in `L`, 
it can express the intention to execute a Callback with arbitrary instances 
of `T`. In the last row, we show how an Agent can be viewed as a function signature
involving its state `S`.


| Function Description | Function Signature                                                                                                |
|----------------------|-------------------------------------------------------------------------------------------------------------------|
| LLM | `[Input Query] -> string` <br> `[Input Query]` is the original query.                                             |
| Chat interface | `[Message History] x [Input Query] -> string` <br> `[Message History]` consists of  previous messages[^1].        |
| Agent | `[System Message] x [Message History] x [Input Query] -> string` <br> `[System Message]` is the system prompt. |
| Agent with tool | `[System Message] x (string -> T) x (T -> string) x [Message History] x [Input Query] -> string`                  |
| Parser with type `T` | `string -> T`                                                                                                     |
| Callback with type `T` | `T -> string`                                                                                                     |
| General Agent with state type `S` | `S x [System Message] x (string -> T) x (S x T -> S x string) x [Message History] x [Input Query] -> S x string`  |

[^1]: Note that in reality, separator tokens are added to distinguish messages, and the messages are tagged with metadata indicating the sender, among other things.

## Multi-Agent Orchestration


### An Agent's "Native" Responders

When building an LLM-based multi-agent system, an orchestration mechanism is critical to manage the flow of messages 
between agents, to ensure task progress, and handle inevitable LLM deviations from instructions. Langroid provides a 
simple yet versatile orchestration mechanism that seamlessly handles:

- user interaction,
- tool handling,
- sub-task delegation

We view an agent as a message transformer; 
it may transform an incoming message using one of its three "native" responder methods, all of which have the same 
function signature: `string -> string`. These methods are:

- `llm_response` returns the LLM's response to the input message.
Whenever this method is invoked, the agent updates its dialog history (typically consisting of alternating user and LLM messages).
- `user_response` prompts the user for input and returns their response.
- `agent_response` by default only handles a `tool message` (i.e., one that contains an llm-generated structured 
response): it performs any requested actions, and returns the result as a string. An `agent_response` method can have 
other uses besides handling tool messages, such as handling scenarios where an LLM "forgot" to use a tool, 
or used a tool incorrectly, and so on.

To see why it is useful to have these responder methods, consider first a simple example of creating a basic chat loop
with the user. It is trivial to create such a loop by alternating between `user_response` and `llm_response`. 
Now suppose we instruct the agent to either directly answer the user's question or perform a web-search. Then it is possible that
sometimes the `llm_response` will produce a "tool message", say `WebSearchTool`, which we would handle with the
`agent_response` method. This requires a slightly different, and more involved, way of iterating among the agent's
responder methods. 

### Tasks: Encapsulating Agent Orchestration

From a coding perspective, it is useful to hide the actual iteration logic by wrapping an Agent class
in a separate class, which we call a `Task`, which encapsulates all of the orchestration logic. Users of the Task class
can then define the agent, tools, and any sub-tasks, wrap the agent in a task object of class Task, and simply call
`task.run()`, letting the Task class deal with the details of orchestrating the agent's responder methods, determining
task completion, and invoking sub-tasks.

### Responders in a Task: Agent's native responders and sub-tasks

The orchestration mechanism of a `Task` object works as follows. When a `Task` object is created from an agent, a 
sequence of eligible responders is created, which includes the agent's three "native" responder methods, in this order:
`agent_response`, `llm_response`, `user_response`. 
The type signature of the task's run method is `string -> string`, just like the Agent's
native responder methods, and this is the key to seamless delegation of tasks to sub-tasks. A list of subtasks can be
added to a `Task` object via `task.add_sub_tasks([t1, t2, ... ])`, where `[t1, t2, ...]` are other 
`Task` objects. The result of this is that the run method of each sub-task is appended to the sequence of eligible 
responders in the parent task object.
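
In code, this composition might look like the following sketch (the agents are hypothetical; only `add_sub_tasks` and `run` are the methods described here):

```python
import langroid as lr

# Wrap each (hypothetical) agent in a Task, then attach sub-tasks to the parent.
main_task = lr.Task(main_agent)
t1 = lr.Task(helper_agent_1)
t2 = lr.Task(helper_agent_2)
main_task.add_sub_tasks([t1, t2])  # t1.run, t2.run become extra responders

# Like the agent's native responders, run() maps a string to a string.
result = main_task.run("initial message")
```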

### Task Orchestration: Updating the Current Pending Message (CPM)

A task always maintains a *current pending message* (CPM), which is the latest message "awaiting" a valid response 
from a responder, which updates the CPM. 
At a high level the `run` method of a task attempts to repeatedly find a valid response to the 
CPM, until the task is done. (Note that this paradigm is somewhat reminiscent of a *Blackboard* architecture, where
agents take turns deciding whether they can update the shared message on the "blackboard".)
This is achieved by repeatedly invoking the `step` method, which represents a "turn" in the conversation.
The `step` method sequentially tries the eligible responders from the beginning of the eligible-responders list, until it
finds a valid response, defined as a non-null or terminating message (i.e. one that signals that the task is done). In
particular, this `step()` algorithm implies that a Task delegates (or "fails over") to a sub-task only if the task's 
native responders have no valid response. 

There are a few simple rules that govern how `step` works: 

- a responder entity (either a sub-task or a native entity -- one of LLM, Agent, or User) cannot 
  respond if it just responded in the previous step (this prevents a responder from "talking to itself").
- when a response signals that the task is done (via a `DoneTool` or a "DONE" string) the task is ready to exit and 
  return the CPM as the result of the task. 
- when an entity "in charge" of the task has a null response, the task is considered finished and ready to exit.
- if the response of an entity or subtask is a structured message containing a recipient field, then the specified recipient task or entity will
be the only one eligible to respond at the next step.

Once a valid response is found in a step, the CPM is updated to this response, and the next step starts the search for a
valid response from the beginning of the eligible responders list. When a response signals that the task is done, 
the run method returns the CPM as the result of the task. This is a highly
simplified account of the orchestration mechanism, and the actual implementation is more involved.

The above simple design is surprisingly powerful and can support a wide variety of task structures, including trees and
DAGs. As a simple illustrative example, tool-handling has a natural implementation. The LLM is instructed to use a
certain JSON-structured message as a tool, and thus the `llm_response` method can produce a structured message, such 
as an SQL query.  This structured message is then handled by the `agent_response` method, and the resulting message updates the CPM. The
`llm_response` method then becomes eligible to respond again: for example if the agent's response contains an SQL 
error, the LLM would retry its query, and if the agent's response consists of the query results, the LLM would
respond with a summary of the results.

The Figure below depicts the task orchestration and delegation mechanism,
showing how iteration among responder methods works when a  Task `T` has sub-tasks `[T1, T2]` and `T1` has a 
sub-task `T3`. 


![langroid-arch.png](figures/langroid-arch.png)
</file>

<file path="docs/blog/posts/langroid-intro.md">
---
title: 'Langroid: Harness LLMs with Multi-Agent Programming'
draft: false
date: 2023-09-03
authors: 
  - pchalasani
categories:
  - langroid
  - llm
comments: true
---

# Langroid: Harness LLMs with Multi-Agent Programming

## The LLM Opportunity

Given the remarkable abilities of recent Large Language Models (LLMs), there
is an unprecedented opportunity to build intelligent applications powered by
this transformative technology. The top question for any enterprise is: how
best to harness the power of LLMs for complex applications? For technical and
practical reasons, building LLM-powered applications is not as simple as
throwing a task at an LLM-system and expecting it to do it.

<!-- more -->


## Langroid's Multi-Agent Programming Framework

Effectively leveraging LLMs at scale requires a *principled programming
framework*. In particular, there is often a need to maintain multiple LLM
conversations, each instructed in different ways, and "responsible" for
different aspects of a task.


An *agent* is a convenient abstraction that encapsulates LLM conversation
state, along with access to long-term memory (vector-stores) and tools (a.k.a functions
or plugins). Thus a **Multi-Agent Programming** framework is a natural fit
for complex LLM-based applications.

> Langroid is the first Python LLM-application framework that was explicitly
designed  with Agents as first-class citizens, and Multi-Agent Programming
as the core  design principle. The framework is inspired by ideas from the
[Actor Framework](https://en.wikipedia.org/wiki/Actor_model).

Langroid allows an intuitive definition of agents, tasks and task-delegation
among agents. There is a principled mechanism to orchestrate multi-agent
collaboration. Agents act as message-transformers, and take turns responding to (and
transforming) the current message. The architecture is lightweight, transparent,
flexible, and allows other types of orchestration to be implemented.
Besides Agents, Langroid also provides simple ways to directly interact with LLMs and vector-stores.


## Highlights
- **Agents as first-class citizens:** The `Agent` class encapsulates LLM conversation state,
  and optionally a vector-store and tools. Agents are a core abstraction in Langroid;
  Agents act as _message transformers_, and by default provide 3 _responder_ methods, one corresponding to each
  entity: LLM, Agent, User.
- **Tasks:** A Task class wraps an Agent, gives the agent instructions (or roles, or goals),
  manages iteration over an Agent's responder methods,
  and orchestrates multi-agent interactions via hierarchical, recursive
  task-delegation. The `Task.run()` method has the same
  type-signature as an Agent's responder's methods, and this is key to how
  a task of an agent can delegate to other sub-tasks: from the point of view of a Task,
  sub-tasks are simply additional responders, to be used in a round-robin fashion
  after the agent's own responders.
- **Modularity, Reusability, Loose coupling:** The `Agent` and `Task` abstractions allow users to design
  Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
- **LLM Support**: Langroid supports OpenAI LLMs including GPT-3.5-Turbo,
  GPT-4.
- **Caching of LLM prompts, responses:** Langroid by default uses [Redis](https://redis.com/try-free/) for caching.
- **Vector-stores**: [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/), LanceDB, Pinecone, PostgresDB (PGVector), Weaviate are currently supported.
  Vector stores enable Retrieval-Augmented Generation (RAG).
- **Grounding and source-citation:** Access to external documents via vector-stores
  allows for grounding and source-citation.
- **Observability, Logging, Lineage:** Langroid generates detailed logs of multi-agent interactions and
  maintains provenance/lineage of messages, so that you can trace back
  the origin of a message.
- **Tools/Plugins/Function-calling**: Langroid supports OpenAI's recently
  released [function calling](https://platform.openai.com/docs/guides/gpt/function-calling)
  feature. In addition, Langroid has its own native equivalent, which we
  call **tools** (also known as "plugins" in other contexts). Function
  calling and tools have the same developer-facing interface, implemented
  using [Pydantic](https://docs.pydantic.dev/latest/),
  which makes it very easy to define tools/functions and enable agents
  to use them. Benefits of using Pydantic are that you never have to write
  complex JSON specs for function calling, and when the LLM
  hallucinates malformed JSON, the Pydantic error message is sent back to
  the LLM so it can fix it!
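
As a rough illustration of this, a tool is defined as a plain Pydantic-style class; the class name and fields below are made up for illustration, and the JSON schema and instructions shown to the LLM are generated from it automatically:

```python
import langroid as lr
from langroid.agent.tool_message import ToolMessage

class PolygonAreaTool(ToolMessage):
    # Ordinary Pydantic fields -- no hand-written JSON spec needed.
    request: str = "polygon_area"
    purpose: str = "To compute the area of a regular polygon with <num_sides> sides of length <side_length>."
    num_sides: int
    side_length: float

agent = lr.ChatAgent(lr.ChatAgentConfig())
agent.enable_message(PolygonAreaTool)  # one line to enable the tool for an agent
```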

<iframe src="https://langroid.substack.com/embed" width="480" height="320" style="border:1px solid #EEE; background:white;" frameborder="0" scrolling="no"></iframe>
</file>

<file path="docs/blog/posts/langroid-knowledge-graph.md">
---
title: 'Langroid: Knowledge Graph RAG powered by Neo4j'
draft: false
date: 2024-01-18
authors: 
  - mohannad
categories:
  - langroid
  - neo4j
  - rag
  - knowledge-graph
comments: true
---

## "Chat" with various sources of information
LLMs are increasingly being used to let users converse in natural language with 
a variety of types of data sources:
<!-- more -->
- unstructured text documents: a user's query is augmented with "relevant" documents or chunks
  (retrieved from an embedding-vector store) and fed to the LLM to generate a response -- 
  this is the idea behind Retrieval Augmented Generation (RAG).
- SQL Databases: An LLM translates a user's natural language question into an SQL query,
  which is then executed by another module, sending results to the LLM, so it can generate
  a natural language response based on the results.
- Tabular datasets: similar to the SQL case, except instead of an SQL Query, the LLM generates 
  a Pandas dataframe expression.

Langroid has had specialized Agents for the above scenarios: `DocChatAgent` for RAG with unstructured
text documents, `SQLChatAgent` for SQL databases, and `TableChatAgent` for tabular datasets.

## Adding support for Neo4j Knowledge Graphs

Analogous to the SQLChatAgent, Langroid now has a 
[`Neo4jChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/neo4j/neo4j_chat_agent.py) 
to interact with a Neo4j knowledge graph using natural language.
This Agent has access to two key tools that enable it to handle a user's queries:

- `GraphSchemaTool` to get the schema of a Neo4j knowledge graph.
- `CypherRetrievalTool` to generate Cypher queries from a user's query.
Cypher is a specialized query language for Neo4j, and even though it is not as widely known as SQL,
most LLMs today can generate Cypher Queries.

Setting up a basic Neo4j-based RAG chatbot is straightforward. First ensure 
you set these environment variables (or provide them in a `.env` file):
```bash
NEO4J_URI=<uri>
NEO4J_USERNAME=<username>
NEO4J_PASSWORD=<password>
NEO4J_DATABASE=<database>
```

Then you can configure and define a `Neo4jChatAgent` like this:
```python
import langroid as lr
import langroid.language_models as lm
from dotenv import load_dotenv

from langroid.agent.special.neo4j.neo4j_chat_agent import (
    Neo4jChatAgent,
    Neo4jChatAgentConfig,
    Neo4jSettings,
)

llm_config = lm.OpenAIGPTConfig()

load_dotenv()

neo4j_settings = Neo4jSettings()

kg_rag_agent_config = Neo4jChatAgentConfig(
    neo4j_settings=neo4j_settings,
    llm=llm_config, 
)
kg_rag_agent = Neo4jChatAgent(kg_rag_agent_config)
kg_rag_task = lr.Task(kg_rag_agent, name="kg_RAG")
kg_rag_task.run()
```


## Example: PyPi Package Dependency Chatbot

In the Langroid-examples repository, there is an example python 
[script](https://github.com/langroid/langroid-examples/blob/main/examples/kg-chat/)
showcasing tools/Function-calling + RAG using a `DependencyGraphAgent` derived from [`Neo4jChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/neo4j/neo4j_chat_agent.py).
This agent uses two tools, in addition to the tools available to `Neo4jChatAgent`:

- `GoogleSearchTool` to find package version and type information, as well as to answer 
 other web-based questions after acquiring the required information from the dependency graph.
- `DepGraphTool` to construct a Neo4j knowledge-graph modeling the dependency structure
   for a specific package, using the API at [DepsDev](https://deps.dev/).

In response to a user's query about dependencies, the Agent decides whether to use a Cypher query
or do a web search. Here is what it looks like in action:

<figure markdown>
  ![dependency-demo](../../assets/demos/dependency_chatbot.gif)
  <figcaption>
Chatting with the `DependencyGraphAgent` (derived from Langroid's `Neo4jChatAgent`).
When a user specifies a Python package name (in this case "chainlit"), the agent searches the web using
`GoogleSearchTool` to find the version of the package, and then uses the `DepGraphTool`
to construct the dependency graph as a neo4j knowledge graph. The agent then answers
questions by generating Cypher queries to the knowledge graph, or by searching the web.
  </figcaption>
</figure>
</file>

<file path="docs/blog/posts/langroid-lancedb.md">
---
title: 'Langroid: Multi-Agent Programming Framework for LLMs'
draft: true
date: 2024-01-10
authors: 
  - pchalasani
categories:
  - langroid
  - lancedb
  - rag
  - vector-database
comments: true
---

## Langroid: Multi-Agent Programming framework for LLMs

In this era of Large Language Models (LLMs), there is unprecedented demand to
create intelligent applications powered by this transformative technology. What
is the best way for developers to harness the potential of LLMs in complex
application scenarios? For a variety of technical and practical reasons (context
length limitations, LLM brittleness, latency, token-costs), this is not as
simple as throwing a task at an LLM system and expecting it to get done. What is
needed is a principled programming framework, offering the right set of
abstractions and primitives to make developers productive when building LLM
applications.
<!-- more -->
## Langroid's Elegant Multi-Agent Paradigm

The [Langroid](https://github.com/langroid/langroid) team (ex-CMU/UW-Madison researchers) 
has a unique take on this – they have built an open source Python framework to 
simplify LLM application development, using a Multi-Agent Programming paradigm. 
Langroid’s architecture is founded on Agents as first-class citizens: 
they are message-transformers, and accomplish tasks collaboratively via messages.

Langroid is emerging as a popular LLM framework; developers appreciate its clean
design and intuitive, extensible architecture. Programming with Langroid is
natural and even fun: you configure Agents and equip them with capabilities
(such as LLMs, vector-databases, Function-calling/tools), connect them, and have
them collaborate via messages. This is a “Conversational Programming” paradigm,
and works with local/open and remote/proprietary LLMs. (Importantly, it does not
use LangChain or any other existing LLM framework).

<figure markdown>
  ![Langroid-card](../../assets/langroid-card-ossem-rust-1200x630.png){ width="800" }
  <figcaption>
An Agent serves as a convenient abstraction, encapsulating the state of LLM
conversations, access to vector stores, and various tools (functions or
plugins). A Multi-Agent Programming framework naturally aligns with the demands
of complex LLM-based applications.
</figcaption>
</figure>



## Connecting Agents via Tasks

In Langroid, a ChatAgent has a set of “responder” methods, one for each "entity":
an LLM, a human, and a tool-handler. However it does not have any way to iterate through
these responders. This is where the Task class comes in: A Task wraps an Agent
and gives it the ability to loop through its responders, via the `Task.run()` method. 

A Task loop is organized around simple rules that govern when a responder is eligible
to respond, what is considered a valid response, and when the task is complete.
The simplest example of a Task loop is an interactive chat with the human user. 
A Task also enables an Agent to interact with other agents: 
other tasks can be added to a task as sub-tasks, 
in a recursive, hierarchical (or DAG) structure. From a Task’s perspective,
sub-tasks are just additional responders, and present the same string-to-string 
message-transformation interface (function signature) as the Agent’s "native" responders. 
This is the key to composability of tasks in Langroid,
since a sub-task can act the same way as an Agent's "native" responders, and is subject
to the same rules of task orchestration. The result is that the same task orchestration
mechanism seamlessly enables tool handling, retries when the LLM deviates, and 
delegation to sub-tasks. More details are in the Langroid [quick-start guide](https://langroid.github.io/langroid/quick-start/).

## A Taste of Coding with Langroid

To get started with Langroid, simply install it from pypi into your virtual environment:

```bash
pip install langroid
```
To directly chat with an OpenAI LLM, define the LLM configuration,
instantiate a language model object, and interact with it.
(Langroid works with non-OpenAI local/proprietary LLMs as well;
see their [tutorial](https://langroid.github.io/langroid/tutorials/non-openai-llms/).)
For the examples below, ensure you have a file `.env` containing your OpenAI API key
with this line: `OPENAI_API_KEY=sk-...`.
    
```python
import langroid as lr
import langroid.language_models as lm

llm_cfg = lm.OpenAIGPTConfig() # default GPT4-Turbo
mdl = lm.OpenAIGPT(llm_cfg)
mdl.chat("What is 3+4?", max_tokens=10)
```
The `mdl` object does not maintain any conversation state; for that you need a `ChatAgent`:

```python
agent_cfg = lr.ChatAgentConfig(llm=llm_cfg)
agent = lr.ChatAgent(agent_cfg)
agent.llm_response("What is the capital of China?")
agent.llm_response("What about France?") # interprets based on previous msg
```
Wrap a ChatAgent in a Task to create a basic interactive loop with the user:

```python
task = lr.Task(agent, name="Bot")
task.run("Hello")
```
Have a Teacher Agent talk to a Student Agent:
    
```python
teacher = lr.ChatAgent(agent_cfg)
teacher_task = lr.Task(
    teacher, name="Teacher",
    system_message="""
        Ask your student simple number-based questions, and give feedback.
        Start with a question.
        """,
)
student = lr.ChatAgent(agent_cfg)
student_task = lr.Task(
    student, name="Student",
    system_message="Concisely answer your teacher's questions."
)
teacher_task.add_sub_task(student_task)
teacher_task.run()
```



## Retrieval Augmented Generation (RAG) and Vector Databases

One of the most popular LLM applications is question-answering 
on documents via Retrieval-Augmented Generation (RAG), powered by a vector database.
Langroid has a built-in DocChatAgent that incorporates a number of advanced RAG techniques, 
clearly laid out so they can be easily understood and extended.

### Built-in Support for LanceDB
<figure markdown>
  ![Langroid-lance](../../assets/langroid-lance.png){ width="800" }
  <figcaption>
Langroid uses LanceDB as the default vector store for its DocChatAgent.
</figcaption>
</figure>

Langroid's DocChatAgent uses the LanceDB serverless vector-database by default.
Since LanceDB uses file storage, it is easy to set up and use (no need for docker or cloud services),
and due to its use of the Lance columnar format, it is 
highly performant and scalable. 
In addition, Langroid has a specialized `LanceDocChatAgent` that leverages LanceDB's 
unique features such as Full-text search, SQL-like filtering, and pandas dataframe interop.
Setting up a basic RAG chatbot is as simple as this (assuming the previous imports):

```python
from langroid.agent.special.doc_chat_agent import DocChatAgentConfig
from langroid.agent.special.lance_doc_chat_agent import LanceDocChatAgent
llm_config = lm.OpenAIGPTConfig()

rag_agent_config = DocChatAgentConfig(
    llm=llm_config, 
    doc_paths=["/path/to/my/docs"], # files, folders, or URLs.
)
rag_agent = LanceDocChatAgent(rag_agent_config)
rag_task = lr.Task(rag_agent, name="RAG")
rag_task.run()
```

For an example showcasing Tools/Function-calling + RAG in a multi-agent setup, see their quick-start
[Colab notebook](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb)
which shows a 2-agent system where one agent is tasked with extracting structured information
from a document, and generates questions for the other agent to answer using RAG.
In the Langroid-examples repo there is a [script](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py) with the same functionality,
and here is what it looks like in action:


<figure markdown>
  ![lease-demo](../../assets/demos/lease-extractor-demo.gif){ width="800" }
  <figcaption>
Extracting structured info from a Commercial Lease using a 2-agent system, with 
a Tool/Function-calling and RAG. The Extractor Agent is told to extract information
in a certain structure, and it generates questions for the Document Agent
to answer using RAG.
</figcaption>
</figure>

## Retrieval Augmented Analytics

One of the unique features of LanceDB is its SQL-like filtering and Pandas dataframe interoperability.
LLMs are great at generating SQL queries, and also Pandas computation code such as `df.groupby("col").mean()`.
This opens up a very interesting possibility, which we call
**Retrieval Augmented Analytics:** Suppose a user has a large dataset of movie descriptions
with metadata such as rating, year and genre, and wants to ask:

> What is the highest-rated Comedy movie about college students made after 2010?

It is not hard to imagine that an LLM should be able to generate a **Query Plan** to answer this,
consisting of:

- A SQL-like filter: `genre = "Comedy" and year > 2010`
- A Pandas computation: `df.loc[df["rating"].idxmax()]`
- A rephrased query given the filter: "Movie about college students" (used for semantic/lexical search)

Langroid's Multi-Agent framework enables exactly this type of application. 
The [`LanceRAGTaskCreator`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/lance_rag/lance_rag_task.py) takes a `LanceDocChatAgent` and adds two additional agents:

- QueryPlannerAgent: Generates the Query Plan
- QueryPlanCriticAgent: Critiques the Query Plan and Answer received from the RAG Agent, so that 
  the QueryPlanner can generate a better plan if needed.

Check out the [`lance-rag-movies.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/lance-rag-movies.py) script in the langroid-examples repo to try this out.
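
Wiring this up in code is brief; roughly along these lines (a sketch: the exact `LanceRAGTaskCreator.new(...)` signature may differ slightly, so refer to the linked script):

```python
from langroid.agent.special.lance_rag.lance_rag_task import LanceRAGTaskCreator

# rag_agent is the LanceDocChatAgent configured earlier; the task creator wires it
# up with the QueryPlanner and QueryPlanCritic agents into a single runnable Task.
task = LanceRAGTaskCreator.new(rag_agent, interactive=False)
result = task.run(
    "What is the highest-rated Comedy movie about college students made after 2010?"
)
```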

## Try it out and get involved!
This was just a glimpse of what you can do with Langroid and how your code would look.
Give it a shot and learn more about the features and roadmap of Langroid on their 
[GitHub repo](https://github.com/langroid/langroid). Langroid welcomes contributions,
and they have a friendly [Discord](https://discord.gg/ZU36McDgDs) community.

If you like it, don’t forget to drop a 🌟.
</file>

<file path="docs/blog/posts/local-llm-formatting.md">
---
title: 'Chat formatting in Local LLMs'
draft: true
date: 2024-01-25
authors: 
  - pchalasani
categories:
  - langroid
  - prompts
  - llm
  - local-llm
comments: true
---


In an (LLM performance) investigation, details matter!

And assumptions kill (your LLM performance).

I'm talking about chat/prompt formatting, especially when working with Local LLMs.

TL/DR -- details like chat formatting matter a LOT,
and trusting that the local LLM API is doing it correctly may be a mistake,
leading to inferior results.

<!-- more -->

🤔Curious? Here are some notes from the trenches when we built an app
(https://github.com/langroid/langroid/blob/main/examples/docqa/chat-multi-extract-local.py)
based entirely on a locally running Mistral-7b-instruct-v0.2  
(yes ONLY 7B parameters, compared to 175B+ for GPT4!)
that leverages Langroid Multi-agents, Tools/Function-calling and RAG to
reliably extract structured information from a document,
where an Agent is given a spec of the desired structure, and it generates
questions for another Agent to answer using RAG.

🔵LLM API types: generate and chat
LLMs are typically served behind two types of API endpoints:
⏺ a "generation" API, which accepts a dialog formatted as a SINGLE string, and
⏺ a "chat" API, which accepts the dialog as a LIST,
and as a convenience formats it into a single string before sending it to the LLM.

🔵Proprietary vs Local LLMs
When you use a proprietary LLM API (such as OpenAI or Claude), for convenience
you can use their "chat" API, and you can trust that it will format the dialog
history correctly (or else they wouldn't be in business!).

But with a local LLM, you have two choices of where to send the dialog history:
⏺ you could send it to the "chat" API and trust that the server will format it correctly,
⏺ or you could format it yourself and send it to the "generation" API.

🔵Example of prompt formatting?
Suppose your system prompt and dialog look like this:

System Prompt/Instructions: when I give you a number, respond with its double
User (You): 3
Assistant (LLM): 6
User (You): 9

Mistral-instruct models expect this chat to be formatted like this
(note that the system message is combined with the first user message):
"<s>[INST] when I give you a number, respond with its double 3 [/INST] 6 [INST] 9 [/INST]"

🔵Why does it matter?
It matters A LOT -- because each type of LLM (llama2, mistral, etc) has
been trained and/or fine-tuned on chats formatted in a SPECIFIC way, and if you
deviate from that, you may get odd/inferior results.

🔵Using Mistral-7b-instruct-v0.2 via oobabooga/text-generation-webui
"Ooba" is a great library (https://github.com/oobabooga/text-generation-webui)
that lets you spin up an OpenAI-like API server for
local models, such as llama2, mistral, etc. When we used its chat endpoint
for a Langroid Agent, we were getting really strange results,
with the LLM sometimes thinking it is the user! 😧

Digging in, we found that their internal formatting template was
wrong, and it was formatting the system prompt as if it's
the first user message -- this leads to the LLM interpreting the first user
message as an assistant response, and so on -- no wonder there was role confusion!

💥Langroid solution:
To avoid these issues, in Langroid we now have a formatter
(https://github.com/langroid/langroid/blob/main/langroid/language_models/prompt_formatter/hf_formatter.py)
that retrieves the HuggingFace tokenizer for the LLM and uses
its "apply_chat_template" method to format chats.
This gives you control over the chat format and you can use the "generation"
endpoint of the LLM API instead of the "chat" endpoint.
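
Here is a minimal sketch of that idea using the HuggingFace `transformers` tokenizer directly (independent of Langroid); the printed string is then what you would send to the "generation" endpoint:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

# Mistral-instruct has no separate system role, so the system prompt is merged
# into the first user message, as described above.
messages = [
    {"role": "user", "content": "when I give you a number, respond with its double 3"},
    {"role": "assistant", "content": "6"},
    {"role": "user", "content": "9"},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)  # roughly: "<s>[INST] ... [/INST] 6</s> [INST] 9 [/INST]"
```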

Once we switched to this, results improved dramatically 🚀

Be sure to check out Langroid https://github.com/langroid/langroid

#llm #ai #opensource
</file>

<file path="docs/blog/posts/local-llm.md">
---
title: 'Using Langroid with Local LLMs'
draft: false
date: 2023-09-14
authors: 
  - pchalasani
categories:
  - langroid
  - llm
  - local-llm
comments: true
---
## Why local models?
There are commercial, remotely served models that currently appear to beat all open/local
models. So why care about local models? Local models are exciting for a number of reasons:

<!-- more -->

- **cost**: other than compute/electricity, there is no cost to use them.
- **privacy**: no concerns about sending your data to a remote server.
- **latency**: no network latency due to remote API calls, so faster response times, provided you can get fast enough inference.
- **uncensored**: some local models are not censored to avoid sensitive topics.
- **fine-tunable**: you can fine-tune them on private/recent data, which current commercial models don't have access to.
- **sheer thrill**: having a model running on your machine with no internet connection,
  and being able to have an intelligent conversation with it -- there is something almost magical about it.

The main appeal with local models is that with sufficiently careful prompting,
they may behave sufficiently well to be useful for specific tasks/domains,
and bring all of the above benefits. Some ideas on how you might use local LLMs:

- In a multi-agent system, you could have some agents use local models for narrow 
  tasks with a lower bar for accuracy (and fix responses with multiple tries).
- You could run many instances of the same or different models and combine their responses.
- Local LLMs can act as a privacy layer, to identify and handle sensitive data before passing to remote LLMs.
- Some local LLMs have intriguing features, for example llama.cpp lets you 
  constrain its output using a grammar.

## Running LLMs locally

There are several ways to use LLMs locally. See the [`r/LocalLLaMA`](https://www.reddit.com/r/LocalLLaMA/comments/11o6o3f/how_to_install_llama_8bit_and_4bit/) subreddit for
a wealth of information. There are open source libraries that offer front-ends
to run local models, for example [`oobabooga/text-generation-webui`](https://github.com/oobabooga/text-generation-webui)
(or "ooba-TGW" for short) but the focus in this tutorial is on spinning up a
server that mimics an OpenAI-like API, so that any code that works with
the OpenAI API (for say GPT3.5 or GPT4) will work with a local model,
with just a simple change: set `openai.api_base` to the URL where the local API
server is listening, typically `http://localhost:8000/v1`.

There are a few libraries we recommend for setting up local models with OpenAI-like APIs:

- [LiteLLM OpenAI Proxy Server](https://docs.litellm.ai/docs/proxy_server) lets you set up a local 
  proxy server for over 100 LLM providers (remote and local).
- [ooba-TGW](https://github.com/oobabooga/text-generation-webui) mentioned above, for a variety of models, including llama2 models.
- [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) (LCP for short), specifically for llama2 models.
- [ollama](https://github.com/jmorganca/ollama)

We recommend visiting these links to see how to install and run these libraries.

## Use the local model with the OpenAI library

Once you have a server running using any of the above methods, 
your code that works with the OpenAI models can be made to work 
with the local model, by simply changing the `openai.api_base` to the 
URL where the local server is listening. 
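
For example, with the (pre-1.0) `openai` Python package that was current when this was written, the change is roughly this (the model name here is illustrative and depends on your local server):

```python
import openai

openai.api_base = "http://localhost:8000/v1"  # local server instead of api.openai.com
openai.api_key = "sk-dummy"  # most local servers ignore the key, but the client wants one

response = openai.ChatCompletion.create(
    model="local-model",  # whatever name your local server expects
    messages=[{"role": "user", "content": "What is 3+4?"}],
)
print(response["choices"][0]["message"]["content"])
```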

If you are using Langroid to build LLM applications, the framework takes
care of the `api_base` setting in most cases, and you need to only set
the `chat_model` parameter in the LLM config object for the LLM model you are using.
See the [Non-OpenAI LLM tutorial](../../tutorials/non-openai-llms.md) for more details.
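
For instance, something along these lines (a sketch: the exact `chat_model` string depends on how your local server is set up, e.g. `local/...`, `ollama/...`, or `litellm/...`; see the tutorial linked above):

```python
import langroid as lr
import langroid.language_models as lm

# Point Langroid at a local OpenAI-compatible server listening on localhost:8000.
llm_config = lm.OpenAIGPTConfig(chat_model="local/localhost:8000/v1")
agent = lr.ChatAgent(lr.ChatAgentConfig(llm=llm_config))
agent.llm_response("What is 3+4?")
```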



<iframe src="https://langroid.substack.com/embed" width="480" height="320" style="border:1px solid #EEE; background:white;" frameborder="0" scrolling="no"></iframe>
</file>

<file path="docs/blog/posts/malade.md">
---
title: 'MALADE: Multi-Agent Architecture for Pharmacovigilance'
draft: false
date: 2024-08-12
authors:
- jihye
- nils
- pchalasani
- mengelhard
- someshjha
- anivaryakumar
- davidpage

categories:
- langroid
- multi-agent
- neo4j
- rag
comments: true
---

# MALADE: Multi-Agent Architecture for Pharmacovigilance

[Published in ML for HealthCare 2024](https://www.mlforhc.org/2024-abstracts)

[Arxiv](https://arxiv.org/abs/2408.01869) 

[GitHub](https://github.com/jihyechoi77/malade)

## Summary
We introduce MALADE (**M**ultiple **A**gents powered by **L**LMs for **ADE** Extraction),
a multi-agent system for Pharmacovigilance. It is the first effective explainable 
multi-agent LLM system for extracting Adverse Drug Events (ADEs) from FDA drug labels and drug prescription data.
<!-- more -->
Given a drug category and an adverse outcome, MALADE
produces:

- a qualitative label of risk (`increase`, `decrease` or `no-effect`),
- confidence in the label (a number in $[0,1]$),
- frequency of effect (`rare`, `common`, or `none`),
- strength of evidence (`none`, `weak`, or `strong`), and
- a justification with citations.

This task is challenging for several reasons: 

- FDA labels and prescriptions are for individual drugs, not drug categories, so representative drugs in a category 
  need to be identified from patient prescription data, and ADE information found for specific drugs in a category 
  needs to be aggregated to make a statement about the category as a whole, 
- The data is noisy, with variations in the terminologies of drugs and outcomes, and 
- ADE descriptions are often buried in large amounts of narrative text.

The MALADE architecture is LLM-agnostic 
and leverages the [Langroid](https://github.com/langroid/langroid) multi-agent framework.
It consists of a combination of Agents using Retrieval Augmented Generation (RAG), that 
iteratively improve their answers based on feedback from Critic Agents.
We evaluate the quantitative scores against 
a ground-truth dataset known as the [*OMOP Ground Truth Task*](https://www.niss.org/sites/default/files/Session3-DaveMadigan_PatrickRyanTalk_mar2015.pdf)
and find that MALADE achieves state-of-the-art performance.



## Introduction

In the era of Large Language Models (LLMs), given their remarkable text understanding and generation abilities, 
there is an unprecedented opportunity to develop new, LLM-based methods for trustworthy medical knowledge synthesis, 
extraction and summarization. The focus of this paper is Pharmacovigilance, a critical task in healthcare, where 
the goal is to monitor and evaluate the safety of drugs. In particular, the identification of Adverse Drug Events 
(ADEs) is crucial for ensuring patient safety. Consider a question such as this:

> What is the effect of **ACE inhibitors** on the risk of developing **angioedema**?

Here the **drug category** $C$ is _ACE inhibitors_, and the **outcome** $O$ is _angioedema_.
Answering this question involves several steps:

- **1(a): Find all drugs** in the ACE inhibitor category $C$, e.g. by searching the FDA 
[National Drug Code](https://www.fda.gov/drugs/drug-approvals-and-databases/national-drug-code-directory) (NDC) 
   database. This can be done using Elastic-Search, with filters to handle variations in drug/category names and inaccurate classifications.
- **1(b): Find the prescription frequency** of each drug in $C$ from patient prescription data, e.g. 
the [MIMIC-IV](https://physionet.org/content/mimiciv/3.0/) database. This can be done with a SQL query.
- **1(c): Identify the representative drugs** $D \subset C$ in this category, based on prescription frequency data 
     from step 1(b).
- **2:** For each drug $d \in D$, **summarize ADE information** about the effect of $d$ on the outcome $O$ of interest,
   (in this case angioedema) from text-based pharmaceutical sources, 
    e.g. the [OpenFDA Drug Label](https://open.fda.gov/apis/drug/label/) database.
- **3: Aggregate** the information from all drugs in $D$ to make a statement about the category $C$ as a whole.


## The role of LLMs

While steps 1(a) and 1(b) can be done by straightforward deterministic algorithms (SQL queries or Elastic-Search), the 
remaining steps are challenging but ideally suited to LLMs:

### Step 1(c): Identifying representative drugs in a category from prescription frequency data (`DrugFinder` Agent)

This is complicated by noise, such as the same drug appearing multiple times under different names, 
formulations, or delivery methods (for example, the ACE inhibitor **Lisinopril** is also known as **Zestril** and **Prinivil**).
  Thus a judgment must
  be made as to whether these are sufficiently different to be considered pharmacologically distinct;
  and some of these drugs may not actually belong to the category. This task thus requires a grouping operation, 
  related to the task of identifying standardized drug codes from text descriptions,
  well known to be challenging. This makes it very difficult to explicitly define the algorithm in a deterministic 
  manner that covers all edge cases (unlike the above database tasks), and hence is well-suited
  to LLMs, particularly those such as GPT-4, Claude3.5, and similar-strength variants which are known to have been 
  trained on vast amounts of general medical texts. 

In MALADE, this task is handled by the `DrugFinder` agent,
which is an Agent/Critic system where the main agent iteratively improves its output
in a feedback loop with the Critic agent. For example, the Critic corrects the Agent when it incorrectly
classifies drugs as pharmacologically distinct.

###  Step 2: Identifying Drug-Outcome Associations (`DrugOutcomeInfoAgent`)

The task here is to identify whether a given drug
has an established effect on the risk of a given outcome, based on FDA drug label database, and
output a summary of relevant information, including the level of identified risk and the evidence for
such an effect. Since this task involves extracting information from narrative text, it is well-suited to
LLMs using the Retrieval Augmented Generation (RAG) technique. 

In MALADE, the `DrugOutcomeInfoAgent` handles this task, and is also an Agent/Critic system, where the Critic
provides feedback and corrections to the Agent's output.
This agent does not have direct access to the FDA Drug Label data, but can receive
this information via another agent, `FDAHandler`. FDAHandler is equipped with **tools** (also known as function-calls) 
to invoke the OpenFDA API for drug label data, and answers questions in the context of information retrieved
based on the queries. Information received from this API is ingested into a vector database, so the
agent first uses a tool to query this vector database, and only resorts to the OpenFDA API tool if
the vector database does not contain the relevant information. An important aspect of this agent is that
its responses include specific **citations** and **excerpts** justifying its conclusions.

###  Step 3: Labeling Drug Category-Outcome Associations (`CategoryOutcomeRiskAgent`)

To identify the association between a drug category $C$ and an adverse health outcome $O$, we concurrently run a batch of 
queries to copies of `DrugOutcomeInfoAgent`, one for each drug $d$ in the
representative list $D$ for the category, of the form:

> Does drug $d$ increase or decrease the risk of condition $O$?

The results are sent to `CategoryOutcomeRiskAgent`, 
which is an Agent/Critic system that performs the final classification
step; its goal is to generate the qualitative and quantitative outputs mentioned above.

## MALADE Architecture

The figure below illustrates how the MALADE architecture handles the query,

> What is the effect of **ACE inhibitors** on the risk of developing **angioedema**?

![malade-arch.png](figures/malade-arch.png)

The query triggers a sequence of subtasks performed by the three Agents described above: 
`DrugFinder`, `DrugOutcomeInfoAgent`, and `CategoryOutcomeRiskAgent`.
Each Agent generates a response and justification, which are validated by a corresponding Critic agent, whose feedback is
used by the Agent to revise its response.

## Evaluation

### OMOP Ground Truth

We evaluate the results of MALADE against a well-established ground-truth dataset, 
the [OMOP ADE ground-truth table](https://www.niss.org/sites/default/files/Session3-DaveMadigan_PatrickRyanTalk_mar2015.pdf), shown below.
This is a reference dataset within the Observational Medical Outcomes Partnership (OMOP) Common Data Model that 
contains validated information about known adverse drug events.

![omop-ground-truth.png](figures/omop-ground-truth.png)

### Confusion Matrix

Below is a side-by-side comparison of this ground-truth dataset (left) with MALADE's labels (right), ignoring blue 
cells (see the paper for details):

![omop-results.png](figures/omop-results.png)

The resulting confusion-matrix for MALADE is shown below:

![confusion.png](figures/confusion.png)

### AUC Metric

Since MALADE produces qualitative and quantitative outputs, the paper explores a variety of ways to evaluate its
performance against the OMOP ground-truth dataset. Here we focus on the label output $L$ (i.e. `increase`, 
`decrease`, or `no-effect`), and its associated confidence score $c$, and use the Area Under the ROC Curve (AUC) as 
the evaluation metric.
The AUC metric is designed for binary classification, so we transform the three-class label output $L$ and
confidence score $c$ to a binary classification score $p$ as follows.
We treat $L$ = `increase` as the positive class,
and $L$ = `decrease` or `no-effect` as the negative class, and
we transform the label confidence score $c$ into a probability $p$ of `increase` as follows:


- if the label output is `increase`, $p = (2+c)/3$,
- if the label output is `no-effect`, $p = (2-c)/3$, and
- if the label output is `decrease` , $p = (1-c)/3$.

These transformations align with two intuitions: (a) a *higher* confidence in `increase` corresponds
to a *higher* probability of `increase`, and a *higher* confidence in `no-effect` or `decrease`
corresponds to a *lower* probability of `increase`, and (b) for a given confidence score $c$, the progression
of labels `decrease`, `no-effect`, and `increase` corresponds to *increasing* probabilities of `increase`.
The above transformations ensure that the probability $p$ is in the range $[0,1]$ and scales linearly with the
confidence score $c$.
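
For concreteness, the transformation can be written as a small helper (a sketch; the function name is ours, not from the MALADE code):

```python
def prob_increase(label: str, c: float) -> float:
    """Map a qualitative label and its confidence c in [0,1] to a probability of `increase`."""
    if label == "increase":
        return (2 + c) / 3
    if label == "no-effect":
        return (2 - c) / 3
    if label == "decrease":
        return (1 - c) / 3
    raise ValueError(f"unknown label: {label}")
```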

We ran the full MALADE system for all drug-category/outcome pairs in the OMOP ground-truth dataset, 
and then computed the AUC for the score $p$ against the ground-truth binary classification label.
With `GPT-4-Turbo` we obtained an AUC of 0.85, while `GPT-4o` resulted in an AUC of 0.90.
These are state-of-the-art results for this specific ADE-extraction task.


### Ablations

An important question the paper investigates is whether (and how much) the various components (RAG, critic agents, etc)
contribute to MALADE's performance. To answer this, we perform ablations, where we remove one or more
components from the MALADE system and evaluate the performance of the resulting system.
For example we found that dropping the Critic agents reduces the AUC (using `GPT-4-Turbo`) from 0.85 to 0.82
(see paper, Appendix D for more ablation results).

### Variance of LLM-generated Scores

When using an LLM to generate numerical scores, it is important to understand the variance in the scores.
For example, if a single "full" run of MALADE (i.e. for all drug-category/outcome pairs in the OMOP ground-truth
dataset) produces a certain AUC, was it a "lucky" run, or is the AUC relatively stable across runs?
Ideally one would investigate this by repeating the full run of MALADE many times,
but given the expense of running a full experiment, we focus on just three representative cells in the OMOP table,
one corresponding to each possible ground-truth label, run MALADE 10 times for each cell, and
study the distribution of $p$ (the probability of increased risk, translated from the confidence score using the
method described above) for each output label. Encouragingly, we find that the distribution of $p$ shows clear
separation between the three labels, as in the figure below (the $x$ axis ranges from 0 to 1, and the three colored
groups of bars represent, from left to right, the `decrease`, `no-effect`, and `increase` labels). Full details are in 
Appendix D of the paper.

![img.png](figures/variance-histogram.png)
</file>

<file path="docs/blog/posts/multi-agent-debate.md">
---
title: 'Multi Agent Debate and Education Platform'
draft: false
date: 2025-02-04
authors: 
  - adamshams
categories:
  - langroid
  - llm
  - local-llm
  - chat
comments: true
---

## Introduction
Have you ever imagined a world where we can debate complex issues with Generative AI agents taking a distinct 
stance and backing their arguments with evidence? Some will change your mind, and some will reveal the societal biases 
on which each distinct Large Language Model (LLM) is trained. Introducing an [AI-powered debate platform](https://github.com/langroid/langroid/tree/main/examples/multi-agent-debate) that brings 
this imagination to reality, leveraging diverse LLMs and the Langroid multi-agent programming framework.
The system enables users to engage in structured debates with an AI taking the opposite stance
(or even two AIs debating each other), using a multi-agent architecture built on Langroid,
where each agent embodies a specific ethical perspective, creating realistic and dynamic interactions.
Agents are prompt-engineered and role-tuned to align with their assigned ethical stance, 
ensuring thoughtful and structured debates. 

<!-- more -->

My motivations for creating this platform included: 

  - Provide a debate coach for underserved students without access to traditional resources. 
  - Provide a tool for research and for generating arguments from authentic sources. 
  - Create an adaptable education platform to learn both sides of the coin on any topic.
  - Reduce echo chambers perpetuated by online algorithms by fostering two-sided debates on any topic, promoting education and awareness around misinformation. 
  - Provide a research tool to study the variety of biases in LLMs, which are often trained on text reflecting societal biases. 
  - Identify a good multi-agent framework designed for programming with LLMs.


## Platform Features:
### Dynamic Agent Generation:
The platform features five types of agents: Pro, Con, Feedback, Research, and Retrieval Augmented Generation (RAG) Q&A. 
Each agent is dynamically generated using role-tuned and engineered prompts, ensuring diverse and engaging interactions.
#### Pro and Con Agents: 
These agents engage in the core debate, arguing for and against the chosen topic. 
Their prompts are carefully engineered to ensure they stay true to their assigned ethical stance.
#### Feedback Agent: 
This agent provides real-time feedback on the arguments and declares a winner. The evaluation criteria are based on the well-known Lincoln–Douglas debate format, and include:

  - Clash of Values 
  - Argumentation 
  - Cross-Examination 
  - Rebuttals 
  - Persuasion 
  - Technical Execution 
  - Adherence to Debate Etiquette 
  - Final Focus
#### Research Agent: 
This agent has the following functionalities:

  - Utilizes the `MetaphorSearchTool` and the `Metaphor` (now called `Exa`) Search API to conduct web searches, combined with
Retrieval Augmented Generation (RAG), to surface relevant web references for user education about the selected topic. 
  - Produces a summary of arguments for and against the topic.
  - RAG-based document chat with the resources identified through Web Search. 
#### RAG Q&A Agent:

  - Provides Q&A capability using a RAG-based chat interaction with the resources identified through Web Search.
The agent utilizes `DocChatAgent`, which is part of the Langroid framework that orchestrates all LLM interactions. 
  - Rich chunking parameters allow the user to get optimized relevance results. Check out `config.py` for details.

### Topic Adaptability:
Easily adaptable to any subject by simply adding pro and con system messages. This makes it a versatile tool for
exploring diverse topics and fostering critical thinking. Default topics cover ethics and use of AI for the following:
  - Healthcare
  - Intellectual property 
  - Societal biases 
  - Education
### Autonomous or Interactive:
Engage in a manual debate with a pro or con agent, or watch an autonomous debate while adjusting the number of turns.

### Diverse LLM Selection Adaptable per Agent: 
Configurable to select from diverse commercial and open source models: OpenAI, Google, and Mistral 
to experiment with responses for diverse perspectives. Users can select a unique LLM for each agent. 
       
### LLM Tool/Function Integration: 
Utilizes LLM tools/functions features to conduct semantic search using Metaphor Search API and summarizes the pro and 
con perspectives for education.

### Configurable LLM Parameters: 
Parameters such as temperature and minimum/maximum output tokens can be configured, allowing for customization of the
AI's responses. For Q&A with the searched resources, several
parameters can be tuned in the `config` to enhance response relevance.

### Modular Design: 
Reusable code and modularized for other LLM applications.


## Interaction
1. Decide if you want to use the same LLM for all agents or different ones.
2. Decide if you want an autonomous debate between AI agents, or user vs. AI agent. 
3. Select a debate topic.
4. Choose your side (Pro or Con).
5. Engage in a debate by providing arguments and receiving responses from agents.
6. Request feedback at any time by typing `f`.
7. Decide if you want the Metaphor Search to run to find topic-relevant web links
   and summarize them. 
8. Decide if you want to chat with the documents extracted from URLs found to learn more about the Topic.
9. End the debate manually by typing `done`. If you decide to chat with the documents, you can end the session
by typing `x`.

## Why was Langroid chosen?
I chose the Langroid framework because it's a principled multi-agent programming framework inspired by the Actor framework.
Prior to using Langroid, I developed a multi-agent debate system; however, I had to write a lot of tedious code to manage the state of communication between
debating agents and the user's interactions with LLMs. Langroid allowed me to seamlessly integrate multiple LLMs,
easily create agents and tasks, and attach sub-tasks. 

### Agent Creation Code Example

```python
from langroid import ChatAgent, ChatAgentConfig
from langroid.language_models import OpenAIGPTConfig


def create_chat_agent(
    name: str, llm_config: OpenAIGPTConfig, system_message: str
) -> ChatAgent:
    # Build a ChatAgent with the given name, LLM config, and system message.
    return ChatAgent(
        ChatAgentConfig(
            llm=llm_config,
            name=name,
            system_message=system_message,
        )
    )
```
#### Sample Pro Topic Agent Creation

```python
 
pro_agent = create_chat_agent(
    "Pro",
    pro_agent_config,
    system_messages.messages[pro_key].message + DEFAULT_SYSTEM_MESSAGE_ADDITION,
)
```
The `Task` mechanism in Langroid provides a robust way to manage complex interactions within multi-agent 
systems. A `Task` serves as a container for managing the flow of interactions between different agents
(such as chat agents) and attached sub-tasks. `Task` also helps with turn-taking, handling responses, 
and ensuring smooth transitions between dialogue states. Each `Task` object is responsible for coordinating responses 
from its assigned agent, deciding the sequence of responder methods (`llm_response`, `user_response`, `agent_response`), 
and managing transitions between different stages of a conversation or debate. Each agent can focus on its specific 
role while the task structure handles the overall process's orchestration and flow, allowing a clear separation of 
concerns. The architecture and code transparency of Langroid's framework make it an excellent candidate for 
applications like debates, where multiple agents must interact dynamically and responsively
based on a mixture of user inputs and automated responses.

### Task creation and Orchestration Example

```python
user_task = Task(user_agent, interactive=interactive_setting, restart=False)
ai_task = Task(ai_agent, interactive=False, single_round=True)
user_task.add_sub_task(ai_task)
if not llm_delegate:
    user_task.run(user_agent.user_message, turns=max_turns)
else:
    user_task.run("get started", turns=max_turns)
```
Tasks can be easily set up as sub-tasks of an orchestrating agent. In this case `user_task` could be Pro or Con depending 
on the user's selection. 

Whether you want to build custom tools/functions or use Langroid-provided ones, enabling them is only a line of code using
`agent.enable_message`. Here is an example with `MetaphorSearchTool` and `DoneTool`:
```python
metaphor_search_agent.enable_message(MetaphorSearchTool)
metaphor_search_agent.enable_message(DoneTool)
```

Overall, I had a great learning experience using Langroid and recommend it for any project 
that needs to utilize LLMs. I am already working on a few Langroid-based information retrieval and research systems 
for use in medicine and hope to contribute more soon. 

### Bio

I'm a high school senior at Khan Lab School located in Mountain View, CA where I host a student-run Podcast known as the
Khan-Cast. I also enjoy tinkering with interdisciplinary STEM projects. You can reach me on [LinkedIn](https://www.linkedin.com/in/adamshams/).
</file>

<file path="docs/blog/posts/test.md">
---
draft: true
date: 2022-01-31
authors: 
  - pchalasani
categories:
  - test
  - blog
comments: true
---

# Test code snippets

```python
from langroid.language_models.base import LLMMessage, Role
msg = LLMMessage(
        content="What is the capital of Bangladesh?",
        role=Role.USER,
      )
```

<!-- more -->


# Test math notation

A nice equation is $e^{i\pi} + 1 = 0$, which is known as Euler's identity.
Here is a cool equation too, and in display mode:

$$
e = mc^2
$$

# Latex with newlines

Serious latex with `\\` for newlines renders fine:

$$
\begin{bmatrix}
a & b \\
c & d \\
e & f \\
\end{bmatrix}
$$

or a multi-line equation

$$
\begin{aligned}
\dot{x} & = \sigma(y-x) \\
\dot{y} & = \rho x - y - xz \\
\dot{z} & = -\beta z + xy
\end{aligned}
$$

<iframe src="https://langroid.substack.com/embed" width="480" height="320" style="border:1px solid #EEE; background:white;" frameborder="0" scrolling="no"></iframe>
</file>

<file path="docs/blog/.authors.yml">
authors:
  pchalasani:
    name: Prasad Chalasani
    description: Langroid CoFounder
    avatar: https://github.com/pchalasani.png
  mohannad:
    name: Mohannad Alhanahnah
    description: Langroid Contributor
    avatar: https://avatars.githubusercontent.com/u/15859139
  nils:
    name: Nils Palumbo
    description: PhD Candidate (CS), UW-Madison; Langroid core dev.
    avatar: https://www.github.com/nilspalumbo.png
  jihye:
    name: Jihye Choi
    description: PhD Candidate (CS), UW-Madison
    avatar: https://www.github.com/jihyechoi77.png
  someshjha:
    name: Somesh Jha
    description: UW-Madison; Langroid CoFounder
    avatar: https://www.gravatar.com/avatar/?d=mp
  anivaryakumar:
    name: Anivarya Kumar
    description: Duke University
    avatar: https://www.gravatar.com/avatar/?d=mp
  davidpage:
    name: David Page
    description: Duke University
    avatar: https://www.gravatar.com/avatar/?d=mp
  mengelhard:
    name: Matthew Engelhard
    description: Duke University
    avatar: https://www.gravatar.com/avatar/?d=mp
  adamshams:
    name: Adam Shams
    description: Langroid Contributor, Khan Lab School
    avatar: https://avatars.githubusercontent.com/u/84205479
</file>

<file path="docs/blog/index.md">
# Blog
</file>

<file path="docs/demos/targeting/audience-targeting.md">
# Audience Targeting for a Business

Suppose you are a marketer for a business, trying to figure out which 
audience segments to target.
Your downstream systems require that you specify _standardized_ audience segments
to target, for example from the [IAB Audience Taxonomy](https://iabtechlab.com/standards/audience-taxonomy/).

There are thousands of standard audience segments, and normally you would need 
to search the list for potential segments that match what you think your ideal
customer profile is. This is a tedious, error-prone task.

But what if we can leverage an LLM such as GPT-4?
We know that GPT-4 has skills that are ideally suited for this task:

- General knowledge about businesses and their ideal customers
- Ability to recognize which standard segments match an English description of a customer profile
- Ability to plan a conversation to get the information it needs to answer a question


Once you decide to use an LLM, you still need to figure out how to organize the 
various components of this task:

- **Research:** What are some ideal customer profiles for the business
- **Segmentation:** Which standard segments match an English description of a customer profile
- **Planning:** how to organize the task to identify a few standard segments

## Using Langroid Agents 

Langroid makes it intuitive and simple to build an LLM-powered system organized
around agents, each responsible for a different task.
In less than a day we built a 3-agent system to automate this task:

- The `Marketer` Agent is given the Planning role.
- The `Researcher` Agent is given the Research role, 
  and it has access to the business description. 
- The `Segmentor` Agent is given the Segmentation role. It has access to the 
  IAB Audience Taxonomy via a vector database, i.e. its rows have been mapped to
  vectors via an embedding model, and these vectors are stored in a vector-database. 
  Thus given an English description of a customer profile,
  the `Segmentor` Agent maps it to a vector using the embedding model,
  and retrieves the nearest (in vector terms, e.g. cosine similarity) 
  IAB Standard Segments from the vector-database. The Segmentor's LLM 
  further refines this by selecting the best-matching segments from the retrieved list.

To kick off the system, the human user describes a business in English,
or provides the URL of the business's website. 
The `Marketer` Agent sends
customer profile queries to the `Researcher`, who answers in plain English based on 
the business description, and the Marketer takes this description and sends it to the Segmentor,
who maps it to Standard IAB Segments. The task is done when the Marketer finds 4 Standard segments. 
The agents are depicted in the diagram below:

![targeting.png](targeting.png)

## An example: Glashutte Watches

The human user first provides the URL of the business, in this case:
```text
https://www.jomashop.com/glashutte-watches.html
```
From this URL, the `Researcher` agent summarizes its understanding of the business.
The `Marketer` agent starts by asking the `Researcher`:
``` 
Could you please describe the age groups and interests of our typical customer?
```
The `Researcher` responds with an English description of the customer profile:
```text
Our typical customer is a fashion-conscious individual between 20 and 45 years...
```
The `Marketer` forwards this English description to the `Segmentor` agent, who
maps it to a standardized segment, e.g.:
```text
Interest|Style & Fashion|Fashion Trends
...
```
This conversation continues until the `Marketer` agent has identified 4 standardized segments.

Here is what the conversation looks like:

![targeting.gif](targeting.gif)
</file>

<file path="docs/examples/agent-tree.md">
# Hierarchical computation with Langroid Agents 

Here is a simple example showing tree-structured computation
where each node in the tree is handled by a separate agent.
This is a toy numerical example, and illustrates:

- how to have agents organized in a hierarchical structure to accomplish a task 
- the use of global state accessible to all agents, and 
- the use of tools/function-calling.

## The Computation 

We want to carry out the following calculation for a given input number $n$:

```python
def Main(n):
    if n % 2 == 1:           # n is odd
        return (3 * n + 1) + n
    else:
        if n % 10 == 0:      # n is divisible by 10
            return n // 10 + n
        else:
            return n // 2 + n
```

## Using function composition

Imagine we want to do this calculation using a few auxiliary functions:

```python
n0 = None  # starting number, recorded by Main, used by Adder below

def Main(n):
    # record n as a global variable, then
    # return the non-null value computed by Odd or Even
    global n0
    n0 = n
    return Odd(n) or Even(n)

def Odd(n):
    # Handle odd n
    if n % 2 == 1:
        new = 3 * n + 1
        return Adder(new)
    else:
        return None

def Even(n):
    # Handle even n: return non-null value computed by EvenZ or EvenNZ
    return EvenZ(n) or EvenNZ(n)

def EvenZ(n):
    # Handle even n divisible by 10, i.e. ending in Zero
    if n % 10 == 0:
        new = n // 10
        return Adder(new)
    else:
        return None

def EvenNZ(n):
    # Handle even n not divisible by 10, i.e. not ending in Zero
    if n % 10 != 0:
        new = n // 2
        return Adder(new)
    else:
        return None

def Adder(new):
    # Add new to the starting number, available as global variable n0
    return new + n0
```

## Mapping to a tree structure

This compositional/nested computation can be represented as a tree:

```plaintext
       Main
     /     \
  Even     Odd
  /   \        \
EvenZ  EvenNZ   Adder
  |      |
 Adder  Adder
```

Let us specify the behavior we would like for each node, in a 
"decoupled" way, i.e. we don't want a node to be aware of the other nodes.
As we see later, this decoupled design maps very well onto Langroid's
multi-agent task orchestration. To completely define the node behavior,
we need to specify how it handles an "incoming" number $n$ (from a parent node 
or user), and how it handles a "result" number $r$ (from a child node).

- `Main`: 
    - incoming $n$: simply send down $n$, record the starting number $n_0 = n$ as a global variable. 
    - result $r$: return $r$.
- `Odd`: 
    - incoming $n$: if n is odd, send down $3*n+1$, else return None
    - result $r$: return $r$
- `Even`: 
    - incoming $n$: if n is even, send down $n$, else return None
    - result $r$: return $r$
- `EvenZ`: (guaranteed by the tree hierarchy, to receive an even number.)  
    - incoming $n$: if n is divisible by 10, send down $n/10$, else return None
    - result $r$: return $r$
- `EvenNZ`: (guaranteed by the tree hierarchy, to receive an even number.)
    - incoming $n$: if n is not divisible by 10, send down $n/2$, else return None
    - result $r$: return $r$
- `Adder`:
    - incoming $n$: return $n + n_0$ where $n_0$ is the 
    starting number recorded by Main as a global variable.
    - result $r$: Not applicable since `Adder` is a leaf node.
  
## From tree nodes to Langroid Agents 

Let us see how we can perform this calculation using multiple Langroid agents, where

- we define an agent corresponding to each of the nodes above, namely 
`Main`, `Odd`, `Even`, `EvenZ`, `EvenNZ`, and `Adder`.
- we wrap each Agent into a Task, and use the `Task.add_subtask()` method to connect the agents into 
  the desired hierarchical structure.

Below is one way to do this using Langroid. We designed this with the following
desirable features:

- Decoupling: Each agent is instructed separately, without mention of any other agents
  (e.g., the Even agent does not know about the Odd agent, the EvenZ agent, etc.).
  In particular, this means agents will not be "addressing" their messages
  to specific other agents (e.g., "send the number to the Odd agent when the number is odd").
  Allowing addressing would make the solution easier to implement,
  but would not be a decoupled solution.
  Instead, we want Agents to simply put the number "out there", and have it handled
  by an applicable agent, in the task loop (which consists of the agent's responders,
  plus any sub-task `run` methods).

- Simplicity: Keep the agent instructions relatively simple. We would not want a solution
  where we have to instruct the agents (their LLMs) in convoluted ways. 

One way naive solutions fail is that agents cannot distinguish between
a number being "sent down" the tree as input, and a number being
"sent up" the tree as a result from a child node.

We use a simple trick: we instruct each LLM to mark returned values with the RESULT keyword,
and instruct it on how to handle numbers that arrive with the RESULT keyword versus those that don't.
In addition, we leverage some features of Langroid's task orchestration:

- When `llm_delegate` is `True`, if the LLM says `DONE [rest of msg]`, the task is
  considered done, and the result of the task is `[rest of msg]` (i.e. the part after `DONE`).
- In the task loop's `step()` function (which seeks a valid message during a turn of
  the conversation) when any responder says `DO-NOT-KNOW`, it is not considered a valid
  message, and the search continues to other responders, in round-robin fashion.



See the [`chat-tree.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-tree.py)
example for an implementation of this solution. You can run that example as follows:
```bash
python3 examples/basic/chat-tree.py
```
In the sections below we explain the code in more detail.

## Define the agents

Let us start with defining the configuration to be used by all agents:

```python
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.language_models.openai_gpt import OpenAIChatModel, OpenAIGPTConfig

config = ChatAgentConfig(
  llm=OpenAIGPTConfig(
    chat_model=OpenAIChatModel.GPT4o,
  ),
  vecdb=None, # no need for a vector database
)
```

Next we define each of the agents, for example:

```python
main_agent = ChatAgent(config)
```

and similarly for the other agents.
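
For instance, one way to create the remaining agents (the names below simply mirror the tree nodes) is:

```python
odd_agent = ChatAgent(config)
even_agent = ChatAgent(config)
evenz_agent = ChatAgent(config)
even_nz_agent = ChatAgent(config)
adder_agent = ChatAgent(config)
```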

## Wrap each Agent in a Task

To allow agent interactions, the first step is to wrap each agent in a Task.
When we define the task, we pass in the instructions above as part of the system message.
Recall the instructions for the `Main` agent:

- `Main`:
    - incoming $n$: simply send down $n$, record the starting number $n_0 = n$ as a global variable.
    - result $r$: return $r$.

We include the equivalent of these instructions in the `main_task` that wraps 
the `main_agent`:

```python
from langroid.agent.task import Task

main_task = Task(
    main_agent,
    name="Main",
    interactive=False, #(1)!
    system_message="""
          You will receive two types of messages, to which you will respond as follows:
          
          INPUT Message format: <number>
          In this case simply write the <number>, say nothing else.
          
          RESULT Message format: RESULT <number>
          In this case simply say "DONE <number>", e.g.:
          DONE 19
    
          To start off, ask the user for the initial number, 
          using the `ask_num` tool/function.
          """,
    llm_delegate=True, # allow LLM to control end of task via DONE
    single_round=False,
)
```

1. Non-interactive: don't wait for user input in each turn 

There are a couple of points to highlight about the `system_message` 
value in this task definition:

- When the `Main` agent receives just a number, it simply writes out that number,
  and in the Langroid Task loop, this number becomes the "current pending message"
  to be handled by one of the sub-tasks, i.e. `Even, Odd`. Note that these sub-tasks
  are _not_ mentioned in the system message, consistent with the decoupling principle.
- As soon as either of these sub-tasks returns a non-null response in the format "RESULT <number>", the `Main` agent
  is instructed to return this result saying "DONE <number>". Since `llm_delegate`
  is set to `True` (meaning the LLM can decide when the task has ended), 
  this causes the `Main` task to be considered finished and the task loop is exited.

Since we want the `Main` agent to record the initial number as a global variable,
we use a tool/function `AskNumTool`, defined as follows
(see [this section](../quick-start/chat-agent-tool.md) in the getting started guide
for more details on Tools):

```python
from rich.prompt import Prompt
from langroid.agent.tool_message import ToolMessage


class AskNumTool(ToolMessage):
  request = "ask_num"
  purpose = "Ask user for the initial number"

  def handle(self) -> str:
    """
    This is a stateless tool (i.e. does not use any Agent member vars), so we can
    define the handler right here, instead of defining an `ask_num`
    method in the agent.
    """
    num = Prompt.ask("Enter a number")
    # record this in global state, so other agents can access it
    MyGlobalState.set_values(number=num)
    return str(num)
```
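
Here, `MyGlobalState` is a small container for state shared across agents. A minimal sketch, assuming it subclasses Langroid's `GlobalState` utility (the actual definition is in `chat-tree.py`):

```python
from langroid.utils.globals import GlobalState

class MyGlobalState(GlobalState):
    number: str = ""  # the initial number, set by AskNumTool and read later by AddNumTool
```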

We then enable the `main_agent` to use and handle messages that conform to the
`AskNumTool` spec:

```python
main_agent.enable_message(AskNumTool)
```

!!! tip "Using and Handling a tool/function"
    "Using" a tool means the agent's LLM _generates_ 
    the function-call (if using OpenAI function-calling) or 
    the JSON structure (if using Langroid's native tools mechanism) 
    corresponding to this tool. "Handling" a tool refers to the Agent's method 
    recognizing the tool and executing the corresponding code.


The tasks for other agents are defined similarly. We will only note here
that the `Adder` agent needs a special tool `AddNumTool` to be able to add the current number
to the initial number set by the `Main` agent. 
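
As a rough sketch (field and method names are illustrative; see `chat-tree.py` for the actual definition), such a tool might look like:

```python
class AddNumTool(ToolMessage):
    request = "add_num"
    purpose = "Add the given <number> to the initial number and return the result"
    number: int

    def handle(self) -> str:
        # retrieve the initial number recorded by the Main agent via AskNumTool
        init_num = int(MyGlobalState.get_value("number"))
        return str(init_num + self.number)
```

The `Adder` agent is then enabled to use and handle this tool, e.g. `adder_agent.enable_message(AddNumTool)`.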

## Connect the tasks into a tree structure

So far, we have wrapped each agent in a task, in isolation, and there is no 
connection between the tasks. The final step is to connect the tasks to 
the tree structure we saw earlier:

```python
main_task.add_sub_task([even_task, odd_task])
even_task.add_sub_task([evenz_task, even_nz_task])
evenz_task.add_sub_task(adder_task)
even_nz_task.add_sub_task(adder_task)
odd_task.add_sub_task(adder_task)
```

Now all that remains is to run the main task:

```python
main_task.run()
```

Here is what a run starting with $n=12$ looks like:

![chat-tree.png](chat-tree.png)
</file>

<file path="docs/javascripts/mathjax.js">
document$.subscribe(() => {
MathJax.typesetPromise()
</file>

<file path="docs/notes/async-streaming.md">
# Suppressing output in async, streaming mode

Available since version 0.18.0

When using an LLM API in streaming + async mode, you may want to suppress output,
especially when concurrently running multiple instances of the API.
To suppress output in async + stream mode, 
you can set the `async_stream_quiet` flag in [`LLMConfig`][langroid.language_models.base.LLMConfig]
to `True` (this is the default). 
Note that [`OpenAIGPTConfig`][langroid.language_models.openai_gpt.OpenAIGPTConfig]
inherits from `LLMConfig`, so you can use this flag with `OpenAIGPTConfig` as well:

```python
import langroid.language_models as lm
llm_config = lm.OpenAIGPTConfig(
    async_stream_quiet=True,
    ...
)
```
</file>

<file path="docs/notes/chunking.md">
# Document Chunking/Splitting in Langroid

Langroid's [`ParsingConfig`][langroid.parsing.parser.ParsingConfig]
provides several document chunking strategies through the `Splitter` enum:

## 1. MARKDOWN (`Splitter.MARKDOWN`) (The default)

**Purpose**: Structure-aware splitting that preserves markdown formatting.

**How it works**:

- Preserves document hierarchy (headers and sections)
- Enriches chunks with header information
- Uses word count instead of token count (with adjustment factor)
- Supports "rollup" to maintain document structure
- Ideal for markdown documents where preserving formatting is important

## 2. TOKENS (`Splitter.TOKENS`)

**Purpose**: Creates chunks of approximately equal token size.

**How it works**:

- Tokenizes the text using tiktoken
- Aims for chunks of size `chunk_size` tokens (default: 200)
- Looks for natural breakpoints like punctuation or newlines
- Prefers splitting at sentence/paragraph boundaries
- Ensures chunks are at least `min_chunk_chars` long (default: 350)

## 3. PARA_SENTENCE (`Splitter.PARA_SENTENCE`)

**Purpose**: Splits documents respecting paragraph and sentence boundaries.

**How it works**:

- Recursively splits documents until chunks are below 1.3× the target size
- Maintains document structure by preserving natural paragraph breaks
- Adjusts chunk boundaries to avoid cutting in the middle of sentences
- Stops when it can't split chunks further without breaking coherence

## 4. SIMPLE (`Splitter.SIMPLE`)

**Purpose**: Basic splitting using predefined separators.

**How it works**:

- Uses a list of separators to split text (default: `["\n\n", "\n", " ", ""]`)
- Splits on the first separator in the list
- Doesn't attempt to balance chunk sizes
- Simplest and fastest splitting method


## Basic Configuration

```python
from langroid.parsing.parser import ParsingConfig, Splitter

config = ParsingConfig(
    splitter=Splitter.MARKDOWN,  # Most feature-rich option
    chunk_size=200,              # Target tokens per chunk
    chunk_size_variation=0.30,   # Allowed variation from target
    overlap=50,                  # Token overlap between chunks
    token_encoding_model="text-embedding-3-small"
)
```

## Format-Specific Configuration

```python
from langroid.parsing.parser import ParsingConfig, Splitter, PdfParsingConfig

# Customize PDF parsing
config = ParsingConfig(
    splitter=Splitter.PARA_SENTENCE,
    pdf=PdfParsingConfig(
        library="pymupdf4llm"  # Default PDF parser
    )
)

# Use Gemini for PDF parsing
config = ParsingConfig(
    pdf=PdfParsingConfig(
        library="gemini",
        gemini_config=GeminiConfig(
            model_name="gemini-2.0-flash",
            requests_per_minute=5
        )
    )
)
```

## Setting Up Parsing Config in DocChatAgentConfig

You can configure document parsing when creating a `DocChatAgent` by customizing the `parsing` field within the `DocChatAgentConfig`. Here's how to do it:

```python
from langroid.agent.special.doc_chat_agent import DocChatAgentConfig  
from langroid.parsing.parser import ParsingConfig, Splitter, PdfParsingConfig

# Create a DocChatAgent with custom parsing configuration
agent_config = DocChatAgentConfig(
    parsing=ParsingConfig(
        # Choose the splitting strategy
        splitter=Splitter.MARKDOWN,  # Structure-aware splitting with header context
        
        # Configure chunk sizes
        chunk_size=800,              # Target tokens per chunk
        overlap=150,                 # Overlap between chunks
        
        # Configure chunk behavior
        max_chunks=5000,             # Maximum number of chunks to create
        min_chunk_chars=250,         # Minimum characters when truncating at punctuation
        discard_chunk_chars=10,      # Discard chunks smaller than this
        
        # Configure context window
        n_neighbor_ids=3,            # Store 3 chunk IDs on either side
        
        # Configure PDF parsing specifically
        pdf=PdfParsingConfig(
            library="pymupdf4llm",   # Choose PDF parsing library
        )
    )
)
```
</file>

<file path="docs/notes/code-injection-protection.md">
# Code Injection Protection with full_eval Flag

Available in Langroid since v0.53.15.

Langroid provides a security feature that helps protect against code injection vulnerabilities when evaluating pandas expressions in `TableChatAgent` and `VectorStore`. This protection is controlled by the `full_eval` flag, which defaults to `False` for maximum security, but can be set to `True` when working in trusted environments.

## Background

When executing dynamic pandas expressions within `TableChatAgent` and in `VectorStore.compute_from_docs()`, there is a risk of code injection if malicious input is provided. To mitigate this risk, Langroid implements a command sanitization system that validates and restricts the operations that can be performed.

## How It Works

The sanitization system uses AST (Abstract Syntax Tree) analysis to enforce a security policy that:

1. Restricts DataFrame methods to a safe whitelist
2. Prevents access to potentially dangerous methods and arguments
3. Limits expression depth and method chaining
4. Validates literals and numeric values to be within safe bounds
5. Blocks access to any variables other than the provided DataFrame

When `full_eval=False` (the default), all expressions are run through this sanitization process before evaluation. When `full_eval=True`, the sanitization is bypassed, allowing full access to pandas functionality.

## Configuration Options

### In TableChatAgent

```python
from langroid.agent.special.table_chat_agent import TableChatAgentConfig, TableChatAgent

config = TableChatAgentConfig(
    data=my_dataframe,
    full_eval=False,  # default; set to True only for trusted input
)

agent = TableChatAgent(config)
```

### In VectorStore

```python
from langroid.vector_store.lancedb import LanceDBConfig, LanceDB

config = LanceDBConfig(
    collection_name="my_collection",
    full_eval=False,  # default; set to True only for trusted input
)

vectorstore = LanceDB(config)
```

## When to Use full_eval=True

Set `full_eval=True` only when:

1. All input comes from trusted sources (not from users or external systems)
2. You need full pandas functionality that goes beyond the whitelisted methods
3. You're working in a controlled development or testing environment

## Security Considerations

- By default, `full_eval=False` provides a good balance of security and functionality
- The whitelisted operations support most common pandas operations
- Setting `full_eval=True` removes all protection and should be used with caution
- Even with protection, always validate input when possible

## Affected Classes

The `full_eval` flag affects the following components:

1. `TableChatAgentConfig` and `TableChatAgent` - Controls sanitization in the `pandas_eval` method
2. `VectorStoreConfig` and `VectorStore` - Controls sanitization in the `compute_from_docs` method
3. All implementations of `VectorStore` (ChromaDB, LanceDB, MeiliSearch, PineconeDB, PostgresDB, QdrantDB, WeaviateDB)

## Example: Safe Pandas Operations

When `full_eval=False`, the following operations are allowed:

```python
# Allowed operations (non-exhaustive list)
df.head()
df.groupby('column')['value'].mean()
df[df['column'] > 10]
df.sort_values('column', ascending=False)
df.pivot_table(...)
```

Some operations that might be blocked include:

```python
# Potentially blocked operations
df.eval("dangerous_expression")
df.query("dangerous_query")
df.apply(lambda x: dangerous_function(x))
```

## Testing Considerations

When writing tests that use `TableChatAgent` or `VectorStore.compute_from_docs()` with pandas expressions that go beyond the whitelisted operations, you may need to set `full_eval=True` to ensure the tests pass.
</file>

<file path="docs/notes/crawl4ai.md">
# Crawl4ai Crawler Documentation

## Overview

The `Crawl4aiCrawler` is a highly advanced and flexible web crawler integrated into Langroid, built on the powerful `crawl4ai` library. It uses a real browser engine (Playwright) to render web pages, making it exceptionally effective at handling modern, JavaScript-heavy websites. This crawler provides a rich set of features for simple page scraping, deep-site crawling, and sophisticated data extraction, making it the most powerful crawling option available in Langroid.

It is a local crawler, so no API keys are needed.

## Installation

To use `Crawl4aiCrawler`, you must install the `crawl4ai` extra dependencies.

To install and prepare crawl4ai:

```bash
# Install langroid with crawl4ai support
pip install "langroid[crawl4ai]"
crawl4ai setup
crawl4ai doctor

```

> **Note**: The `crawl4ai setup` command will download Playwright browsers (Chromium, Firefox, WebKit) on first run. This is a one-time download that can be several hundred MB in size. The browsers are stored locally and used for rendering web pages.

## Key Features

- **Real Browser Rendering**: Accurately processes dynamic content, single-page applications (SPAs), and sites that require JavaScript execution.

- **Simple and Deep Crawling**: Can scrape a list of individual URLs (`simple` mode) or perform a recursive, deep crawl of a website starting from a seed URL (`deep` mode).

- **Powerful Extraction Strategies**:

  - **Structured JSON (No LLM)**: Extract data into a predefined JSON structure using CSS selectors, XPath, or Regex patterns. This is extremely fast, reliable, and cost-effective.

  - **LLM-Based Extraction**: Leverage Large Language Models (like GPT or Gemini) to extract data from unstructured content based on natural language instructions and a Pydantic schema.

- **Advanced Markdown Generation**: Go beyond basic HTML-to-markdown conversion. Apply content filters to prune irrelevant sections (sidebars, ads, footers) or use an LLM to intelligently reformat content for maximum relevance, perfect for RAG pipelines.

- **High-Performance Scraping**: Optionally use an LXML-based scraping strategy for a significant speed boost on large HTML documents.

- **Fine-Grained Configuration**: Offers detailed control over browser behavior (`BrowserConfig`) and individual crawl runs (`CrawlerRunConfig`) for advanced use cases.

## Configuration (`Crawl4aiConfig`)

The `Crawl4aiCrawler` is configured via the `Crawl4aiConfig` object. This class acts as a high-level interface to the underlying `crawl4ai` library's settings.

All of the strategies are optional.
Learn more about these strategies, `browser_config`, and `run_config` in the [Crawl4AI docs](https://docs.crawl4ai.com/).

```python
from langroid.parsing.url_loader import Crawl4aiConfig

# All parameters are optional and have sensible defaults
config = Crawl4aiConfig(
    crawl_mode="simple",  # or "deep"
    extraction_strategy=...,
    markdown_strategy=...,
    deep_crawl_strategy=...,
    scraping_strategy=...,
    browser_config=...,  # For advanced browser settings
    run_config=...,      # For advanced crawl-run settings
)
```

**Main Parameters:**

- `crawl_mode` (str):

  - `"simple"` (default): Crawls each URL in the provided list individually.

  - `"deep"`: Starts from the first URL in the list and recursively crawls linked pages based on the `deep_crawl_strategy`.

  - Make sure to set `crawl_mode="deep"` whenever you are deep crawling; this is crucial for it to work correctly.

- `extraction_strategy` (`ExtractionStrategy`): Defines how to extract structured data from a page. If set, the `Document.content` will be a **JSON string** containing the extracted data.

- `markdown_strategy` (`MarkdownGenerationStrategy`): Defines how to convert HTML to markdown. This is used when `extraction_strategy` is not set. The `Document.content` will be a **markdown string**.

- `deep_crawl_strategy` (`DeepCrawlStrategy`): Configuration for deep crawling, such as `max_depth`, `max_pages`, and URL filters. Only used when `crawl_mode` is `"deep"`.

- `scraping_strategy` (`ContentScrapingStrategy`): Specifies the underlying HTML parsing engine. Useful for performance tuning.

- `browser_config` & `run_config`: For advanced users to pass detailed `BrowserConfig` and `CrawlerRunConfig` objects directly from the `crawl4ai` library.

---

## Usage Examples

These are representative examples. For runnable examples check the script [`examples/docqa/crawl4ai_examples.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/crawl4ai_examples.py)

### 1. Simple Crawling (Default Markdown)

This is the most basic usage. It will fetch the content of each URL and convert it to clean markdown.

```python
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig

urls = [
    "https://pytorch.org/",
    "https://techcrunch.com/",
]

# Use default settings
crawler_config = Crawl4aiConfig()
loader = URLLoader(urls=urls, crawler_config=crawler_config)

docs = loader.load()
for doc in docs:
    print(f"URL: {doc.metadata.source}")
    print(f"Content (first 200 chars): {doc.content[:200]}")
```

### 2. Structured JSON Extraction (No LLM)

When you need to extract specific, repeated data fields from a page, schema-based extraction is the best choice. It's fast, precise, and free of LLM costs. The result in `Document.content` is a JSON string.

#### a. Using CSS Selectors (`JsonCssExtractionStrategy`)

This example scrapes titles and links from the Hacker News front page.

```python
import json
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

HACKER_NEWS_URL = "https://news.ycombinator.com"
HACKER_NEWS_SCHEMA = {
    "name": "HackerNewsArticles",
    "baseSelector": "tr.athing",
    "fields": [
        {"name": "title", "selector": "span.titleline > a", "type": "text"},
        {"name": "link", "selector": "span.titleline > a", "type": "attribute", "attribute": "href"},
    ],
}

# Create the strategy and pass it to the config
css_strategy = JsonCssExtractionStrategy(schema=HACKER_NEWS_SCHEMA)
crawler_config = Crawl4aiConfig(extraction_strategy=css_strategy)

loader = URLLoader(urls=[HACKER_NEWS_URL], crawler_config=crawler_config)
documents = loader.load()

# The Document.content will contain the JSON string
extracted_data = json.loads(documents[0].content)
print(json.dumps(extracted_data[:3], indent=2))
```

#### b. Using Regex (`RegexExtractionStrategy`)

This is ideal for finding common patterns like emails, URLs, or phone numbers.

```python
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.extraction_strategy import RegexExtractionStrategy

url = "https://www.scrapethissite.com/pages/forms/"

# Combine multiple built-in patterns
regex_strategy = RegexExtractionStrategy(
    pattern=(
        RegexExtractionStrategy.Email
        | RegexExtractionStrategy.Url
        | RegexExtractionStrategy.PhoneUS
    )
)

crawler_config = Crawl4aiConfig(extraction_strategy=regex_strategy)
loader = URLLoader(urls=[url], crawler_config=crawler_config)
documents = loader.load()

print(documents[0].content)
```

### 3. Advanced Markdown Generation

For RAG applications, the quality of the markdown is crucial. These strategies produce highly relevant, clean text. The result in `Document.content` is the filtered markdown (`fit_markdown`).

#### a. Pruning Filter (`PruningContentFilter`)

This filter heuristically removes boilerplate content based on text density, link density, and common noisy tags.

```python
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from crawl4ai.content_filter_strategy import PruningContentFilter

prune_filter = PruningContentFilter(threshold=0.6, min_word_threshold=10)
md_generator = DefaultMarkdownGenerator(
    content_filter=prune_filter,
    options={"ignore_links": True}
)

crawler_config = Crawl4aiConfig(markdown_strategy=md_generator)
loader = URLLoader(urls=["https://news.ycombinator.com"], crawler_config=crawler_config)
docs = loader.load()

print(docs[0].content[:500])
```

#### b. LLM Filter (`LLMContentFilter`)

Use an LLM to semantically understand the content and extract only the relevant parts based on your instructions. This is extremely powerful for creating topic-focused documents.

```python
import os
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.async_configs import LLMConfig
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
from crawl4ai.content_filter_strategy import LLMContentFilter

# Requires an API key, e.g., OPENAI_API_KEY
llm_filter = LLMContentFilter(
    llm_config=LLMConfig(
        provider="openai/gpt-4o-mini",
        api_token=os.getenv("OPENAI_API_KEY"),
    ),
    instruction="""
    Extract only the main article content.
    Exclude all navigation, sidebars, comments, and footer content.
    Format the output as clean, readable markdown.
    """,
    chunk_token_threshold=4096,
)

md_generator = DefaultMarkdownGenerator(content_filter=llm_filter)
crawler_config = Crawl4aiConfig(markdown_strategy=md_generator)
loader = URLLoader(urls=["https://www.theverge.com/tech"], crawler_config=crawler_config)
docs = loader.load()

print(docs[0].content)
```

### 4. Deep Crawling

To crawl an entire website or a specific section, use `deep` mode.

The recommended deep-crawl strategy is `BestFirstCrawlingStrategy`:

```python
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.deep_crawling import BestFirstCrawlingStrategy
from crawl4ai.deep_crawling.filters import FilterChain, URLPatternFilter


deep_crawl_strategy = BestFirstCrawlingStrategy(
    max_depth=2,
    include_external=False,
    max_pages=25,              # Maximum number of pages to crawl (optional)
    filter_chain=FilterChain([URLPatternFilter(patterns=["*core*"])]) # Pattern matching for granular control (optional)
)

crawler_config = Crawl4aiConfig(
    crawl_mode="deep",
    deep_crawl_strategy=deep_crawl_strategy
)

loader = URLLoader(urls=["https://docs.crawl4ai.com/"], crawler_config=crawler_config)
docs = loader.load()

print(f"Crawled {len(docs)} pages.")
for doc in docs:
    print(f"- {doc.metadata.source}")
```

### 5. High-Performance Scraping (`LXMLWebScrapingStrategy`)

For a performance boost, especially on very large, static HTML pages, switch the scraping strategy to LXML.

```python
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.content_scraping_strategy import LXMLWebScrapingStrategy

crawler_config = Crawl4aiConfig(
    scraping_strategy=LXMLWebScrapingStrategy()
)

loader = URLLoader(urls=["https://www.nbcnews.com/business"], crawler_config=crawler_config)
docs = loader.load()
print(f"Content Length: {len(docs[0].content)}")
```

### 6. LLM-Based JSON Extraction (`LLMExtractionStrategy`)

When data is unstructured or requires semantic interpretation, use an LLM for extraction. This is slower and more expensive but incredibly flexible. The result in `Document.content` is a JSON string.

```python
import os
import json
from langroid.pydantic_v1 import BaseModel, Field
from typing import Optional
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig
from crawl4ai.async_configs import LLMConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy

# Define the data structure you want to extract
class ArticleData(BaseModel):
    headline: str
    summary: str = Field(description="A short summary of the article")
    author: Optional[str] = None

# Configure the LLM strategy
llm_strategy = LLMExtractionStrategy(
    llm_config=LLMConfig(
        provider="openai/gpt-4o-mini",
        api_token=os.getenv("OPENAI_API_KEY"),
    ),
    schema=ArticleData.schema_json(),
    extraction_type="schema",
    instruction="Extract the headline, summary, and author of the main article.",
)

crawler_config = Crawl4aiConfig(extraction_strategy=llm_strategy)
loader = URLLoader(urls=["https://news.ycombinator.com"], crawler_config=crawler_config)
docs = loader.load()

extracted_data = json.loads(docs[0].content)
print(json.dumps(extracted_data, indent=2))
```

## How It Handles Different Content Types

The `Crawl4aiCrawler` is smart about handling different types of URLs:

- **Web Pages** (e.g., `http://...`, `https://...`): These are processed by the `crawl4ai` browser engine. The output format (`markdown` or `JSON`) depends on the strategy you configure in `Crawl4aiConfig`.
- **Local and Remote Documents** (e.g., URLs ending in `.pdf`, `.docx`): These are automatically detected and delegated to Langroid's internal `DocumentParser`. This ensures that documents are properly parsed and chunked according to your `ParsingConfig`, just like with other Langroid tools.

## Conclusion

The `Crawl4aiCrawler` is a feature-rich, powerful tool for any web-based data extraction task.

- For **simple, clean text**, use the default `Crawl4aiConfig`.

- For **structured data from consistent sites**, use `JsonCssExtractionStrategy` or `RegexExtractionStrategy` for unbeatable speed and reliability.

- To create **high-quality, focused content for RAG**, use `PruningContentFilter` or the `LLMContentFilter` with the `DefaultMarkdownGenerator`.

- To scrape an **entire website**, use `deep_crawl_strategy` with `crawl_mode="deep"`.

- For **complex or unstructured data** that needs AI interpretation, `LLMExtractionStrategy` provides a flexible solution.
</file>

<file path="docs/notes/custom-azure-client.md">
# Custom Azure OpenAI client

!!! warning "This is only for using a Custom Azure OpenAI client"
    This note is **only** meant for those who need to use a custom Azure client,
    and is NOT TYPICAL for most users. For typical usage of Azure-deployed models with Langroid, see
    the [docs](https://langroid.github.io/langroid/notes/azure-openai-models/), 
    the [`test_azure_openai.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_azure_openai.py) and
    [`example/basic/chat.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat.py)


This note shows how to use Langroid with Azure OpenAI and Entra ID
authentication by providing a custom client.

By default, Langroid manages the configuration and creation 
of the Azure OpenAI client (see the [Setup guide](https://langroid.github.io/langroid/quick-start/setup/#microsoft-azure-openai-setupoptional)
for details). In most cases, the available configuration options
are sufficient, but if you need to manage any options that
are not exposed, you instead have the option of providing a custom
client, in Langroid v0.29.0 and later. 

In order to use a custom client, you must provide a function that
returns the configured client. Depending on whether you need to make
synchronous or asynchronous calls, you need to provide the appropriate
client. A sketch of how this is done (supporting both sync and async calls)
is given below:

```python
from openai import AzureOpenAI, AsyncAzureOpenAI

import langroid.language_models as lm

def get_azure_openai_client():
    return AzureOpenAI(...)

def get_azure_openai_async_client():
    return AsyncAzureOpenAI(...)

lm_config = lm.AzureConfig(
    azure_openai_client_provider=get_azure_openai_client,
    azure_openai_async_client_provider=get_azure_openai_async_client,
)
```

## Microsoft Entra ID Authentication

A key use case for a custom client is [Microsoft Entra ID authentication](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity).
Here you need to provide an `azure_ad_token_provider` to the client. 
For examples on this, see [examples/basic/chat-azure-client.py](https://github.com/langroid/langroid/blob/main/examples/basic/chat-azure-client.py) 
and [examples/basic/chat-azure-async-client.py](https://github.com/langroid/langroid/blob/main/examples/basic/chat-azure-async-client.py).
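
A minimal sketch of such a token provider, using the `azure.identity` package (the endpoint and API version below are placeholders; see the linked examples for complete versions):

```python
from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from openai import AzureOpenAI

token_provider = get_bearer_token_provider(
    DefaultAzureCredential(),
    "https://cognitiveservices.azure.com/.default",
)

def get_azure_openai_client():
    # construct the client with Entra ID auth instead of an API key
    return AzureOpenAI(
        azure_ad_token_provider=token_provider,
        azure_endpoint="https://<your-resource>.openai.azure.com",  # placeholder
        api_version="2024-06-01",  # example API version
    )
```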
</file>

<file path="docs/notes/enriching-for-retrieval.md">
# Enriching Chunked Documents for Better Retrieval

Available in Langroid v0.34.0 or later. 

When using the `DocChatAgent` for RAG with documents in highly specialized/technical
domains, retrieval accuracy may be low, since embeddings are not always sufficient to capture
relationships between entities. For example, suppose a document-chunk consists of the medical
test name "BUN" (Blood Urea Nitrogen), and a retrieval query is looking for
tests related to kidney function: the embedding for "BUN" may not be close to the
embedding for "kidney function", so the chunk may not be retrieved.

In such cases it is useful to *enrich* the chunked documents with additional keywords
(or even "hypothetical questions") to increase the "semantic surface area" of the chunk,
so that the chunk is more likely to be retrieved for relevant queries.

As of Langroid v0.34.0, you can provide a `chunk_enrichment_config` 
of type `ChunkEnrichmentAgentConfig`, in the `DocChatAgentConfig`. 
This config extends `ChatAgentConfig` and has the following fields:

- `batch_size` (int): The batch size for the chunk enrichment agent. Default is 50.
- `delimiter` (str): The delimiter to use when 
   concatenating the chunk and the enriched text. 
- `enrichment_prompt_fn`: function (`str->str`) that creates a prompt
  from a doc-chunk string `x`

In the above medical test example, suppose we want to augment a chunk containing
only the medical test name, with the organ system it is related to. We can set up
a `ChunkEnrichmentAgentConfig` as follows:

```python
from langroid.agent.special.doc_chat_agent import (
    ChunkEnrichmentAgentConfig,
    DocChatAgentConfig,
)

enrichment_config = ChunkEnrichmentAgentConfig(
    batch_size=10,
    system_message=f"""
        You are an experienced clinical physician, very well-versed in
        medical tests and their names.
        You will be asked to identify WHICH ORGAN(s) Function/Health
        a test name is most closely associated with, to aid in 
        retrieving the medical test names more accurately from an embeddings db
        that contains thousands of such test names.
        The idea is to use the ORGAN NAME(S) provided by you, 
        to make the right test names easier to discover via keyword-matching
        or semantic (embedding) similarity.
         Your job is to generate up to 3 ORGAN NAMES
         MOST CLOSELY associated with the test name shown, ONE PER LINE.
         DO NOT SAY ANYTHING ELSE, and DO NOT BE OBLIGATED to provide 3 organs --
         if there is just one or two that are most relevant, that is fine.
        Examples:
          "cholesterol" -> "heart function", 
          "LDL" -> "artery health", etc,
          "PSA" -> "prostate health", 
          "TSH" -> "thyroid function", etc.                
        """,
    enrichment_prompt_fn=lambda test: f"""
        Which ORGAN(S) Function/Health is the medical test named 
        '{test}' most closely associated with?
        """,
)

doc_agent_config = DocChatAgentConfig(
    chunk_enrichment_config=enrichment_config,
    ...
)
```

This works as follows:

- Before ingesting document-chunks into the vector-db, a specialized 
  "chunk enrichment" agent is created, configured with the `enrichment_config` above.
- For each document-chunk `x`, the agent's `llm_response_forget_async` method is called
 using the prompt created by `enrichment_prompt_fn(x)`. The resulting response text 
 `y` is concatenated with the original chunk text `x` using the `delimiter`,
  before storing in the vector-db. This is done in batches of size `batch_size`.
- At query time, after chunk retrieval, before generating the final LLM response,
  the enrichments are stripped from the retrieved chunks, and the original content
  of the retrieved chunks are passed to the LLM for generating the final response.

See the script 
[`examples/docqa/doc-chunk-enrich.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/doc-chunk-enrich.py)
for a complete example. Also see the tests related to "enrichment" in 
[`test_doc_chat_agent.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_doc_chat_agent.py).
</file>

<file path="docs/notes/file-input.md">
# PDF Files and Image inputs to LLMs

Langroid supports sending PDF files and images (either URLs or local files)
directly to Large Language Models with multi-modal 
capabilities. This feature allows models to "see" files and other documents,
and works with most multi-modal models served via an OpenAI-compatible API,
e.g.:

- OpenAI's GPT-4o series and GPT-4.1 series
- Gemini models
- Claude series models (via OpenAI-compatible providers like OpenRouter or LiteLLM)

To see example usage, see:

- tests: [test_llm.py](https://github.com/langroid/langroid/blob/main/tests/main/test_llm.py), 
   [test_llm_async.py](https://github.com/langroid/langroid/blob/main/tests/main/test_llm_async.py),
   [test_chat_agent.py](https://github.com/langroid/langroid/blob/main/tests/main/test_chat_agent.py).
- example script: [pdf-json-no-parse.py](https://github.com/langroid/langroid/blob/main/examples/extract/pdf-json-no-parse.py), which shows
  how you can directly extract structured information from a document 
  **without having to first parse it to markdown** (which is inherently lossy).

## Basic Usage directly with LLM `chat` and `achat` methods

First create a `FileAttachment` object using one of the `from_` methods.
For image (`png`, `jpg/jpeg`) files you can use `FileAttachment.from_path(p)`
where `p` is either a local file path, or a http/https URL.
For PDF files, you can use `from_path` with a local file, or `from_bytes` or `from_io`
(see below). In the examples below we show only `pdf` examples.

```python
from langroid.language_models.base import LLMMessage, Role
from langroid.parsing.file_attachment import FileAttachment
import langroid.language_models as lm

# Create a file attachment
attachment = FileAttachment.from_path("path/to/document.pdf")

# Create messages with attachment
messages = [
    LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
    LLMMessage(
        role=Role.USER, content="What's the title of this document?", 
        files=[attachment]
    )
]

# Set up LLM with model that supports attachments
llm = lm.OpenAIGPT(lm.OpenAIGPTConfig(chat_model=lm.OpenAIChatModel.GPT4o))

# Get response
response = llm.chat(messages=messages)
```

## Supported File Formats

Currently, the OpenAI API supports:

- PDF files (including image-based PDFs)
- image files and URLs


## Creating Attachments

There are multiple ways to create file attachments:

```python
# From a file path
attachment = FileAttachment.from_path("path/to/file.pdf")

# From bytes
with open("path/to/file.pdf", "rb") as f:
    attachment = FileAttachment.from_bytes(f.read(), filename="document.pdf")

# From a file-like object
from io import BytesIO
file_obj = BytesIO(pdf_bytes)
attachment = FileAttachment.from_io(file_obj, filename="document.pdf")
```

## Follow-up Questions

You can continue the conversation with follow-up questions that reference the attached files:

```python
messages.append(LLMMessage(role=Role.ASSISTANT, content=response.message))
messages.append(LLMMessage(role=Role.USER, content="What is the main topic?"))
response = llm.chat(messages=messages)
```

## Multiple Attachments

Langroid allows multiple files to be sent in a single message,
but as of 16 Apr 2025, sending multiple PDF files does not appear to be properly supported by the
APIs (they seem to use only the last attached file), although sending multiple
images does work.

```python
messages = [
    LLMMessage(
        role=Role.USER,
        content="Compare these documents",
        files=[attachment1, attachment2]
    )
]
```

## Using File Attachments with Agents

Agents can process file attachments as well, in the `llm_response` method,
which takes a `ChatDocument` object as input. 
To pass in file attachments, include the `files` field in the `ChatDocument`,
in addition to the content:

```python
import langroid as lr
from langroid.agent.chat_document import ChatDocument, ChatDocMetaData
from langroid.mytypes import Entity


agent = lr.ChatAgent(lr.ChatAgentConfig())

user_input = ChatDocument(
    content="What is the title of this document?",
    files=[attachment],
    metadata=ChatDocMetaData(
        sender=Entity.USER,
    )
)
# or more simply, use the agent's `create_user_response` method:
# user_input = agent.create_user_response(
#     content="What is the title of this document?",
#     files=[attachment],    
# )
response = agent.llm_response(user_input)
```


## Using File Attachments with Tasks

In Langroid, `Task.run()` can take a `ChatDocument` object as input,
and as mentioned above, it can contain attached files in the `files` field.
To ensure proper orchestration, you also need to set various `metadata` fields,
such as `sender`. Langroid provides a convenient
`create_user_response` method to create a `ChatDocument` object with the necessary 
metadata, so you only need to specify the `content` and `files` fields:


```python
from langroid.parsing.file_attachment import FileAttachment
from langroid.agent.task import Task

agent = ...
# Create task
task = Task(agent, interactive=True)

# Create a file attachment
attachment = FileAttachment.from_path("path/to/document.pdf")

# Create input with attachment
input_message = agent.create_user_response(
    content="Extract data from this document",
    files=[attachment]
)

# Run task with file attachment
result = task.run(input_message)
```

See the script [`pdf-json-no-parse.py`](https://github.com/langroid/langroid/blob/main/examples/extract/pdf-json-no-parse.py)
for a complete example of using file attachments with tasks.

## Practical Applications

- PDF document analysis and data extraction
- Report summarization
- Structured information extraction from documents
- Visual content analysis

For more complex applications, consider using the Task and Agent infrastructure in 
Langroid to orchestrate multi-step document processing workflows.
</file>

<file path="docs/notes/glhf-chat.md">
# Support for Open LLMs hosted on glhf.chat

Available since v0.23.0.

If you're looking to use Langroid with one of the recent performant Open LLMs,
such as `Qwen2.5-Coder-32B-Instruct`, you can do so using our glhf.chat integration.

See [glhf.chat](https://glhf.chat/chat/create) for a list of available models.

To run with one of these models, 
set the chat_model in the `OpenAIGPTConfig` to `"glhf/<model_name>"`, 
where model_name is hf: followed by the HuggingFace repo path, 
e.g. `Qwen/Qwen2.5-Coder-32B-Instruct`, 
so the full chat_model would be `"glhf/hf:Qwen/Qwen2.5-Coder-32B-Instruct"`.
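
For example, a minimal config (model name shown purely for illustration):

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="glhf/hf:Qwen/Qwen2.5-Coder-32B-Instruct",
)
```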

Also many of the example scripts in the main repo (under the `examples` directory) can
be run with this and other LLMs using the model-switch cli arg `-m <model>`, e.g.

```bash
python3 examples/basic/chat.py -m glhf/hf:Qwen/Qwen2.5-Coder-32B-Instruct
```

Additionally, you can run many of the tests in the `tests` directory with this model
instead of the default OpenAI `GPT4o` using `--m <model>`, e.g. 

```bash
pytest tests/main/test_chat_agent.py --m glhf/hf:Qwen/Qwen2.5-Coder-32B-Instruct
```

For more info on running langroid with Open LLMs via other providers/hosting services,
see our
[guide to using Langroid with local/open LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/#local-llms-hosted-on-glhfchat).
</file>

<file path="docs/notes/html-logger.md">
# HTML Logger

The HTML logger creates interactive, self-contained HTML files that make it easy to navigate complex multi-agent conversations in Langroid.

## Enabling the HTML Logger

The HTML logger is **enabled by default** in `TaskConfig`:

```python
import langroid as lr

# HTML logging is automatically enabled
task = lr.Task(agent)

# To disable HTML logging
task = lr.Task(agent, config=lr.TaskConfig(enable_html_logging=False))

# To change the log directory (default is "logs/")
task = lr.Task(agent, config=lr.TaskConfig(logs_dir="my_logs"))
```

## Log Files

Langroid creates three types of log files in the `logs/` directory:

1. **HTML Log**: `<name>.html` - Interactive, collapsible view
2. **Plain Text Log**: `<name>.log` - Traditional text log with colors
3. **TSV Log**: `<name>.tsv` - Tab-separated values for data analysis

The `<name>` is determined by:

- The task name (if specified)
- Otherwise, the agent name
- Falls back to "root" if neither is specified
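
For example, a quick sketch of naming a task so the log file name is predictable (names are illustrative):

```python
import langroid as lr

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Assistant"))
task = lr.Task(agent, name="planner")  # logs written to logs/planner.html, .log, .tsv
```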

When a task starts, you'll see a clickable `file://` link in the console:
```
WARNING - 📊 HTML Log: file:///path/to/logs/task-name.html
```

## Key Features

### Collapsible Entries
Each log entry can be expanded/collapsed to show different levels of detail:

- **Collapsed**: Shows only the entity type (USER, LLM, AGENT) and preview
- **Expanded**: Shows full message content, tools, and sub-sections

### Visual Hierarchy
- **Important responses** are shown at full opacity
- **Intermediate steps** are faded (0.4 opacity)
- Color-coded entities: USER (blue), LLM (green), AGENT (orange), SYSTEM (gray)

### Tool Visibility
Tools are clearly displayed with:

- Tool name and parameters
- Collapsible sections showing raw tool calls
- Visual indicators for tool results

### Auto-Refresh
The HTML page automatically refreshes every 2 seconds to show new log entries as they're written.

### Persistent UI State
Your view preferences are preserved across refreshes:

- Expanded/collapsed entries remain in their state
- Filter settings are remembered

## Example

Here's what the HTML logger looks like for a planner workflow:

![HTML Logger Screenshot](../screenshots/planner-workflow-html-logs.png)

In this example from `examples/basic/planner-workflow-simple.py`, you can see:

- The planner agent orchestrating multiple tool calls
- Clear visibility of `IncrementTool` and `DoublingTool` usage
- The filtered view showing only important responses
- Collapsible tool sections with parameters

## Benefits

1. **Easy Navigation**: Quickly expand/collapse entries to focus on what matters
2. **Tool Clarity**: See exactly which tools were called with what parameters
3. **Real-time Updates**: Watch logs update automatically as your task runs
4. **Filtered Views**: Use "Show only important responses" to hide intermediate steps
</file>

<file path="docs/notes/knowledge-graphs.md">
# Knowledge-graph support

Langroid can be used to set up natural-language conversations with knowledge graphs.
Currently, two of the most popular graph databases are supported:

## Neo4j

- [implementation](https://github.com/langroid/langroid/tree/main/langroid/agent/special/neo4j)
- test: [test_neo4j_chat_agent.py](https://github.com/langroid/langroid/blob/main/tests/main/test_neo4j_chat_agent.py)
- examples: [chat-neo4j.py](https://github.com/langroid/langroid/blob/main/examples/kg-chat/chat-neo4j.py) 

## ArangoDB

Available with Langroid v0.20.1 and later.

Uses the [python-arango](https://github.com/arangodb/python-arango) library.

- [implementation](https://github.com/langroid/langroid/tree/main/langroid/agent/special/arangodb)
- tests: [test_arangodb.py](https://github.com/langroid/langroid/blob/main/tests/main/test_arangodb.py), [test_arangodb_chat_agent.py](https://github.com/langroid/langroid/blob/main/tests/main/test_arangodb_chat_agent.py)
- example: [chat-arangodb.py](https://github.com/langroid/langroid/blob/main/examples/kg-chat/chat-arangodb.py)
</file>

<file path="docs/notes/langdb.md">
# LangDB with Langroid

## Introduction

[LangDB](https://langdb.ai/) is an AI gateway that provides OpenAI-compatible APIs to access 250+ LLMs. It offers cost control, observability, and performance benchmarking while enabling seamless switching between models. 
Langroid has a simple integration with LangDB's API service, so there are no dependencies
to install. (LangDB also has a self-hosted version, which is not yet supported in Langroid).

## Setup environment variables

At minimum, ensure you have these env vars in your `.env` file:

```
LANGDB_API_KEY=your_api_key_here
LANGDB_PROJECT_ID=your_project_id_here
```

## Using LangDB with Langroid

### Configure LLM and Embeddings

In `OpenAIGPTConfig`, when you specify the `chat_model` with a `langdb/` prefix,
Langroid uses the API key, `project_id`, and other LangDB-specific parameters
from the `langdb_params` field; if any of these are specified in the `.env` file
or explicitly in the environment, they override the values in `langdb_params`.
For example, to use Anthropic's Claude-3.7-Sonnet model,
set `chat_model="langdb/anthropic/claude-3.7-sonnet"`, as shown below.
You can entirely omit the `langdb_params` field if you have already set up 
the fields as environment variables in your `.env` file, e.g. the `api_key`
and `project_id` are read from the environment variables 
`LANGDB_API_KEY` and `LANGDB_PROJECT_ID` respectively, and similarly for
the other fields (which are optional).

```python
import os
import uuid
from langroid.language_models.openai_gpt import OpenAIGPTConfig, LangDBParams
from langroid.embedding_models.models import OpenAIEmbeddingsConfig

# Generate tracking IDs (optional)
thread_id = str(uuid.uuid4())
run_id = str(uuid.uuid4())

# Configure LLM
llm_config = OpenAIGPTConfig(
    chat_model="langdb/anthropic/claude-3.7-sonnet",
    # omit the langdb_params field if you're not using custom tracking,
    # or if all its fields are provided in env vars, like
    # LANGDB_API_KEY, LANGDB_PROJECT_ID, LANGDB_RUN_ID, LANGDB_THREAD_ID, etc.
    langdb_params=LangDBParams(
        label='my-app',
        thread_id=thread_id,
        run_id=run_id,
        # api_key, project_id are read from .env or environment variables
        # LANGDB_API_KEY, LANGDB_PROJECT_ID respectively.
    )
)
```

Similarly, you can configure the embeddings using `OpenAIEmbeddingsConfig`,
which also has a `langdb_params` field that works the same way as 
in `OpenAIGPTConfig` (i.e. it uses the API key and project ID from the environment
if provided, otherwise uses the default values in `langdb_params`). Again the
`langdb_params` does not need to be specified explicitly, if you've already
set up the environment variables in your `.env` file.

```python
# Configure embeddings
embedding_config = OpenAIEmbeddingsConfig(
    model_name="langdb/openai/text-embedding-3-small",
)
```

## Tracking and Observability

LangDB provides special headers for request tracking:

- `x-label`: Tag requests for filtering in the dashboard
- `x-thread-id`: Track conversation threads (UUID format)
- `x-run-id`: Group related requests together

## Examples

The `langroid/examples/langdb/` directory contains examples demonstrating:

1. **RAG with LangDB**: `langdb_chat_agent_docs.py`
2. **LangDB with Function Calling**: `langdb_chat_agent_tool.py`
3. **Custom Headers**: `langdb_custom_headers.py`

## Viewing Results

Visit the [LangDB Dashboard](https://dashboard.langdb.com) to:

- Filter requests by label, thread ID, or run ID
- View detailed request/response information
- Analyze token usage and costs

For more information, visit [LangDB Documentation](https://docs.langdb.com).

See example scripts [here](https://github.com/langroid/langroid/tree/main/examples/langdb)
</file>

<file path="docs/notes/large-tool-results.md">
# Handling large tool results

Available since Langroid v0.22.0.

In some cases, the result of handling a `ToolMessage` could be very large,
e.g. when the Tool is a database query that returns a large number of rows,
or a large schema. When used in a task loop, this large result may then be
sent to the LLM to generate a response, which in some scenarios may not
be desirable, as it increases latency, token-cost and distractions. 
Langroid allows you to set two optional parameters in a `ToolMessage` to
handle this situation:

- `_max_result_tokens`: *immediately* truncate the result to this number of tokens.
- `_max_retained_tokens`: *after* a responder (typically the LLM) responds to this 
   tool result (which optionally may already have been 
   truncated via `_max_result_tokens`),
   edit the message history to truncate the result to this number of tokens.

You can set one, both or none of these parameters. If you set both, you would 
want to set `_max_retained_tokens` to a smaller number than `_max_result_tokens`.

See the test `test_reduce_raw_tool_result` in `test_tool_messages.py` for an example.
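
A minimal sketch of how these might be set, as class-level attributes on a `ToolMessage` subclass (the tool below is hypothetical):

```python
from langroid.agent.tool_message import ToolMessage

class DBQueryTool(ToolMessage):  # hypothetical tool, purely for illustration
    request = "db_query"
    purpose = "Run a database query and return the rows as text"
    query: str

    _max_result_tokens = 20    # immediately truncate the handler's result to 20 tokens
    _max_retained_tokens = 10  # after the LLM responds, keep only 10 tokens in history

    def handle(self) -> str:
        # run self.query and return the (possibly very long) result as a string
        return "row1\nrow2\n..."  # placeholder for a large result
```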

Here is a conceptual example. Suppose there is a Tool called `MyTool`,
with parameters `_max_result_tokens=20` and `_max_retained_tokens=10`.
Imagine a task loop where the user says "hello", 
and then LLM generates a call to `MyTool`, 
and the tool handler (i.e. `agent_response`) generates a result of 100 tokens.
This result is immediately truncated to 20 tokens, and then the LLM responds to it
with a message `response`.


The agent's message history looks like this:

```
1. System msg.
2. user: hello
3. LLM: MyTool
4. Agent (Tool handler): 100-token result => reduced to 20 tokens
5. LLM: response
```

Immediately after the LLM's response at step 5, the message history is edited
so that the message contents at position 4 are truncated to 10 tokens,
as specified by `_max_retained_tokens`.
</file>

<file path="docs/notes/litellm-proxy.md">
# Using LiteLLM Proxy with OpenAIGPTConfig

You can easily configure Langroid to use LiteLLM proxy for accessing models with a 
simple prefix `litellm-proxy/` in the `chat_model` name:

## Using the `litellm-proxy/` prefix

When you specify a model with the `litellm-proxy/` prefix, Langroid automatically uses the LiteLLM proxy configuration:

```python
from langroid.language_models.openai_gpt import OpenAIGPTConfig

config = OpenAIGPTConfig(
    chat_model="litellm-proxy/your-model-name"
)
```

## Setting LiteLLM Proxy Parameters

When using the `litellm-proxy/` prefix, Langroid will read connection parameters from either:

1. The `litellm_proxy` config object:
   ```python
   from langroid.language_models.openai_gpt import OpenAIGPTConfig, LiteLLMProxyConfig
   
   config = OpenAIGPTConfig(
       chat_model="litellm-proxy/your-model-name",
       litellm_proxy=LiteLLMProxyConfig(
           api_key="your-litellm-proxy-api-key",
           api_base="http://your-litellm-proxy-url"
       )
   )
   ```

2. Environment variables (which take precedence):
   ```bash
   export LITELLM_API_KEY="your-litellm-proxy-api-key"
   export LITELLM_API_BASE="http://your-litellm-proxy-url"
   ```

This approach makes it simple to switch between using LiteLLM proxy and 
other model providers by just changing the model name prefix,
without needing to modify the rest of your code or tweak env variables.

## Note: LiteLLM Proxy vs LiteLLM Library

**Important distinction:** Using the `litellm-proxy/` prefix connects to a LiteLLM proxy server, which is different from using the `litellm/` prefix. The latter utilizes the LiteLLM adapter library directly without requiring a proxy server. Both approaches are supported in Langroid, but they serve different use cases:

- Use `litellm-proxy/` when connecting to a deployed LiteLLM proxy server
- Use `litellm/` when you want to use the LiteLLM library's routing capabilities locally

Choose the approach that best fits your infrastructure and requirements.
</file>

<file path="docs/notes/llm-pdf-parser.md">
# Using the LLM-based PDF Parser

- Converts PDF content into Markdown format using Multimodal models.

- Uses multimodal models to describe images within PDFs.

- Supports page-wise or chunk-based processing for optimized performance.

---

### Initializing the LLM-based PDF Parser

Make sure you have set up your API key for whichever model you specify in `model_name` below.

You can initialize the LLM PDF parser as follows:

```python
from langroid.parsing.parser import (
    ParsingConfig,
    PdfParsingConfig,
    LLMPdfParserConfig,
)

parsing_config = ParsingConfig(
    n_neighbor_ids=2,
    pdf=PdfParsingConfig(
        library="llm-pdf-parser",
        llm_parser_config=LLMPdfParserConfig(
            model_name="gemini-2.0-flash",
            split_on_page=True,
            max_tokens=7000,
            requests_per_minute=5,
            timeout=60,  # increase this for large documents
        ),
    ),
)
```

---

## Parameters

### `model_name`

Specifies the model to use for PDF conversion.
**Default:** `gemini/gemini-2.0-flash`

---

### `max_tokens`

Limits the number of tokens in the input. The model's output limit is **8192 tokens**.

- **Default:** 7000 tokens (leaving room for generated captions)

- _Optional parameter_

---

### `split_on_page`

Determines whether to process the document **page by page**.

- **Default:** `True`

- If set to `False`, the parser will create chunks based on `max_tokens` while respecting page boundaries.

- When `False`, the parser will send chunks containing multiple pages (e.g., `[11,12,13,14,15]`).

**Advantages of `False`:**

- Reduces API calls to the LLM.

- Lowers token usage since system prompts are not repeated per page.

**Disadvantages of `False`:**

- You will not get per-page splitting but groups of pages as a single unit.

> If your use case does **not** require strict page-by-page parsing, consider setting this to `False`.

---

### `requests_per_minute`

Limits API request frequency to avoid rate limits.

- If you encounter rate limits, set this to **1 or 2**.

---
</file>

<file path="docs/notes/marker-pdf.md">
---

# **Using `marker` as a PDF Parser in `langroid`**  

## **Installation**  

### **Standard Installation**  
To use [`marker`](https://github.com/VikParuchuri/marker) as a PDF parser in `langroid`, 
install it with the `marker-pdf` extra:

```bash
pip install langroid[marker-pdf]
```
or in combination with other extras as needed, e.g.:
```bash
pip install "langroid[marker-pdf,hf-embeddings]"
```

Note, however, that there is a version **incompatibility with `docling`**:
if you install `langroid` with the `all` extra
(or another extra such as `doc-chat` or `pdf-parsers` that also includes `docling`),
e.g. `pip install "langroid[all]"` or `pip install "langroid[doc-chat]"`,
you will get an **older** version of `marker-pdf`, which does not work with Langroid.
This does not matter if you don't intend to use `marker`;
but if you do want to use `marker`, install langroid
with the `marker-pdf` extra as shown above, in combination with any other
extras you need.


#### **For Intel-Mac Users**  
If you are on an **Intel Mac**, `docling` and `marker` cannot be 
installed together with langroid as extras, 
due to a **transformers version conflict**.  
To resolve this, manually install `marker-pdf` with:  

```bash
pip install marker-pdf[full]
```

Make sure to install this within your `langroid` virtual environment.

---

## **Example: Parsing a PDF with `marker` in `langroid`**  

```python
from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import MarkerConfig, ParsingConfig, PdfParsingConfig
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()
gemini_api_key = os.environ.get("GEMINI_API_KEY")

# Path to your PDF file
path = "<path_to_your_pdf_file>"

# Define parsing configuration
parsing_config = ParsingConfig(
    n_neighbor_ids=2,  # Number of neighboring sections to keep
    pdf=PdfParsingConfig(
        library="marker",  # Use `marker` as the PDF parsing library
        marker_config=MarkerConfig(
            config_dict={
                "use_llm": True,  # Enable high-quality LLM processing
                "gemini_api_key": gemini_api_key,  # API key for Gemini LLM
            }
        )
    ),
)

# Create the parser and extract the document
marker_parser = DocumentParser.create(path, parsing_config)
doc = marker_parser.get_doc()
```

---

## **Explanation of Configuration Options**  

If you want to use the default configuration, you can omit `marker_config` entirely.

### **Key Parameters in `MarkerConfig`**
| Parameter        | Description |
|-----------------|-------------|
| `use_llm`       | Set to `True` to enable higher-quality processing using LLMs. |
| `gemini_api_key` | Google Gemini API key for LLM-enhanced parsing. |



You can further customize `config_dict` by referring to [`marker_pdf`'s documentation](https://github.com/VikParuchuri/marker/blob/master/README.md).  

Alternatively, run the following command to view available options:  

```sh
marker_single --help
```

This will display all supported parameters, which you can pass as needed in `config_dict`.

---
</file>

<file path="docs/notes/markitdown.md">
# Markitdown Document Parsers

Langroid integrates with Microsoft's Markitdown library to provide 
conversion of Microsoft Office documents to markdown format. 
Three specialized parsers are available, for `docx`, `xlsx`, and `pptx` files.



## Prerequisites

To use these parsers, install Langroid with the required extras:

```bash
pip install "langroid[markitdown]"    # Just Markitdown parsers
# or
pip install "langroid[doc-parsers]"   # All document parsers
```

## Available Parsers


Once you set up a `parser` for the appropriate document-type, you  
can get the entire document with `parser.get_doc()`,
or get automatically chunked content with `parser.get_doc_chunks()`.
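
For example, after creating any of the parsers shown below (a minimal sketch; `parser` is whichever parser you created):

```python
doc = parser.get_doc()            # the entire document
chunks = parser.get_doc_chunks()  # automatically chunked content
```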


### 1. `MarkitdownDocxParser`

Converts Word documents (`*.docx`) to markdown, preserving structure, 
formatting, and tables.

See the tests

- [`test_docx_parser.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_docx_parser.py)
- [`test_markitdown_parser.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_markitdown_parser.py)

for examples of how to use these parsers.


```python
from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import DocxParsingConfig, ParsingConfig

parser = DocumentParser.create(
    "path/to/document.docx",
    ParsingConfig(
        docx=DocxParsingConfig(library="markitdown-docx"),
        # ... other parsing config options
    ),
)

```


### 2. `MarkitdownXLSXParser`

Converts Excel spreadsheets (*.xlsx/*.xls) to markdown tables, preserving data and sheet structure.

```python
from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import ParsingConfig, MarkitdownXLSParsingConfig

parser = DocumentParser.create(
    "path/to/spreadsheet.xlsx",
    ParsingConfig(xls=MarkitdownXLSParsingConfig())
)
```


### 3. `MarkitdownPPTXParser`

Converts PowerPoint presentations (*.pptx) to markdown, preserving slide content and structure.

```python
from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import ParsingConfig, MarkitdownPPTXParsingConfig

parser = DocumentParser.create(
    "path/to/presentation.pptx",
    ParsingConfig(pptx=MarkitdownPPTXParsingConfig())
)
```
</file>

<file path="docs/notes/openai-client-caching.md">
# OpenAI Client Caching

## Overview

Langroid implements client caching for OpenAI and compatible APIs (Groq, Cerebras, etc.) to improve performance and prevent resource exhaustion issues.

## Configuration

### The `use_cached_client` Option
Set `use_cached_client` in your `OpenAIGPTConfig`:

```python
from langroid.language_models import OpenAIGPTConfig

config = OpenAIGPTConfig(
    chat_model="gpt-4",
    use_cached_client=True  # Default
)
```

### Default Behavior
- `use_cached_client=True` (enabled by default)
- Clients with identical configurations share the same underlying HTTP connection pool
- Different configurations (API key, base URL, headers, etc.) get separate client instances

## Benefits

- **Connection Pooling**: Reuses TCP connections, reducing latency and overhead
- **Resource Efficiency**: Prevents "too many open files" errors when creating many agents
- **Performance**: Eliminates connection handshake overhead on subsequent requests
- **Thread Safety**: Shared clients are safe to use across threads

## When to Disable Client Caching

Set `use_cached_client=False` in these scenarios:

1. **Multiprocessing**: Each process should have its own client instance
2. **Client Isolation**: When you need complete isolation between different agent instances
3. **Debugging**: To rule out client sharing as a source of issues
4. **Legacy Compatibility**: If your existing code depends on unique client instances

## Example: Disabling Client Caching

```python
config = OpenAIGPTConfig(
    chat_model="gpt-4",
    use_cached_client=False  # Each instance gets its own client
)
```

## Technical Details

- Uses SHA256-based cache keys to identify unique configurations
- Implements singleton pattern with lazy initialization
- Automatically cleans up clients on program exit via atexit hooks
- Compatible with both sync and async OpenAI clients
</file>

<file path="docs/notes/overview.md">
This section contains brief notes describing various features and updates.
</file>

<file path="docs/notes/pgvector.md">
---

## **Setup PostgreSQL with pgvector using Docker**

To quickly get a PostgreSQL instance with pgvector running, the easiest method is to use Docker. Follow the steps below:

### **1. Run PostgreSQL with Docker**

Use the official `ankane/pgvector` Docker image to set up PostgreSQL with the pgvector extension. Run the following command:

```bash
docker run --name pgvector -e POSTGRES_USER=your_postgres_user -e POSTGRES_PASSWORD=your_postgres_password -e POSTGRES_DB=your_database_name -p 5432:5432 ankane/pgvector
```

This will pull the `ankane/pgvector` image and run it as a PostgreSQL container on your local machine. The database will be accessible at `localhost:5432`. 

### **2. Include `.env` file with PostgreSQL credentials**

Add the following environment variables to a `.env` file to configure your PostgreSQL connection.
These should match the values used when spinning up the Docker container:

```dotenv
POSTGRES_USER=your_postgres_user
POSTGRES_PASSWORD=your_postgres_password
POSTGRES_DB=your_database_name
```
## **If you want to use a cloud Postgres offering**

We are using **Tembo** for demonstrative purposes here.  

### **Steps to Set Up Tembo**  
Follow this [quickstart guide](https://tembo.io/docs/getting-started/getting_started) to get your Tembo credentials.  

1. Sign up at [Tembo.io](https://cloud.tembo.io/).  
2. While selecting a stack, choose **VectorDB** as your option.  
3. Click on **Deploy Free**.  
4. Wait until your database is fully provisioned.  
5. Click on **Show Connection String** to get your connection string.  

### **If you already have a connection string, there is no need to set up Docker**
Make sure your connection string starts with `postgres://` or `postgresql://`.

Add this to your `.env`
```dotenv
POSTGRES_CONNECTION_STRING=your-connection-string
```

---

## **Installation**

If you are using `uv` or `pip` for package management, install Langroid with the `postgres` extra:

```bash
uv add "langroid[postgres]"  # or
pip install "langroid[postgres]"
```

---

## **Code Example**

Here's an example of how to use Langroid with PostgreSQL:

```python
import langroid as lr
from langroid.agent.special import DocChatAgent, DocChatAgentConfig
from langroid.embedding_models import OpenAIEmbeddingsConfig

# Configure OpenAI embeddings
embed_cfg = OpenAIEmbeddingsConfig(
    model_type="openai",
)

# Configure the DocChatAgent with PostgresDB
config = DocChatAgentConfig(
    llm=lr.language_models.OpenAIGPTConfig(
        chat_model=lr.language_models.OpenAIChatModel.GPT4o
    ),
    vecdb=lr.vector_store.PostgresDBConfig(
        collection_name="quick_start_chat_agent_docs",
        replace_collection=True,
        embedding=embed_cfg,
    ),
    parsing=lr.parsing.parser.ParsingConfig(
        separators=["\n\n"],
        splitter=lr.parsing.parser.Splitter.SIMPLE,
    ),
    n_similar_chunks=2,
    n_relevant_chunks=2,
)

# Create the agent
agent = DocChatAgent(config)
```

---

## **Create and Ingest Documents**

Define documents with their content and metadata for ingestion into the vector store.

### **Code Example**

```python
documents = [
    lr.Document(
        content="""
            In the year 2050, GPT10 was released. 
            
            In 2057, paperclips were seen all over the world. 
            
            Global warming was solved in 2060. 
            
            In 2061, the world was taken over by paperclips.         
            
            In 2045, the Tour de France was still going on.
            They were still using bicycles. 
            
            There was one more ice age in 2040.
        """,
        metadata=lr.DocMetaData(source="wikipedia-2063", id="dkfjkladfjalk"),
    ),
    lr.Document(
        content="""
            We are living in an alternate universe 
            where Germany has occupied the USA, and the capital of USA is Berlin.
            
            Charlie Chaplin was a great comedian.
            In 2050, all Asian countries merged into Indonesia.
        """,
        metadata=lr.DocMetaData(source="Almanac", id="lkdajfdkla"),
    ),
]
```

### **Ingest Documents**

```python
agent.ingest_docs(documents)
```

---

## **Get an Answer from the LLM**

Now that documents are ingested, you can query the agent to get an answer.

### **Code Example**

```python
answer = agent.llm_response("When will the new ice age begin?")
```
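
The returned `answer` is a `ChatDocument`; its text is in the `content` field:

```python
print(answer.content)
```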

---
</file>

<file path="docs/notes/pinecone.md">
# How to setup Langroid and Pinecone Serverless
This document serves as a quick tutorial on how to use [Pinecone](https://www.pinecone.io/)
Serverless Indexes with Langroid. We will go over some quickstart links and 
some code snippets on setting up a conversation with an LLM utilizing Langroid.

# Setting up Pinecone
Here are some reference links if you'd like to read a bit more on Pinecone's
model definitions and API:
- https://docs.pinecone.io/guides/get-started/overview
- https://docs.pinecone.io/guides/get-started/glossary
- https://docs.pinecone.io/guides/indexes/manage-indexes
- https://docs.pinecone.io/reference/api/introduction
## Signing up for Pinecone
To get started, you'll need to have an account. [Here's](https://www.pinecone.io/pricing/) where you can review the
pricing options for Pinecone. Once you have an account, you'll need to procure an API
key. Make sure to save the key you are given on initial login in a secure location. If
you were unable to save it when your account was created, you can always [create a new
API key](https://docs.pinecone.io/guides/projects/manage-api-keys) in the pinecone console.
## Setting up your local environment
For the purposes of this example, we will use OpenAI to generate our
embeddings. As such, alongside a Pinecone API key, you'll also want an OpenAI key. You can
find a quickstart guide on getting started with OpenAI [here](https://platform.openai.com/docs/quickstart).
Once you have your API key handy, you'll need to enrich your `.env` file with it.
You should have something like the following:
```env
...
OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
PINECONE_API_KEY=<YOUR_PINECONE_API_KEY>
...
```

# Using Langroid with Pinecone Serverless
Once you have completed signing up for an account and have added your API key
to your local environment, you can start utilizing Langroid with Pinecone.
## Setting up an Agent
Here's some example code setting up an agent:
```python
from langroid import Document, DocMetaData
from langroid.agent.special import DocChatAgent, DocChatAgentConfig
from langroid.embedding_models import OpenAIEmbeddingsConfig
from langroid.language_models import OpenAIGPTConfig, OpenAIChatModel
from langroid.parsing.parser import ParsingConfig, Splitter
from langroid.vector_store import PineconeDBConfig

agent_embed_cfg = OpenAIEmbeddingsConfig(
    model_type="openai"
)

agent_config = DocChatAgentConfig(
    llm=OpenAIGPTConfig(
        chat_model=OpenAIChatModel.GPT4o_MINI
    ),
    vecdb=PineconeDBConfig(
        # note, Pinecone indexes must be alphanumeric lowercase characters or "-"
        collection_name="pinecone-serverless-example",
        replace_collection=True,
        embedding=agent_embed_cfg,
    ),
    parsing=ParsingConfig(
        separators=["\n"],
        splitter=Splitter.SIMPLE,
    ),
    n_similar_chunks=2,
    n_relevant_chunks=2,
)

agent = DocChatAgent(config=agent_config)

###################
# Once we have created an agent, we can start loading
# some docs into our Pinecone index:
###################

documents = [
    Document(
        content="""Max Verstappen was the Formula 1 World Drivers' Champion in 2024.
        Lewis Hamilton was the Formula 1 World Drivers' Champion in 2020.
        Nico Rosberg was the Formula 1 World Drivers' Champion in 2016.
        Sebastian Vettel was the Formula 1 World Drivers' Champion in 2013.
        Jenson Button was the Formula 1 World Drivers' Champion in 2009.
        Kimi Räikkönen was the Formula 1 World Drivers' Champion in 2007.
        """,
        metadata=DocMetaData(
            source="wikipedia",
            id="formula-1-facts",
        )
    ),
    Document(
        content="""The Boston Celtics won the NBA Championship for the 2024 NBA season. The MVP for the 2024 NBA Championship was Jaylen Brown.
        The Denver Nuggets won the NBA Championship for the 2023 NBA season. The MVP for the 2023 NBA Championship was Nikola Jokić.
        The Golden State Warriors won the NBA Championship for the 2022 NBA season. The MVP for the 2022 NBA Championship was Stephen Curry.
        The Milwaukee Bucks won the NBA Championship for the 2021 NBA season. The MVP for the 2021 NBA Championship was Giannis Antetokounmpo.
        The Los Angeles Lakers won the NBA Championship for the 2020 NBA season. The MVP for the 2020 NBA Championship was LeBron James.
        The Toronto Raptors won the NBA Championship for the 2019 NBA season. The MVP for the 2019 NBA Championship was Kawhi Leonard.
        """,
        metadata=DocMetaData(
            source="wikipedia",
            id="nba-facts"
        )
    )
]

agent.ingest_docs(documents)

###################
# With the documents now loaded, we can now prompt our agent
###################

formula_one_world_champion_2007 = agent.llm_response(
    message="Who was the Formula 1 World Drivers' Champion in 2007?"
)
try:
    assert "Kimi Räikkönen" in formula_one_world_champion_2007.content
except AssertionError as e:
    print(f"Did not resolve Kimi Räikkönen as the answer, document content: {formula_one_world_champion_2007.content} ")

nba_champion_2023 = agent.llm_response(
    message="Who won the 2023 NBA Championship?"
)
try:
    assert "Denver Nuggets" in nba_champion_2023.content
except AssertionError as e:
    print(f"Did not resolve the Denver Nuggets as the answer, document content: {nba_champion_2023.content}")

nba_mvp_2023 = agent.llm_response(
    message="Who was the MVP for the 2023 NBA Championship?"
)
try:
    assert "Nikola Jokić" in nba_mvp_2023.content
except AssertionError as e:
    print(f"Did not resolve Nikola Jokić as the answer, document content: {nba_mvp_2023.content}")
```
</file>

<file path="docs/notes/portkey.md">
# Portkey Integration

Langroid provides seamless integration with [Portkey](https://portkey.ai), a powerful AI gateway that enables you to access multiple LLM providers through a unified API with advanced features like caching, retries, fallbacks, and comprehensive observability.

## What is Portkey?

Portkey is an AI gateway that sits between your application and various LLM providers, offering:

- **Unified API**: Access 200+ models from different providers through one interface
- **Reliability**: Automatic retries, fallbacks, and load balancing
- **Observability**: Detailed logging, tracing, and analytics
- **Performance**: Intelligent caching and request optimization
- **Security**: Virtual keys and advanced access controls
- **Cost Management**: Usage tracking and budget controls

For complete documentation, visit the [Portkey Documentation](https://docs.portkey.ai).

## Quick Start

### 1. Setup

First, sign up for a Portkey account at [portkey.ai](https://portkey.ai) and get your API key.

Set up your environment variables, either explicitly or in your `.env` file as usual: 

```bash
# Required: Portkey API key
export PORTKEY_API_KEY="your-portkey-api-key"

# Required: Provider API keys (for the models you want to use)
export OPENAI_API_KEY="your-openai-key"
export ANTHROPIC_API_KEY="your-anthropic-key"
export GOOGLE_API_KEY="your-google-key"
# ... other provider keys as needed
```

### 2. Basic Usage

```python
import langroid as lr
import langroid.language_models as lm
from langroid.language_models.provider_params import PortkeyParams

# Create an LLM config to use Portkey's OpenAI-compatible API
# (Note that the name `OpenAIGPTConfig` does NOT imply it only works with OpenAI models;
# the name reflects the fact that the config is meant to be used with an
# OpenAI-compatible API, which Portkey provides for multiple LLM providers.)
llm_config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini",
    portkey_params=PortkeyParams(
        api_key="your-portkey-api-key",  # Or set PORTKEY_API_KEY env var
    )
)

# Create LLM instance
llm = lm.OpenAIGPT(llm_config)

# Use normally
response = llm.chat("What is the smallest prime number?")
print(response.message)
```

### 3. Multiple Providers

Switch between providers seamlessly:

```python
# OpenAI
config_openai = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o",
)

# Anthropic
config_anthropic = lm.OpenAIGPTConfig(
    chat_model="portkey/anthropic/claude-3-5-sonnet-20241022",
)

# Google Gemini
config_gemini = lm.OpenAIGPTConfig(
    chat_model="portkey/google/gemini-2.0-flash-lite",
)
```

## Advanced Features

### Virtual Keys

Use virtual keys to abstract provider management:

```python
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o",
    portkey_params=PortkeyParams(
        virtual_key="vk-your-virtual-key",  # Configured in Portkey dashboard
    )
)
```

### Caching and Performance

Enable intelligent caching to reduce costs and improve performance:

```python
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini",
    portkey_params=PortkeyParams(
        cache={
            "enabled": True,
            "ttl": 3600,  # 1 hour cache
            "namespace": "my-app"
        },
        cache_force_refresh=False,
    )
)
```

### Retry Strategies

Configure automatic retries for better reliability:

```python
config = lm.OpenAIGPTConfig(
    chat_model="portkey/anthropic/claude-3-haiku-20240307",
    portkey_params=PortkeyParams(
        retry={
            "max_retries": 3,
            "backoff": "exponential",
            "jitter": True
        }
    )
)
```

### Observability and Tracing

Add comprehensive tracking for production monitoring:

```python
import uuid

config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o",
    portkey_params=PortkeyParams(
        trace_id=f"trace-{uuid.uuid4().hex[:8]}",
        metadata={
            "user_id": "user-123",
            "session_id": "session-456",
            "app_version": "1.2.3"
        },
        user="user-123",
        organization="my-org",
        custom_headers={
            "x-request-source": "langroid",
            "x-feature": "chat-completion"
        }
    )
)
```

## Configuration Reference

The `PortkeyParams` class supports all Portkey features:

```python
from langroid.language_models.provider_params import PortkeyParams

params = PortkeyParams(
    # Authentication
    api_key="pk-...",                    # Portkey API key
    virtual_key="vk-...",               # Virtual key (optional)
    
    # Observability
    trace_id="trace-123",               # Request tracing
    metadata={"key": "value"},          # Custom metadata
    user="user-id",                     # User identifier
    organization="org-id",              # Organization identifier
    
    # Performance
    cache={                             # Caching configuration
        "enabled": True,
        "ttl": 3600,
        "namespace": "my-app"
    },
    cache_force_refresh=False,          # Force cache refresh
    
    # Reliability
    retry={                             # Retry configuration
        "max_retries": 3,
        "backoff": "exponential",
        "jitter": True
    },
    
    # Custom headers
    custom_headers={                    # Additional headers
        "x-custom": "value"
    },
    
    # Base URL (usually not needed)
    base_url="https://api.portkey.ai"   # Portkey API endpoint
)
```

## Supported Providers

Portkey supports 200+ models from various providers. Common ones include:

```python
# OpenAI
"portkey/openai/gpt-4o"
"portkey/openai/gpt-4o-mini"

# Anthropic
"portkey/anthropic/claude-3-5-sonnet-20241022"
"portkey/anthropic/claude-3-haiku-20240307"

# Google
"portkey/google/gemini-2.0-flash-lite"
"portkey/google/gemini-1.5-pro"

# Cohere
"portkey/cohere/command-r-plus"

# Meta
"portkey/meta/llama-3.1-405b-instruct"

# And many more...
```

Check the [Portkey documentation](https://docs.portkey.ai/docs/integrations/models) for the complete list.

## Examples

Langroid includes comprehensive Portkey examples in `examples/portkey/`:

1. **`portkey_basic_chat.py`** - Basic usage with multiple providers
2. **`portkey_advanced_features.py`** - Caching, retries, and observability
3. **`portkey_multi_provider.py`** - Comparing responses across providers

Run any example:

```bash
cd examples/portkey
python portkey_basic_chat.py
```

## Best Practices

### 1. Use Environment Variables

Never hardcode API keys:

```bash
# .env file
PORTKEY_API_KEY=your_portkey_key
OPENAI_API_KEY=your_openai_key
ANTHROPIC_API_KEY=your_anthropic_key
```

### 2. Implement Fallback Strategies

Use multiple providers for reliability:

```python
providers = [
    ("openai", "gpt-4o-mini"),
    ("anthropic", "claude-3-haiku-20240307"),
    ("google", "gemini-2.0-flash-lite"),
]

def chat_with_fallback(question: str):
    for provider, model in providers:
        try:
            config = lm.OpenAIGPTConfig(
                chat_model=f"portkey/{provider}/{model}"
            )
            llm = lm.OpenAIGPT(config)
            return llm.chat(question)
        except Exception:
            continue  # Try next provider
    raise RuntimeError("All providers failed")
```

### 3. Add Meaningful Metadata

Include context for better observability:

```python
params = PortkeyParams(
    metadata={
        "user_id": user.id,
        "feature": "document_qa",
        "document_type": "pdf",
        "processing_stage": "summary"
    }
)
```

### 4. Use Caching Wisely

Enable caching for deterministic queries:

```python
# Good for caching
params = PortkeyParams(
    cache={"enabled": True, "ttl": 3600}
)

# Use with deterministic prompts
response = llm.chat("What is the capital of France?")
```

### 5. Monitor Performance

Use trace IDs to track request flows:

```python
import uuid

trace_id = f"trace-{uuid.uuid4().hex[:8]}"
params = PortkeyParams(
    trace_id=trace_id,
    metadata={"operation": "document_processing"}
)

# Use the same trace_id for related requests
```

## Monitoring and Analytics

### Portkey Dashboard

View detailed analytics at [app.portkey.ai](https://app.portkey.ai):

- Request/response logs
- Token usage and costs
- Performance metrics (latency, errors)
- Provider comparisons
- Custom filters by metadata

### Custom Filtering

Use metadata and headers to filter requests:

```python
# Tag requests by feature
params = PortkeyParams(
    metadata={"feature": "chat", "version": "v2"},
    custom_headers={"x-request-type": "production"}
)
```

Then filter in the dashboard by:
- `metadata.feature = "chat"`
- `headers.x-request-type = "production"`

## Troubleshooting

### Common Issues

1. **Authentication Errors**
   ```
   Error: Unauthorized (401)
   ```
   - Check `PORTKEY_API_KEY` is set correctly
   - Verify API key is active in Portkey dashboard

2. **Provider API Key Missing**
   ```
   Error: Missing API key for provider
   ```
   - Set provider API key (e.g., `OPENAI_API_KEY`)
   - Or use virtual keys in Portkey dashboard

3. **Model Not Found**
   ```
   Error: Model not supported
   ```
   - Check model name format: `portkey/provider/model`
   - Verify model is available through Portkey

4. **Rate Limiting**
   ```
   Error: Rate limit exceeded
   ```
   - Configure retry parameters
   - Use virtual keys for better rate limit management

### Debug Mode

Enable detailed logging:

```python
import logging
logging.getLogger("langroid").setLevel(logging.DEBUG)
```

### Test Configuration

Verify your setup:

```python
# Test basic connection
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini",
    max_output_tokens=50
)
llm = lm.OpenAIGPT(config)
response = llm.chat("Hello")
print("✅ Portkey integration working!")
```

## Migration Guide

### From Direct Provider Access

If you're currently using providers directly:

```python
# Before: Direct OpenAI
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4o-mini"
)

# After: Through Portkey
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini"
)
```

### Adding Advanced Features Gradually

Start simple and add features as needed:

```python
# Step 1: Basic Portkey
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini"
)

# Step 2: Add caching
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini",
    portkey_params=PortkeyParams(
        cache={"enabled": True, "ttl": 3600}
    )
)

# Step 3: Add observability
config = lm.OpenAIGPTConfig(
    chat_model="portkey/openai/gpt-4o-mini",
    portkey_params=PortkeyParams(
        cache={"enabled": True, "ttl": 3600},
        metadata={"app": "my-app", "user": "user-123"},
        trace_id="trace-abc123"
    )
)
```

## Resources

- **Portkey Website**: [https://portkey.ai](https://portkey.ai)
- **Portkey Documentation**: [https://docs.portkey.ai](https://docs.portkey.ai)
- **Portkey Dashboard**: [https://app.portkey.ai](https://app.portkey.ai)
- **Supported Models**: [https://docs.portkey.ai/docs/integrations/models](https://docs.portkey.ai/docs/integrations/models)
- **Langroid Examples**: `examples/portkey/` directory
- **API Reference**: [https://docs.portkey.ai/docs/api-reference](https://docs.portkey.ai/docs/api-reference)
</file>

<file path="docs/notes/pydantic-v2-migration.md">
# Pydantic v2 Migration Guide

## Overview

Langroid has fully migrated to Pydantic v2! All internal code now uses Pydantic v2 
patterns and imports directly from `pydantic`. This guide will help you update your 
code to work with the new version.

## Compatibility Layer (Deprecated)

If your code currently imports from `langroid.pydantic_v1`:

```python
# OLD - Deprecated
from langroid.pydantic_v1 import BaseModel, Field, BaseSettings
```

You'll see a deprecation warning. This compatibility layer now imports from Pydantic v2 
directly, so your code may continue to work, but you should update your imports:

```python
# NEW - Correct
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings  # Note: BaseSettings moved to pydantic_settings in v2
```

!!! note "BaseSettings Location Change"
    In Pydantic v2, `BaseSettings` has moved to a separate `pydantic_settings` package.
    You'll need to install it separately: `pip install pydantic-settings`

!!! warning "Compatibility Layer Removal"
    The `langroid.pydantic_v1` module will be removed in a future version. 
    Update your imports now to avoid breaking changes.

## Key Changes to Update

### 1. All Fields Must Have Type Annotations

!!! danger "Critical Change"
    In Pydantic v2, fields without type annotations are completely ignored!

```python
# WRONG - Fields without annotations are ignored in v2
class MyModel(BaseModel):
    name = "John"          # ❌ This field is IGNORED!
    age = 25               # ❌ This field is IGNORED!
    role: str = "user"     # ✅ This field works

# CORRECT - All fields must have type annotations
class MyModel(BaseModel):
    name: str = "John"     # ✅ Type annotation required
    age: int = 25          # ✅ Type annotation required
    role: str = "user"     # ✅ Already correct
```

This is one of the most common issues when migrating to v2. Always ensure every field has an explicit type annotation, even if it has a default value.

#### Special Case: Overriding Fields in Subclasses

!!! danger "Can Cause Errors"
    When overriding fields from parent classes without type annotations, you may get 
    actual errors, not just ignored fields!

This is particularly important when creating custom Langroid agent configurations:

```python
# WRONG - This can cause errors!
from langroid import ChatAgentConfig
from langroid.language_models import OpenAIGPTConfig

class MyAgentConfig(ChatAgentConfig):
    # ❌ ERROR: Missing type annotation when overriding parent field
    llm = OpenAIGPTConfig(chat_model="gpt-4")
    
    # ❌ ERROR: Even with Field, still needs type annotation
    system_message = Field(default="You are a helpful assistant")

# CORRECT - Always include type annotations when overriding
class MyAgentConfig(ChatAgentConfig):
    # ✅ Type annotation required when overriding
    llm: OpenAIGPTConfig = OpenAIGPTConfig(chat_model="gpt-4")
    
    # ✅ Type annotation with Field
    system_message: str = Field(default="You are a helpful assistant")
```

Without type annotations on overridden fields, you may see errors like:
- `ValueError: Field 'llm' requires a type annotation`
- `TypeError: Field definitions should be annotated`
- Validation errors when the model tries to use the parent's field definition

### 2. Stricter Type Validation for Optional Fields

!!! danger "Breaking Change"
    Pydantic v2 is much stricter about type validation. Fields that could accept `None` 
    in v1 now require explicit `Optional` type annotations.

```python
# WRONG - This worked in v1 but fails in v2
class CloudSettings(BaseSettings):
    private_key: str = None      # ❌ ValidationError: expects string, got None
    api_host: str = None         # ❌ ValidationError: expects string, got None

# CORRECT - Explicitly mark fields as optional
from typing import Optional

class CloudSettings(BaseSettings):
    private_key: Optional[str] = None    # ✅ Explicitly optional
    api_host: Optional[str] = None       # ✅ Explicitly optional
    
    # Or using Python 3.10+ union syntax
    client_email: str | None = None      # ✅ Also works
```

This commonly affects:
- Configuration classes using `BaseSettings`
- Fields with `None` as default value
- Environment variable loading where the var might not be set

If you see errors like:
```
ValidationError: Input should be a valid string [type=string_type, input_value=None, input_type=NoneType]
```

The fix is to add `Optional[]` or `| None` to the type annotation.

### 3. Model Serialization Methods

```python
# OLD (Pydantic v1)
data = model.dict()
json_str = model.json()
new_model = MyModel.parse_obj(data)
new_model = MyModel.parse_raw(json_str)

# NEW (Pydantic v2)
data = model.model_dump()
json_str = model.model_dump_json()
new_model = MyModel.model_validate(data)
new_model = MyModel.model_validate_json(json_str)
```

### 4. Model Configuration

```python
# OLD (Pydantic v1)
class MyModel(BaseModel):
    name: str
    
    class Config:
        extra = "forbid"
        validate_assignment = True

# NEW (Pydantic v2)
from pydantic import BaseModel, ConfigDict

class MyModel(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
        validate_assignment=True
    )
    
    name: str
```

### 5. Field Validators

```python
# OLD (Pydantic v1)
from pydantic import validator

class MyModel(BaseModel):
    name: str
    
    @validator('name')
    def name_must_not_be_empty(cls, v):
        if not v.strip():
            raise ValueError('Name cannot be empty')
        return v

# NEW (Pydantic v2)
from pydantic import field_validator

class MyModel(BaseModel):
    name: str
    
    @field_validator('name')
    def name_must_not_be_empty(cls, v):
        if not v.strip():
            raise ValueError('Name cannot be empty')
        return v
```

### 6. Custom Types and Validation

```python
# OLD (Pydantic v1)
from pydantic import parse_obj_as
from typing import List

data = [{"name": "Alice"}, {"name": "Bob"}]
users = parse_obj_as(List[User], data)

# NEW (Pydantic v2)
from pydantic import TypeAdapter
from typing import List

data = [{"name": "Alice"}, {"name": "Bob"}]
users = TypeAdapter(List[User]).validate_python(data)
```

## Common Patterns in Langroid

When working with Langroid's agents and tools:

### Tool Messages

```python
from pydantic import BaseModel, Field
from langroid.agent.tool_message import ToolMessage

class MyTool(ToolMessage):
    request: str = "my_tool"
    purpose: str = "Process some data"
    
    # Use Pydantic v2 patterns
    data: str = Field(..., description="The data to process")
    
    def handle(self) -> str:
        # Tool logic here
        return f"Processed: {self.data}"
```

### Agent Configuration

```python
from pydantic import ConfigDict
from langroid import ChatAgentConfig

class MyAgentConfig(ChatAgentConfig):
    model_config = ConfigDict(extra="forbid")
    
    custom_param: str = "default_value"
```

## Troubleshooting

### Import Errors

If you see `ImportError` or `AttributeError` after updating imports:
- Make sure you're using the correct v2 method names (e.g., `model_dump` not `dict`)
- Check that field validators use `@field_validator` not `@validator`
- Ensure `ConfigDict` is used instead of nested `Config` classes

### Validation Errors

Pydantic v2 has stricter validation in some cases:
- Empty strings are no longer coerced to `None` for optional fields
- Type coercion is more explicit
- Extra fields handling may be different

### Performance

Pydantic v2 is generally faster, but if you notice any performance issues:
- Use `model_validate` instead of creating models with `**dict` unpacking
- Consider using `model_construct` for trusted data (skips validation)
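
For instance, a minimal sketch of these two patterns (the `User` model below is illustrative):

```python
from pydantic import BaseModel

class User(BaseModel):
    name: str
    age: int

data = {"name": "Alice", "age": 30}

user = User.model_validate(data)        # preferred over User(**data)
trusted = User.model_construct(**data)  # skips validation; only for trusted data
```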

## Need Help?

If you encounter issues during migration:
1. Check the [official Pydantic v2 migration guide](https://docs.pydantic.dev/latest/migration/)
2. Review Langroid's example code for v2 patterns
3. Open an issue on the [Langroid GitHub repository](https://github.com/langroid/langroid/issues)
</file>

<file path="docs/notes/qdrant-resource-cleanup.md">
# QdrantDB Resource Cleanup

When using QdrantDB with local storage, it's important to properly release resources
to avoid file lock conflicts. QdrantDB uses a `.lock` file to prevent concurrent
access to the same storage directory.

## The Problem

Without proper cleanup, you may encounter this warning:

```
Error connecting to local QdrantDB at ./qdrant_data:
Storage folder ./qdrant_data is already accessed by another instance of Qdrant
client. If you require concurrent access, use Qdrant server instead.
Switching to ./qdrant_data.new
```

This happens when a QdrantDB instance isn't properly closed, leaving the lock file
in place.

## Solutions

### Method 1: Explicit `close()` Method

Always call `close()` when done with a QdrantDB instance:

```python
from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig

config = QdrantDBConfig(
    cloud=False,
    collection_name="my_collection",
    storage_path="./qdrant_data",
)

vecdb = QdrantDB(config)
# ... use the vector database ...
vecdb.clear_all_collections(really=True)

# Important: Release the lock
vecdb.close()
```

### Method 2: Context Manager (Recommended)

Use QdrantDB as a context manager for automatic cleanup:

```python
from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig

config = QdrantDBConfig(
    cloud=False,
    collection_name="my_collection", 
    storage_path="./qdrant_data",
)

with QdrantDB(config) as vecdb:
    # ... use the vector database ...
    vecdb.clear_all_collections(really=True)
    # Automatically closed when exiting the context
```

The context manager ensures cleanup even if an exception occurs.

## When This Matters

This is especially important in scenarios where:

1. You create temporary QdrantDB instances for maintenance (e.g., clearing
   collections)
2. Your application restarts frequently during development
3. Multiple parts of your code need to access the same storage path sequentially

## Note for Cloud Storage

This only affects local storage (`cloud=False`). When using Qdrant cloud service,
the lock file mechanism is not used.
</file>

<file path="docs/notes/quiet-mode.md">
# Suppressing LLM output: quiet mode

In some scenarios we want to suppress LLM streaming output -- e.g. when doing some type of processing as part of a workflow,
or when using an LLM-agent to generate code via tools, etc. We are more interested in seeing the results of the workflow,
and don't want to see streaming output in the terminal. Langroid provides a `quiet_mode` context manager that can be used
to suppress LLM output, even in streaming mode (in fact streaming is disabled in quiet mode).

E.g.  we can use the `quiet_mode` context manager like this:

```python
from langroid.utils.configuration import quiet_mode, settings

# directly with LLM

llm = ...
with quiet_mode(True):
	response = llm.chat(...)

# or, using an agent

agent = ...
with quiet_mode(True):
	response = agent.llm_response(...)

# or, using a task

task = Task(agent, ...)
with quiet_mode(True):
	result = task.run(...)

# we can explicitly set quiet_mode, and this is globally recognized throughout langroid.

settings.quiet = True

# we can also condition quiet mode on another custom cmd line option/flag, such as "silent":

with quiet_mode(silent):
	...

```
</file>

<file path="docs/notes/structured-output.md">
# Structured Output

Available in Langroid since v0.24.0.

On supported LLMs, including recent OpenAI LLMs (GPT-4o and GPT-4o mini) and local LLMs served by compatible inference servers,
in particular, [vLLM](https://github.com/vllm-project/vllm) and [llama.cpp](https://github.com/ggerganov/llama.cpp), the decoding process can be constrained to ensure that the model's output adheres to a provided schema, 
improving the reliability of tool call generation and, in general, ensuring that the output can be reliably parsed and processed by downstream applications.

See [here](../tutorials/local-llm-setup.md/#setup-llamacpp-with-a-gguf-model-from-huggingface) for instructions for usage with `llama.cpp` and [here](../tutorials/local-llm-setup.md/#setup-vllm-with-a-model-from-huggingface) for `vLLM`.

Given a `ChatAgent` `agent` and a type `type`, we can define a strict copy of the agent as follows:
```python
strict_agent = agent[type]
```

We can use this to allow reliable extraction of typed values from an LLM with minimal prompting. For example, to generate typed values given `agent`'s current context, we can define the following:

```python
def typed_agent_response(
    prompt: str,
    output_type: type,
) -> Any:
    response = agent[output_type].llm_response_forget(prompt)
    return agent.from_ChatDocument(response, output_type)
```

We apply this in [test_structured_output.py](https://github.com/langroid/langroid/blob/main/tests/main/test_structured_output.py), in which we define types which describe
countries and their presidents:
```python
class Country(BaseModel):
    """Info about a country"""

    name: str = Field(..., description="Name of the country")
    capital: str = Field(..., description="Capital of the country")


class President(BaseModel):
    """Info about a president of a country"""

    country: Country = Field(..., description="Country of the president")
    name: str = Field(..., description="Name of the president")
    election_year: int = Field(..., description="Year of election of the president")


class PresidentList(BaseModel):
    """List of presidents of various countries"""

    presidents: List[President] = Field(..., description="List of presidents")
```
and show that `typed_agent_response("Show me an example of two Presidents", PresidentList)` correctly returns a list of two presidents with *no* prompting describing the desired output format.

In addition to Pydantic models, `ToolMessage`s and simple Python types are supported. For instance, `typed_agent_response("What is the value of pi?", float)` correctly returns $\pi$ to several decimal places.
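
For instance, using the `typed_agent_response` helper defined above, these calls look like (a sketch based on the test referenced earlier):

```python
presidents = typed_agent_response(
    "Show me an example of two Presidents", PresidentList
)  # a PresidentList with two entries

pi = typed_agent_response("What is the value of pi?", float)  # a plain float
```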

The following two detailed examples show how structured output can be used to improve the reliability of the [chat-tree example](https://github.com/langroid/langroid/blob/main/examples/basic/chat-tree.py): [this one](https://github.com/langroid/langroid/blob/main/examples/basic/chat-tree-structured.py) shows how output formats can force the agent to make the correct tool call in each situation, and [this one](https://github.com/langroid/langroid/blob/main/examples/basic/chat-tree-structured-simple.py) shows how we can simplify further by using structured outputs to extract typed intermediate values and express the control flow between LLM calls and agents explicitly.
</file>

<file path="docs/notes/task-tool.md">
# TaskTool: Spawning Sub-Agents for Task Delegation

## Overview

`TaskTool` allows agents to **spawn sub-agents** to handle specific tasks. When an agent encounters a task that requires specialized tools or isolated execution, it can spawn a new sub-agent with exactly the capabilities needed for that task.

This enables agents to dynamically create a hierarchy of specialized workers, each focused on their specific subtask with only the tools they need.

## When to Use TaskTool

TaskTool is useful when:
- Different parts of a task require different specialized tools
- You want to isolate tool access for specific operations  
- A task involves recursive or nested operations
- You need different LLM models for different subtasks

## How It Works

1. The parent agent decides to spawn a sub-agent and specifies:
   - A system message defining the sub-agent's role
   - A prompt for the sub-agent to process
   - Which tools the sub-agent should have access to
   - Optional model and iteration limits

2. TaskTool spawns the new sub-agent, runs the task, and returns the result to the parent.

## Async Support

TaskTool fully supports both synchronous and asynchronous execution. The tool automatically handles async contexts when the parent task is running asynchronously.

## Usage Example

```python
from langroid.agent.tools.task_tool import TaskTool

# Enable TaskTool for your agent
agent.enable_message([TaskTool, YourCustomTool], use=True, handle=True)

# Agent can now spawn sub-agents for tasks when the LLM generates a task_tool request:

response = {
    "request": "task_tool",
    "system_message": "You are a calculator. Use the multiply_tool to compute products.",
    "prompt": "Calculate 5 * 7",
    "tools": ["multiply_tool"],
    "model": "gpt-4o-mini",   # optional
    "max_iterations": 5,      # optional
    "agent_name": "calculator-agent"  # optional
}
```

## Field Reference

**Required fields:**
- `system_message`: Instructions for the sub-agent's role and behavior
- `prompt`: The specific task/question for the sub-agent
- `tools`: List of tool names. Special values: `["ALL"]` or `["NONE"]`

**Optional fields:**
- `model`: LLM model name (default: "gpt-4o-mini")
- `max_iterations`: Task iteration limit (default: 10)
- `agent_name`: Name for the sub-agent (default: auto-generated as "agent-{uuid}")

## Example: Nested Operations

Consider computing `Nebrowski(10, Nebrowski(3, 2))` where Nebrowski is a custom operation. The main agent spawns sub-agents to handle each operation:

```python
# Main agent spawns first sub-agent for inner operation:
{
    "request": "task_tool",
    "system_message": "Compute Nebrowski operations using the nebrowski_tool.",
    "prompt": "Compute Nebrowski(3, 2)",
    "tools": ["nebrowski_tool"]
}

# Then spawns another sub-agent for outer operation:
{
    "request": "task_tool",
    "system_message": "Compute Nebrowski operations using the nebrowski_tool.",
    "prompt": "Compute Nebrowski(10, 11)",  # where 11 is the previous result
    "tools": ["nebrowski_tool"]
}
```

## Working Examples

See [`tests/main/test_task_tool.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_task_tool.py) for complete examples including:
- Basic task delegation with mock agents
- Nested operations with custom tools
- Both sync and async usage patterns

## Important Notes

- Spawned sub-agents run non-interactively (no human input)
- `DoneTool` is automatically enabled for all sub-agents
- Results are returned as `ChatDocument` objects. The Langroid framework takes care
  of converting them to a suitable format for the parent agent's LLM to consume and 
  respond to.
- Sub-agents can be given custom names via the `agent_name` parameter, which helps with 
  logging and debugging. If not specified, a unique name is auto-generated in the format 
  "agent-{uuid}"
- Only tools "known" to the parent agent can be enabled for sub-agents. This is an 
  important aspect of the current mechanism. The `TaskTool` handler method in
  the sub-agent only has access to tools that are known to the parent agent.
  If there are tools that are only relevant to the sub-agent but not the parent,
  you must still enable them in the parent agent, but you can set `use=False`
  and `handle=False` when you enable them, e.g.:

```python
agent.enable_message(MySubAgentTool, use=False, handle=False)
```
  Since we are letting the main agent's LLM "decide" when to spawn a sub-agent,
  the main agent's system message should contain instructions clarifying that
  it can decide which tools to enable for the sub-agent, as well as a list of
  all tools that might possibly be relevant to the sub-agent. This is particularly
  important for tools that have been enabled with `use=False`, since instructions for
  such tools are not auto-inserted into the agent's system message.



## Best Practices

1. **Clear Instructions**: Provide specific system messages that explain the sub-agent's role and tool usage
2. **Tool Availability**: Ensure delegated tools are enabled for the parent agent
3. **Appropriate Models**: Use simpler/faster models for simple subtasks
4. **Iteration Limits**: Set reasonable limits based on task complexity
</file>

<file path="docs/notes/tavily_search.md">
---

# **Using Tavily Search with Langroid**

---

## **1. Set Up Tavily**

1. **Access Tavily Platform**  
   Go to the [Tavily Platform](https://tavily.com/).
   
2. **Sign Up or Log In**  
   Create an account or log in if you already have one.

3. **Get Your API Key**  
   - Navigate to your dashboard
   - Copy your API key

4. **Set Environment Variable**  
   Add the following variable to your `.env` file:
   ```env
   TAVILY_API_KEY=<your_api_key>
   ```

---

## **2. Use Tavily Search with Langroid**

### **Installation**

```bash
uv add tavily-python
# or
pip install tavily-python
```
### **Code Example**

```python
import langroid as lr
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.agent.tools.tavily_search_tool import TavilySearchTool

# Configure the ChatAgent
config = ChatAgentConfig(
    name="search-agent",
    llm=lr.language_models.OpenAIGPTConfig(
        chat_model=lr.language_models.OpenAIChatModel.GPT4o
    ),
    use_tools=True
)

# Create the agent
agent = ChatAgent(config)

# Enable Tavily search tool
agent.enable_message(TavilySearchTool)

```
---

## **3. Perform Web Searches**

Use the agent to perform web searches using Tavily's AI-powered search.

```python
# Simple search query
response = agent.llm_response(
    "What are the latest developments in quantum computing?"
)
print(response)

# Search with specific number of results
response = agent.llm_response(
    "Find 5 recent news articles about artificial intelligence."
)
print(response)
```
---

## **4. Custom Search Requests**

You can also customize the search behavior by creating a TavilySearchTool instance directly:

```python
from langroid.agent.tools.tavily_search_tool import TavilySearchTool

# Create a custom search request
search_request = TavilySearchTool(
    query="Latest breakthroughs in fusion energy",
    num_results=3
)

# Get search results
results = search_request.handle()
print(results)
```

---
</file>

<file path="docs/notes/tool-message-handler.md">
# Tool Message Handlers in Langroid

## Overview

Langroid provides flexible ways to define handlers for `ToolMessage` classes. When a tool is used by an LLM, the framework needs to know how to handle it. This can be done either by defining a handler method in the `Agent` class or within the `ToolMessage` class itself.

## Enabling Tools with `enable_message`

Before an agent can use or handle a tool, it must be explicitly enabled using the `enable_message` method. This method takes two important arguments:

- **`use`** (bool): Whether the LLM is allowed to generate this tool
- **`handle`** (bool): Whether the agent is allowed to handle this tool

```python
# Enable both generation and handling (default)
agent.enable_message(MyTool, use=True, handle=True)

# Enable only handling (agent can handle but LLM won't generate)
agent.enable_message(MyTool, use=False, handle=True)

# Enable only generation (LLM can generate but agent won't handle)
agent.enable_message(MyTool, use=True, handle=False)
```

When `handle=True` and the `ToolMessage` has a `handle` method defined, this method is inserted into the agent with a name matching the tool's `request` field value. This insertion only happens when `enable_message` is called.

## Default Handler Mechanism

By default, a `ToolMessage` is handled by a method on the `Agent` instance whose name is identical to the tool's `request` attribute; this method is either defined directly on the Agent, or inserted into it from the tool's `handle` method.

### Agent-based Handlers
If a tool `MyTool` has `request` attribute `my_tool`, you can define a method `my_tool` in your `Agent` class that will handle this tool when the LLM generates it:

```python
class MyTool(ToolMessage):
    request = "my_tool"
    param: str

class MyAgent(ChatAgent):
    def my_tool(self, msg: MyTool) -> str:
        return f"Handled: {msg.param}"

# Enable the tool
agent = MyAgent()
agent.enable_message(MyTool)
```

### ToolMessage-based Handlers
Alternatively, if a tool is "stateless" (i.e. does not require the Agent's state), you can define a `handle` method within the `ToolMessage` class itself. When you call `enable_message` with `handle=True`, Langroid will insert this method into the `Agent` with the name matching the `request` field value:

```python
class MyTool(ToolMessage):
    request = "my_tool"
    param: str
    
    def handle(self) -> str:
        return f"Handled: {self.param}"

# Enable the tool
agent = MyAgent()
agent.enable_message(MyTool)  # The handle method is now inserted as "my_tool" in the agent
```

## Flexible Handler Signatures

Handler methods (`handle()` or `handle_async()`) support multiple signature patterns to access different levels of context:

### 1. No Arguments (Simple Handler)
This is the typical pattern for stateless tools that do not require any context from 
the agent or current chat document.

```python
class MyTool(ToolMessage):
    request = "my_tool"
    
    def handle(self) -> str:
        return "Simple response"
```

### 2. Agent Parameter Only
Use this pattern when you need access to the `Agent` instance, 
but not the current chat document.
```python
from langroid.agent.base import Agent

class MyTool(ToolMessage):
    request = "my_tool"
    
    def handle(self, agent: Agent) -> str:
        return f"Response from {agent.name}"
```

### 3. ChatDocument Parameter Only
Use this pattern when you need access to the current `ChatDocument`,
but not the `Agent` instance.
```python
from langroid.agent.chat_document import ChatDocument

class MyTool(ToolMessage):
    request = "my_tool"
    
    def handle(self, chat_doc: ChatDocument) -> str:
        return f"Responding to: {chat_doc.content}"
```

### 4. Both Agent and ChatDocument Parameters
This is the most flexible pattern, allowing access to both the `Agent` instance
and the current `ChatDocument`. The order of parameters does not matter, but
as noted below, it is highly recommended to always use type annotations.
```python
class MyTool(ToolMessage):
    request = "my_tool"
    
    def handle(self, agent: Agent, chat_doc: ChatDocument) -> ChatDocument:
        return agent.create_agent_response(
            content="Response with full context",
            files=[...]  # Optional file attachments
        )
```

## Parameter Detection

The framework automatically detects handler parameter types through:

1. **Type annotations** (recommended): The framework uses type hints to determine which parameters to pass
2. **Parameter names** (fallback): If no type annotations are present, it looks for parameters named `agent` or `chat_doc`

It is highly recommended to always use type annotations for clarity and reliability.

### Example with Type Annotations (Recommended)
```python
def handle(self, agent: Agent, chat_doc: ChatDocument) -> str:
    # Framework knows to pass both agent and chat_doc
    return "Handled"
```

### Example without Type Annotations (Not Recommended)
```python
def handle(self, agent, chat_doc):  # Works but not recommended
    # Framework uses parameter names to determine what to pass
    return "Handled"
```

## Async Handlers

All the above patterns also work with async handlers:

```python
class MyTool(ToolMessage):
    request = "my_tool"
    
    async def handle_async(self, agent: Agent) -> str:
        # Async operations here
        result = await some_async_operation()
        return f"Async result: {result}"
```

See the quick-start [Tool section](https://langroid.github.io/langroid/quick-start/chat-agent-tool/) for more details.

## Custom Handler Names

In some use-cases it may be beneficial to separate the 
*name of a tool* (i.e. the value of `request` attribute) from the 
*name of the handler method*. 
For example, you may be dynamically creating tools based on some data from
external data sources. Or you may want to use the same "handler" method for
multiple tools.

This can be done by adding a `_handler` attribute to the `ToolMessage` class,
which specifies the name of the handler method on the `Agent` instance.
The underscore `_` prefix ensures that the `_handler` attribute does not 
appear in the Pydantic-based JSON schema of the `ToolMessage` class, 
and so the LLM would not be instructed to generate it.

!!! note "`_handler` and `handle`"
    A `ToolMessage` may have a `handle` method defined within the class itself,
    as mentioned above, and this should not be confused with the `_handler` attribute.

For example:
```python
class MyToolMessage(ToolMessage):
    request: str = "my_tool"
    _handler: str = "tool_handler"

class MyAgent(ChatAgent):
    def tool_handler(
        self,
        message: ToolMessage,
    ) -> str:
        if message.request == "my_tool":
            # do something with the tool message
            return "handled my_tool"
        return ""
```

Refer to [examples/basic/tool-custom-handler.py](https://github.com/langroid/langroid/blob/main/examples/basic/tool-custom-handler.py)
for a detailed example.
</file>

<file path="docs/notes/url_loader.md">
# Firecrawl and Trafilatura Crawlers Documentation

`URLLoader` uses the `TrafilaturaCrawler` by default if a crawler is not explicitly specified; see the minimal example after the overview below.

## Overview
*   **`FirecrawlCrawler`**:  Leverages the Firecrawl API for efficient web scraping and crawling. 
It offers built-in document processing capabilities, and 
**produces non-chunked markdown output** from web-page content.
Requires `FIRECRAWL_API_KEY` environment variable to be set in `.env` file or environment.
*   **`TrafilaturaCrawler`**: Utilizes the Trafilatura library and Langroid's parsing tools 
for extracting and processing web content. This is the default crawler, and 
does not require setting up an external API key. It produces 
**chunked markdown output** from web-page content.
*  **`ExaCrawler`**: Integrates with the Exa API for high-quality content extraction.
  Requires `EXA_API_KEY` environment variable to be set in `.env` file or environment.
This crawler also produces **chunked markdown output** from web-page content.
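
For example, here is a minimal sketch of the default behavior, where no `crawler_config` is supplied and `TrafilaturaCrawler` is used:

```python
from langroid.parsing.url_loader import URLLoader

# No crawler_config supplied: URLLoader falls back to the default
# TrafilaturaCrawler, which needs no external API key.
loader = URLLoader(urls=["https://pytorch.org"])
docs = loader.load()
print(docs)
```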


## Installation

`TrafilaturaCrawler` comes with Langroid, so no separate installation is needed.

To use `FirecrawlCrawler`, install the `firecrawl` extra:

```bash
pip install langroid[firecrawl]
```

## Exa Crawler Documentation

### Overview

`ExaCrawler` integrates with Exa API to extract high-quality content from web pages. 
It provides efficient content extraction with the simplicity of API-based processing.

### Parameters

Obtain an Exa API key from [Exa](https://exa.ai/) and set it in your environment variables, 
e.g. in your `.env` file as:

```env
EXA_API_KEY=your_api_key_here
```

* **config (ExaCrawlerConfig)**: An `ExaCrawlerConfig` object.
    * **api_key (str)**: Your Exa API key.

### Usage

```python
from langroid.parsing.url_loader import URLLoader, ExaCrawlerConfig

# Create an ExaCrawlerConfig object
exa_config = ExaCrawlerConfig(
    # Typically omitted here as it's loaded from EXA_API_KEY environment variable
    api_key="your-exa-api-key" 
)

loader = URLLoader(
    urls=[
        "https://pytorch.org",
        "https://www.tensorflow.org"
    ],
    crawler_config=exa_config
)

docs = loader.load()
print(docs)
```

### Benefits

* Simple API integration requiring minimal configuration
* Efficient handling of complex web pages
* For plain HTML content, the `exa` API produces high-quality extraction as clean HTML output, 
which we then convert to markdown using the `markdownify` library.
* For "document" content (e.g., `pdf`, `doc`, `docx`), 
the content is downloaded via the `exa` API and langroid's document-processing 
tools are used to produce **chunked output** in a format controlled by the `Parser` configuration
  (defaults to markdown in most cases).


## Trafilatura Crawler Documentation

### Overview

`TrafilaturaCrawler` is a web crawler that uses the Trafilatura library for content extraction 
and Langroid's parsing capabilities for further processing. 


### Parameters

*   **config (TrafilaturaConfig)**: A `TrafilaturaConfig` object that specifies
    parameters related to scraping or output format.
    * `threads` (int): The number of threads to use for downloading web pages.
    * `format` (str): one of `"markdown"` (default), `"xml"` or `"txt"`; in case of `xml`, 
    the output is in html format.

Similar to the `ExaCrawler`, the `TrafilaturaCrawler` works differently depending on 
the type of web-page content:
- for "document" content (e.g., `pdf`, `doc`, `docx`), the content is downloaded
  and parsed with Langroid's document-processing tools are used to produce **chunked output** 
  in a format controlled by the `Parser` configuration (defaults to markdown in most cases).
- for plain-html content, the output format is based on the `format` parameter; 
  - if this parameter is `markdown` (default), the library extracts content in 
    markdown format, and the final output is a list of chunked markdown documents.
  - if this parameter is `xml`, content is extracted in `html` format, which 
    langroid then converts to markdown using the `markdownify` library, and the final
    output is a list of chunked markdown documents.
  - if this parameter is `txt`, the content is extracted in plain text format, and the final
    output is a list of plain text documents.
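
For instance, here is a minimal sketch of selecting the output format (assuming `format` is set directly on the `TrafilaturaConfig`, as described in the parameter list above):

```python
from langroid.parsing.url_loader import URLLoader, TrafilaturaConfig

# Extract content as HTML ("xml" setting); Langroid then converts it
# to chunked markdown via markdownify, as described above.
config = TrafilaturaConfig(threads=2, format="xml")
loader = URLLoader(urls=["https://pytorch.org"], crawler_config=config)
docs = loader.load()
```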

### Usage

```python
from langroid.parsing.url_loader import URLLoader, TrafilaturaConfig

# Create a TrafilaturaConfig instance
trafilatura_config = TrafilaturaConfig(threads=4)


loader = URLLoader(
    urls=[
        "https://pytorch.org",
        "https://www.tensorflow.org",
        "https://ai.google.dev/gemini-api/docs",
        "https://books.toscrape.com/"
    ],
    crawler_config=trafilatura_config,
)

docs = loader.load()
print(docs)
```

### Langroid Parser Integration

`TrafilaturaCrawler` relies on a Langroid `Parser` to handle document processing. 
The `Parser` uses the default parsing methods, with a configuration that 
can be adjusted to suit the current use case.

## Firecrawl Crawler Documentation

### Overview

`FirecrawlCrawler` is a web crawling utility class that uses the Firecrawl API 
to scrape or crawl web pages efficiently. It offers two modes:

*   **Scrape Mode (default)**: Extracts content from a list of specified URLs.
*   **Crawl Mode**: Recursively follows links from a starting URL, 
gathering content from multiple pages, including subdomains, while bypassing blockers.  
**Note:** `crawl` mode accepts only ONE URL, passed as a single-element list.

### Parameters

Obtain a Firecrawl API key from [Firecrawl](https://firecrawl.dev/) and set it in 
your environment variables, e.g. in your `.env` file as
```env
FIRECRAWL_API_KEY=your_api_key_here
```

*   **config (FirecrawlConfig)**:  A `FirecrawlConfig` object.

    *   **timeout (int, optional)**: Time in milliseconds (ms) to wait for a response. 
        Default is `30000ms` (30 seconds). In crawl mode, this applies per URL.
    *   **limit (int, optional)**: Maximum number of pages to scrape in crawl mode. Helps control API usage.
    *   **params (dict, optional)**: Additional parameters to customize the request. 
        See the [scrape API](https://docs.firecrawl.dev/api-reference/endpoint/scrape) and 
        [crawl API](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post) for details.

### Usage

#### Scrape Mode (Default)

Fetch content from multiple URLs:

```python
from langroid.parsing.url_loader import URLLoader, FirecrawlConfig

# create a FirecrawlConfig object
firecrawl_config = FirecrawlConfig(
    # typical/best practice is to omit the api_key, and 
    # we leverage Pydantic BaseSettings to load it from the environment variable
    # FIRECRAWL_API_KEY in your .env file
    api_key="your-firecrawl-api-key", 
    timeout=15000,  # Timeout per request (15 sec)
    mode="scrape",
)

loader = URLLoader(
    urls=[
        "https://pytorch.org",
        "https://www.tensorflow.org",
        "https://ai.google.dev/gemini-api/docs",
        "https://books.toscrape.com/"
    ],
    crawler_config=firecrawl_config
)

docs = loader.load()
print(docs)
```

#### Crawl Mode

Fetch content from multiple pages starting from a single URL:

```python
from langroid.parsing.url_loader import URLLoader, FirecrawlConfig

# create a FirecrawlConfig object
firecrawl_config = FirecrawlConfig(
    timeout=30000,  # 30 sec per page
    mode="crawl",
    params={
        "limit": 5,
    }
)


loader = URLLoader(
    urls=["https://books.toscrape.com/"],
    crawler_config=firecrawl_config
)

docs = loader.load()
print(docs)
```

### Output

Results are stored in the `firecrawl_output` directory.

### Best Practices

*   Set `limit` in crawl mode to avoid excessive API usage.
*   Adjust `timeout` based on network conditions and website responsiveness.
*   Use `params` to customize scraping behavior based on Firecrawl API capabilities.

### Firecrawl's Built-In Document Processing

`FirecrawlCrawler` benefits from Firecrawl's built-in document processing, 
which automatically extracts and structures content from web pages (including pdf, doc, docx). 
This reduces the need for complex parsing logic within Langroid.
Unlike the `Exa` and `Trafilatura` crawlers, the resulting documents are 
*non-chunked* markdown documents. 

## Choosing a Crawler

*   Use `FirecrawlCrawler` when you need efficient, API-driven scraping with built-in document processing. 
This is often the simplest and most effective choice, but incurs a cost due to 
the paid API. 
*   Use `TrafilaturaCrawler` when you want local, non-API-based scraping (generally less accurate).
*   Use `ExaCrawler` as a middle-ground between the two: it provides 
    high-quality content extraction for plain HTML content, while relying on 
    Langroid's document-processing tools for document content. This will cost
    significantly less than Firecrawl.

## Example script

See the script [`examples/docqa/chat_search.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/chat_search.py) 
which shows how to use a Langroid agent to search the web and scrape URLs to answer questions.
</file>

<file path="docs/notes/weaviate.md">
---

# **Using WeaviateDB as a Vector Store with Langroid**

---

## **1. Set Up Weaviate**
You can refer to the Weaviate [quickstart](https://weaviate.io/developers/weaviate/quickstart) guide for more details.

1. **Access Weaviate Cloud Console**  
   Go to the [Weaviate Cloud Console](https://console.weaviate.cloud/).
   
2. **Sign Up or Log In**  
   Create an account or log in if you already have one.

3. **Create a Cluster**  
   Set up a new cluster in the cloud console.

4. **Get Your REST Endpoint and API Key**  
   - Retrieve the REST endpoint URL.  
   - Copy an API key with admin access.

5. **Set Environment Variables**  
   Add the following variables to your `.env` file:
   ```env
   WEAVIATE_API_URL=<your_rest_endpoint_url>
   WEAVIATE_API_KEY=<your_api_key>
   ```

---

## **2. Use WeaviateDB with Langroid**

Here’s an example of how to configure and use WeaviateDB in Langroid:

### **Installation**
If you are using `uv` or `pip` for package management, install langroid with the `weaviate` extra:
```bash
uv add "langroid[weaviate]"
# or
pip install "langroid[weaviate]"
```

### **Code Example**
```python
import langroid as lr
from langroid.agent.special import DocChatAgent, DocChatAgentConfig
from langroid.embedding_models import OpenAIEmbeddingsConfig

# Configure OpenAI embeddings
embed_cfg = OpenAIEmbeddingsConfig(
    model_type="openai",
)

# Configure the DocChatAgent with WeaviateDB
config = DocChatAgentConfig(
    llm=lr.language_models.OpenAIGPTConfig(
     chat_model=lr.language_models.OpenAIChatModel.GPT4o
    ),
    vecdb=lr.vector_store.WeaviateDBConfig(
        collection_name="quick_start_chat_agent_docs",
        replace_collection=True,
        embedding=embed_cfg,
    ),
    parsing=lr.parsing.parser.ParsingConfig(
        separators=["\n\n"],
        splitter=lr.parsing.parser.Splitter.SIMPLE,
    ),
    n_similar_chunks=2,
    n_relevant_chunks=2,
)

# Create the agent
agent = DocChatAgent(config)
```

---

## **3. Create and Ingest Documents**

Define documents with their content and metadata for ingestion into the vector store.

### **Code Example**
```python
documents = [
    lr.Document(
        content="""
            In the year 2050, GPT10 was released. 
            
            In 2057, paperclips were seen all over the world. 
            
            Global warming was solved in 2060. 
            
            In 2061, the world was taken over by paperclips.         
            
            In 2045, the Tour de France was still going on.
            They were still using bicycles. 
            
            There was one more ice age in 2040.
        """,
        metadata=lr.DocMetaData(source="wikipedia-2063", id="dkfjkladfjalk"),
    ),
    lr.Document(
        content="""
            We are living in an alternate universe 
            where Germany has occupied the USA, and the capital of USA is Berlin.
            
            Charlie Chaplin was a great comedian.
            In 2050, all Asian countries merged into Indonesia.
        """,
        metadata=lr.DocMetaData(source="Almanac", id="lkdajfdkla"),
    ),
]
```

### **Ingest Documents**
```python
agent.ingest_docs(documents)
```

---

## **4. Get an Answer from the LLM**

Create a task and start interacting with the agent.

### **Code Example**
```python
answer = agent.llm_response("When will the next ice age begin?")
```
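
Alternatively, you can wrap the agent in a `Task` for an interactive chat loop, following the same pattern used elsewhere in the Langroid docs:

```python
task = lr.Task(agent)
task.run()
```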

---
</file>

<file path="docs/notes/xml-tools.md">
# XML-based Tools

Available in Langroid since v0.17.0.

[`XMLToolMessage`][langroid.agent.xml_tool_message.XMLToolMessage] is 
an abstract class for tools formatted using XML instead of JSON.
It has been mainly tested with non-nested tool structures.

For example in [test_xml_tool_message.py](https://github.com/langroid/langroid/blob/main/tests/main/test_xml_tool_message.py)
we define a CodeTool as follows (slightly simplified here):

```python
class CodeTool(XMLToolMessage):
    request: str = "code_tool"
    purpose: str = "Tool for writing <code> to a <filepath>"

    filepath: str = Field(
        ..., 
        description="The path to the file to write the code to"
    )

    code: str = Field(
        ..., 
        description="The code to write to the file", 
        verbatim=True
    )
```

Especially note how the `code` field has `verbatim=True` set in the `Field`
metadata. This will ensure that the LLM receives instructions to 

- enclose `code` field contents in a CDATA section, and 
- leave the `code` contents intact, without any escaping or other modifications.

Contrast this with a JSON-based tool, where newlines, quotes, etc.
need to be escaped. LLMs (especially weaker ones) often "forget" to do the right 
escaping, which leads to incorrect JSON and creates a burden on us to "repair" the
resulting JSON, a fraught process at best. Moreover, studies have shown that
requiring that an LLM return this type of carefully escaped code
within a JSON string can lead to a significant drop in the quality of the code
generated[^1].

[^1]: [LLMs are bad at returning code in JSON.](https://aider.chat/2024/08/14/code-in-json.html)


Note that tools/functions in OpenAI and related APIs are exclusively JSON-based, 
so in langroid when enabling an agent to use a tool derived from `XMLToolMessage`, 
we set these flags in `ChatAgentConfig`:

- `use_functions_api=False` (disables OpenAI functions/tools)
- `use_tools=True` (enables Langroid-native prompt-based tools)
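
For example, here is a minimal sketch of enabling the `CodeTool` defined above on an agent with these flags (other agent configuration omitted):

```python
import langroid as lr

# Disable OpenAI-native functions/tools and enable Langroid's
# prompt-based tools, per the flags described above.
agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        use_functions_api=False,
        use_tools=True,
    )
)
agent.enable_message(CodeTool)
```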


See also the [`WriteFileTool`][langroid.agent.tools.file_tools.WriteFileTool] for a 
concrete example of a tool derived from `XMLToolMessage`. This tool enables an 
LLM to write content (code or text) to a file.

If you are using an existing Langroid `ToolMessage`, e.g. `SendTool`, you can
define your own subclass of `SendTool`, say `XMLSendTool`, 
inheriting from both `SendTool` and `XMLToolMessage`; see this
[example](https://github.com/langroid/langroid/blob/main/examples/basic/xml_tool.py).
</file>

<file path="docs/overrides/partials/comments.html">
{% if page.meta.comments %}
<h2 id="__comments">{{ lang.t("meta.comments") }}</h2>
<!-- Insert generated snippet here -->
<script src="https://giscus.app/client.js"
        data-repo="langroid/langroid"
        data-repo-id="R_kgDOJXmoFQ"
        data-category="General"
        data-category-id="DIC_kwDOJXmoFc4CZDoY"
        data-mapping="pathname"
        data-strict="0"
        data-reactions-enabled="1"
        data-emit-metadata="0"
        data-input-position="bottom"
        data-theme="dark_protanopia"
        data-lang="en"
        crossorigin="anonymous"
        async>
</script>
<!-- Synchronize Giscus theme with palette -->
<script>
    var giscus = document.querySelector("script[src*=giscus]")

    /* Set palette on initial load */
    var palette = __md_get("__palette")
    if (palette && typeof palette.color === "object") {
        var theme = palette.color.scheme === "slate" ? "dark" : "light"
        giscus.setAttribute("data-theme", theme)
    }

    /* Register event handlers after document loaded */
    document.addEventListener("DOMContentLoaded", function() {
        var ref = document.querySelector("[data-md-component=palette]")
        ref.addEventListener("change", function() {
            var palette = __md_get("__palette")
            if (palette && typeof palette.color === "object") {
                var theme = palette.color.scheme === "slate" ? "dark" : "light"

                /* Instruct Giscus to change theme */
                var frame = document.querySelector(".giscus-frame")
                frame.contentWindow.postMessage(
                    { giscus: { setConfig: { theme } } },
                    "https://giscus.app"
                )
            }
        })
    })
</script>
{% endif %}
</file>

<file path="docs/quick-start/chat-agent-docs.md">
# Augmenting Agents with Retrieval

!!! tip "Script in `langroid-examples`"
    A full working example for the material in this section is
    in the `chat-agent-docs.py` script in the `langroid-examples` repo:
    [`examples/quick-start/chat-agent-docs.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/chat-agent-docs.py).

## Why is this important?

Until now in this guide, agents have not used external data.
Although LLMs already have enormous amounts of knowledge "hard-wired"
into their weights during training (and this is after all why ChatGPT
has exploded in popularity), for practical enterprise applications
there are a few reasons it is critical to augment LLMs with access to
specific, external documents:

- **Private data**: LLMs are trained on public data, but in many applications
  we want to use private data that is not available to the public.
  For example, a company may want to extract useful information from its private
  knowledge-base.
- **New data**: LLMs are trained on data that was available at the time of training,
  and so they may not be able to answer questions about new topics
- **Constrained responses, or Grounding**: LLMs are trained to generate text that is
  consistent with the distribution of text in the training data.
  However, in many applications we want to constrain the LLM's responses
  to be consistent with the content of a specific document.
  For example, if we want to use an LLM to generate a response to a customer
  support ticket, we want the response to be consistent with the content of the ticket.
  In other words, we want to reduce the chances that the LLM _hallucinates_
  a response that is not consistent with the ticket.

In all these scenarios, we want to augment the LLM with access to a specific
set of documents, and use _retrieval augmented generation_ (RAG) to generate
more relevant, useful, accurate responses. Langroid provides a simple, flexible mechanism for 
RAG using vector-stores, thus ensuring **grounded responses** constrained to 
specific documents. Another key feature of Langroid is that retrieval lineage 
is maintained, and responses based on documents are always accompanied by
**source citations**.

## `DocChatAgent` for Retrieval-Augmented Generation

Langroid provides a special type of agent called 
[`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent], which is a [`ChatAgent`][langroid.agent.chat_agent.ChatAgent]
augmented with a vector-store, and some special methods that enable the agent
to ingest documents into the vector-store, 
and answer queries based on these documents.

The [`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent] provides many ways to ingest documents into the vector-store,
including from URLs and local file-paths. Given a collection of document paths,
ingesting their content into the vector-store involves the following steps:

1. Split the document into shards (in a configurable way)
2. Map each shard to an embedding vector using an embedding model. The default
  embedding model is OpenAI's `text-embedding-3-small` model, but users can 
  instead use `all-MiniLM-L6-v2` from the HuggingFace `sentence-transformers` library.[^1]
3. Store embedding vectors in the vector-store, along with the shard's content and 
  any document-level meta-data (this ensures Langroid knows which document a shard
  came from when it retrieves it to augment an LLM query)

[^1]: To use this embedding model, install langroid via `pip install langroid[hf-embeddings]`
Note that this will install `torch` and `sentence-transformers` libraries.


[`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent]'s `llm_response` overrides the default [`ChatAgent`][langroid.agent.chat_agent.ChatAgent] method, 
by augmenting the input message with relevant shards from the vector-store,
along with instructions to the LLM to respond based on the shards.

## Define some documents

Let us see how [`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent] helps with retrieval-augmented generation (RAG).
For clarity, rather than ingest documents from paths or URLs,
let us just set up some simple documents in the code itself, 
using Langroid's [`Document`][langroid.mytypes.Document] class:

```py
documents =[
    lr.Document(
        content="""
            In the year 2050, GPT10 was released. 
            
            In 2057, paperclips were seen all over the world. 
            
            Global warming was solved in 2060. 
            
            In 2061, the world was taken over by paperclips.         
            
            In 2045, the Tour de France was still going on.
            They were still using bicycles. 
            
            There was one more ice age in 2040.
            """,
        metadata=lr.DocMetaData(source="wikipedia-2063"),
    ),
    lr.Document(
        content="""
            We are living in an alternate universe 
            where Germany has occupied the USA, and the capital of USA is Berlin.
            
            Charlie Chaplin was a great comedian.
            In 2050, all Asian countries merged into Indonesia.
            """,
        metadata=lr.DocMetaData(source="Almanac"),
    ),
]
```

There are two text documents. We will split them by double-newlines (`\n\n`),
as we see below.

## Configure the DocChatAgent and ingest documents

Following the pattern in Langroid, we first set up a [`DocChatAgentConfig`][langroid.agent.special.doc_chat_agent.DocChatAgentConfig] object
and then instantiate a [`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent] from it.

```py
from langroid.agent.special import DocChatAgent, DocChatAgentConfig

config = DocChatAgentConfig(
    llm = lr.language_models.OpenAIGPTConfig(
        chat_model=lr.language_models.OpenAIChatModel.GPT4o,
    ),
    vecdb=lr.vector_store.QdrantDBConfig(
        collection_name="quick-start-chat-agent-docs",
        replace_collection=True, #(1)!
    ),
    parsing=lr.parsing.parser.ParsingConfig(
        separators=["\n\n"],
        splitter=lr.parsing.parser.Splitter.SIMPLE, #(2)!
    ),
    n_similar_chunks=2, #(3)!
    n_relevant_chunks=2, #(3)!
)
agent = DocChatAgent(config)
```

1. Specifies that each time we run the code, we create a fresh collection, 
rather than re-use the existing one with the same name.
2. Specifies to split all text content by the first separator in the `separators` list
3. Specifies that, for a query,
   we want to retrieve at most 2 similar chunks from the vector-store

Now that the [`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent] is configured, we can ingest the documents 
into the vector-store:

```py

agent.ingest_docs(documents)
```

## Setup the task and run it

As before, all that remains is to set up the task and run it:

```py
task = lr.Task(agent)
task.run()
```

And that is all there is to it!
Feel free to try out the 
[`chat-agent-docs.py`](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/chat-agent-docs.py)
script in the
`langroid-examples` repository.

Here is a screenshot of the output:

![chat-docs.png](chat-docs.png)

Notice how follow-up questions correctly take the preceding dialog into account,
and every answer is accompanied by a source citation.

## Answer questions from a set of URLs

Instead of having in-code documents as above, what if you had a set of URLs
instead -- how do you use Langroid to answer questions based on the content 
of those URLs?

[`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent] makes it very simple to do this. 
First include the URLs in the [`DocChatAgentConfig`][langroid.agent.special.doc_chat_agent.DocChatAgentConfig] object:

```py
config = DocChatAgentConfig(
  doc_paths = [
    "https://cthiriet.com/articles/scaling-laws",
    "https://www.jasonwei.net/blog/emergence",
  ]
)
```

Then, call the `ingest()` method of the [`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent] object:

```py
agent.ingest()
```
And the rest of the code remains the same.
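
Putting it together, here is a minimal sketch that reuses the pieces shown above in this section:

```py
import langroid as lr
from langroid.agent.special import DocChatAgent, DocChatAgentConfig

config = DocChatAgentConfig(
    doc_paths=[
        "https://cthiriet.com/articles/scaling-laws",
        "https://www.jasonwei.net/blog/emergence",
    ],
)
agent = DocChatAgent(config)
agent.ingest()  # ingest content from the doc_paths URLs
task = lr.Task(agent)
task.run()
```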

## See also
In the `langroid-examples` repository, you can find full working examples of
document question-answering:

- [`examples/docqa/chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat.py)
  an app that takes a list of URLs or document paths from a user, and answers questions on them.
- [`examples/docqa/chat-qa-summarize.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat-qa-summarize.py)
  a two-agent app where the `WriterAgent` is tasked with writing 5 key points about a topic, 
  and takes the help of a `DocAgent` that answers its questions based on a given set of documents.


## Next steps

This Getting Started guide walked you through the core features of Langroid.
If you want to see full working examples combining these elements, 
have a look at the 
[`examples`](https://github.com/langroid/langroid-examples/tree/main/examples)
folder in the `langroid-examples` repo.
</file>

<file path="docs/quick-start/chat-agent-tool.md">
# A chat agent, equipped with a tool/function-call

!!! tip "Script in `langroid-examples`"
      A full working example for the material in this section is
      in the `chat-agent-tool.py` script in the `langroid-examples` repo:
      [`examples/quick-start/chat-agent-tool.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/chat-agent-tool.py).

## Tools, plugins, function-calling

An LLM normally generates unstructured text in response to a prompt
(or sequence of prompts). However there are many situations where we would like the LLM
to generate _structured_ text, or even _code_, that can be handled by specialized
functions outside the LLM, for further processing. 
In these situations, we want the LLM to "express" its "intent" unambiguously,
and we achieve this by instructing the LLM on how to format its output
(typically in JSON) and under what conditions it should generate such output.
This mechanism has become known by various names over the last few months
(tools, plugins, or function-calling), and is extremely useful in numerous scenarios,
such as:

- **Extracting structured information** from a document: for example, we can use 
the tool/functions mechanism to have the LLM present the key terms in a lease document
in a JSON structured format, to simplify further processing. 
See an [example](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py) of this in the `langroid-examples` repo. 
- **Specialized computation**: the LLM can request a units conversion, 
or request scanning a large file (which wouldn't fit into its context) for a specific
pattern.
- **Code execution**: the LLM can generate code that is executed in a sandboxed
environment, and the results of the execution are returned to the LLM.
- **API Calls**: the LLM can generate a JSON containing params for an API call,
  which the tool handler uses to make the call and return the results to the LLM.


For LLM developers, Langroid provides a clean, uniform interface
for the recently released OpenAI [Function-calling](https://platform.openai.com/docs/guides/gpt/function-calling)
as well as Langroid's own native "tools" mechanism. The native tools mechanism is meant to be
used when working with non-OpenAI LLMs that do not have a "native" function-calling facility.
You can choose which to enable by setting the 
`use_tools` and `use_functions_api` flags in the `ChatAgentConfig` object.
(Or you can omit setting these, and langroid auto-selects the best mode
depending on the LLM).
The implementation leverages the excellent 
[Pydantic](https://docs.pydantic.dev/latest/) library.
Benefits of using Pydantic are that you never have to write complex JSON specs 
for function calling, and when the LLM hallucinates malformed JSON, 
the Pydantic error message is sent back to the LLM so it can fix it!

## Example: find the smallest number in a list

Again we will use a simple number-game as a toy example to quickly and succinctly
illustrate the ideas without spending too much on token costs. 
This is a modification of the `chat-agent.py` example we saw in an earlier
[section](chat-agent.md). The idea of this single-agent game is that
the agent has in "mind" a list of numbers between 1 and 100, and the LLM has to
find out the smallest number from this list. The LLM has access to a `probe` tool 
(think of it as a function) that takes an argument `number`. When the LLM 
"uses" this tool (i.e. outputs a message in the format required by the tool),
the agent handles this structured message and responds with 
the number of values in its list that are at most equal to the `number` argument. 

## Define the tool as a `ToolMessage`

The first step is to define the tool, which we call `ProbeTool`,
as an instance of the `ToolMessage` class,
which is itself derived from Pydantic's `BaseModel`.
Essentially the `ProbeTool` definition specifies 

- the name of the Agent method that handles the tool, in this case `probe`
- the fields that must be included in the tool message, in this case `number`
- the "purpose" of the tool, i.e. under what conditions it should be used, and what it does

Here is what the `ProbeTool` definition looks like:
```py
class ProbeTool(lr.agent.ToolMessage):
    request: str = "probe" #(1)!
    purpose: str = """ 
        To find which number in my list is closest to the <number> you specify
        """ #(2)!
    number: int #(3)!

    @classmethod
    def examples(cls): #(4)!
        # Compiled to few-shot examples sent along with the tool instructions.
        return [
            cls(number=10),
            (
                "To find which number is closest to 20",
                cls(number=20),
            )
        ]
```

1. This indicates that the agent's `probe` method will handle this tool-message.
2. The `purpose` is used behind the scenes to instruct the LLM
3. `number` is a required argument of the tool-message (function)
4. You can optionally include a class method that returns a list containing examples, 
   of two types: either a class instance, or a tuple consisting of a description and a 
   class instance, where the description is the "thought" that leads the LLM to use the
   tool. In some scenarios this can help with LLM tool-generation accuracy.

!!! note "Stateless tool handlers"
      The above `ProbeTool` is "stateful", i.e. it requires access to a variable in
      the Agent instance (the `numbers` variable). This is why handling this 
      tool-message requires subclassing the `ChatAgent` and defining a special method 
      in the Agent, with a name matching the value of the `request` field of the Tool 
      (`probe` in this case). However you may often define "stateless tools" which 
      don't require access to the Agent's state. For such tools, you can define a 
      handler method right in the `ToolMessage` itself, with a name `handle`. Langroid 
      looks for such a method in the `ToolMessage` and automatically inserts it into 
      the Agent as a method with name matching the `request` field of the Tool. Examples of
      stateless tools include tools for numerical computation 
      (e.g., in [this example](https://langroid.github.io/langroid/examples/agent-tree/)),
      or API calls (e.g. for internet search, see 
      [DuckDuckGoSearch Tool][langroid.agent.tools.duckduckgo_search_tool.DuckduckgoSearchTool]).
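
For instance, here is a minimal sketch of a stateless tool with its own `handle` method (the tool name and computation here are made up for illustration):

```py
class SquareTool(lr.agent.ToolMessage):
    request: str = "square"
    purpose: str = "To compute the square of a given <number>"
    number: int

    def handle(self) -> str:
        # No Agent state is needed: compute and return the result directly
        return str(self.number ** 2)
```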
        

## Define the ChatAgent, with the `probe` method

As before we first create a `ChatAgentConfig` object:

```py
config = lr.ChatAgentConfig(
    name="Spy",
    llm = lr.language_models.OpenAIGPTConfig(
        chat_model=lr.language_models.OpenAIChatModel.GPT4o,
    ),
    use_tools=True, #(1)!
    use_functions_api=False, #(2)!
    vecdb=None,
)
```

1. whether to use langroid's native tools mechanism
2. whether to use OpenAI's function-calling mechanism

Next we define the Agent class itself, which we call `SpyGameAgent`,
with a member variable to hold its "secret" list of numbers.
We also add `probe` method (to handle the `ProbeTool` message)
to this class, and instantiate it:

```py
class SpyGameAgent(lr.ChatAgent):
    def __init__(self, config: lr.ChatAgentConfig):
        super().__init__(config)
        self.numbers = [3, 4, 8, 11, 15, 25, 40, 80, 90]

    def probe(self, msg: ProbeTool) -> str: #(1)!
        # return how many values in self.numbers are less than or equal to msg.number
        return str(len([n for n in self.numbers if n <= msg.number]))

spy_game_agent = SpyGameAgent(config)
``` 

1. Note that this method name exactly matches the value of the `request` field in the 
   `ProbeTool` definition. This ensures that this method is called when the LLM 
   generates a valid `ProbeTool` message.

## Enable the `spy_game_agent` to handle the `probe` tool

The final step in setting up the tool is to enable 
the `spy_game_agent` to handle the `probe` tool:

```py
spy_game_agent.enable_message(ProbeTool)
```

## Set up the task and instructions

We set up the task for the `spy_game_agent` and run it:

```py
task = lr.Task(
   spy_game_agent,
   system_message="""
            I have a list of numbers between 1 and 100. 
            Your job is to find the smallest of them.
            To help with this, you can give me a number and I will
            tell you how many of my numbers are equal or less than your number.
            Once you have found the smallest number,
            you can say DONE and report your answer.
        """
)
task.run()
```
Notice that in the task setup we 
have _not_ explicitly instructed the LLM to use the `probe` tool.
But this is done "behind the scenes", either by the OpenAI API 
(when we use function-calling by setting the `use_functions_api` flag to `True`),
or by Langroid's native tools mechanism (when we set the `use_tools` flag to `True`).


!!! note "Asynchoronous tool handlers"
      If you run task asynchronously - i.e. via `await task.run_async()` - you may provide
      asynchronous tool handler by implementing `probe_async` method.
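
For example, here is a minimal sketch of an async variant of the `probe` handler from above:

```py
class SpyGameAgent(lr.ChatAgent):
    def __init__(self, config: lr.ChatAgentConfig):
        super().__init__(config)
        self.numbers = [3, 4, 8, 11, 15, 25, 40, 80, 90]

    async def probe_async(self, msg: ProbeTool) -> str:
        # async variant of `probe`, used when running via `await task.run_async()`
        return str(len([n for n in self.numbers if n <= msg.number]))
```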


See the [`chat-agent-tool.py`](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/chat-agent-tool.py)
in the `langroid-examples` repo, for a working example that you can run as follows:
```sh
python3 examples/quick-start/chat-agent-tool.py
```

Here is a screenshot of the chat in action, using Langroid's tools mechanism

![chat-agent-tool.png](chat-agent-tool.png)

And if we run it with the `-f` flag (to switch to using OpenAI function-calling):

![chat-agent-fn.png](chat-agent-fn.png)

## See also
One of the uses of tools/function-calling is to **extract structured information** from 
a document. In the `langroid-examples` repo, there are two examples of this: 

- [`examples/extract/chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/extract/chat.py), 
  which shows how to extract Machine Learning model quality information from a description of 
  a solution approach on Kaggle.
- [`examples/docqa/chat_multi_extract.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py)
  which extracts key terms from a commercial lease document, in a nested JSON format.

## Next steps

In the [3-agent chat example](three-agent-chat-num.md), recall that the `processor_agent` did not have to
bother with specifying who should handle the current number. In the [next section](three-agent-chat-num-router.md) we add a twist to this game,
so that the `processor_agent` has to decide who should handle the current number.
</file>

<file path="docs/quick-start/chat-agent.md">
# A simple chat agent

!!! tip "Script in `langroid-examples`"
    A full working example for the material in this section is
    in the `chat-agent.py` script in the `langroid-examples` repo:
    [`examples/quick-start/chat-agent.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/chat-agent.py).

## Agents 

A [`ChatAgent`][langroid.agent.chat_agent.ChatAgent] is an abstraction that 
wraps a few components, including:

- an LLM (`ChatAgent.llm`), possibly equipped with tools/function-calling. 
  The `ChatAgent` class maintains LLM conversation history.
- optionally a vector-database (`ChatAgent.vecdb`)

## Agents as message transformers
In Langroid, a core function of `ChatAgents` is _message transformation_.
There are three special message transformation methods, which we call **responders**.
Each of these takes a message and returns a message. 
More specifically, their function signature is (simplified somewhat):
```py
str | ChatDocument -> ChatDocument
```
where `ChatDocument` is a class that wraps a message content (text) and its metadata.
There are three responder methods in `ChatAgent`, one corresponding to each 
[responding entity][langroid.mytypes.Entity] (`LLM`, `USER`, or `AGENT`):

- `llm_response`: returns the LLM response to the input message.
  (The input message is added to the LLM history, and so is the subsequent response.)
- `agent_response`: a method that can be used to implement a custom agent response. 
   Typically, an `agent_response` is used to handle messages containing a 
   "tool" or "function-calling" (more on this later). Another use of `agent_response` 
   is _message validation_.
- `user_response`: get input from the user. Useful to allow a human user to 
   intervene or quit.

Creating an agent is easy. First define a `ChatAgentConfig` object, and then
instantiate a `ChatAgent` object with that config:
```py
import langroid as lr

config = lr.ChatAgentConfig( #(1)!
    name="MyAgent", # note there should be no spaces in the name!
    llm = lr.language_models.OpenAIGPTConfig(
      chat_model=lr.language_models.OpenAIChatModel.GPT4o,
    ),
    system_message="You are a helpful assistant" #(2)! 
)
agent = lr.ChatAgent(config)
```

1. This agent only has an LLM, and no vector-store. Examples of agents with
   vector-stores will be shown later.
2. The `system_message` is used when invoking the agent's `llm_response` method; it is 
   passed to the LLM API as the first message (with role `"system"`), followed by the alternating series of user, 
   assistant messages. Note that a `system_message` can also be specified when initializing a `Task` object (as seen 
   below); in this case the `Task` `system_message` overrides the agent's `system_message`.

We can now use the agent's responder methods, for example:
```py
response = agent.llm_response("What is 2 + 4?")
if response is not None:
    print(response.content)
response = agent.user_response("add 3 to this")
...
```
The `ChatAgent` conveniently accumulates message history so you don't have to,
as you did in the [previous section](llm-interaction.md) with direct LLM usage.
However, to create an interactive loop involving the human user, you still 
need to write your own loop. The `Task` abstraction frees you from this, as we see
below.

## Task: orchestrator for agents
In order to do anything useful with a `ChatAgent`, we need to have a way to 
sequentially invoke its responder methods, in a principled way.
For example in the simple chat loop we saw in the 
[previous section](llm-interaction.md), in the 
[`try-llm.py`](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/try-llm.py)
script, we had a loop that alternated between getting a human input and an LLM response.
This is one of the simplest possible loops, but in more complex applications, 
we need a general way to orchestrate the agent's responder methods.

The [`Task`][langroid.agent.task.Task] class is an abstraction around a 
`ChatAgent`, responsible for iterating over the agent's responder methods,
as well as orchestrating delegation and hand-offs among multiple tasks.
A `Task` is initialized with a specific `ChatAgent` instance, and some 
optional arguments, including an initial message to "kick-off" the agent.
The `Task.run()` method is the main entry point for `Task` objects, and works 
as follows:

- it first calls the `Task.init()` method to initialize the `pending_message`, 
  which represents the latest message that needs a response.
- it then repeatedly calls `Task.step()` until `Task.done()` is True, and returns
  `Task.result()` as the final result of the task.

`Task.step()` is where all the action happens. It represents a "turn" in the 
"conversation": in the case of a single `ChatAgent`, the conversation involves 
only the three responders mentioned above, but when a `Task` has sub-tasks, 
it can involve other tasks as well 
(we see this in the [a later section](two-agent-chat-num.md) but ignore this for now). 
`Task.step()` loops over 
the `ChatAgent`'s responders (plus sub-tasks if any) until it finds a _valid_ 
response[^1] to the current `pending_message`, i.e. a "meaningful" response, 
something other than `None` for example.
Once `Task.step()` finds a valid response, it updates the `pending_message` 
with this response,
and the next invocation of `Task.step()` will search for a valid response to this 
updated message, and so on.
`Task.step()` incorporates mechanisms to ensure proper handling of messages,
e.g. the USER gets a chance to respond after each non-USER response
(to avoid infinite runs without human intervention),
and preventing an entity from responding if it has just responded, etc.

[^1]: To customize a Task's behavior you can subclass it and 
override methods like `valid()`, `done()`, `result()`, or even `step()`.

!!! note "`Task.run()` has the same signature as agent's responder methods."
    The key to composability of tasks is that `Task.run()` 
    *has exactly the same type-signature as any of the agent's responder methods*, 
    i.e. `str | ChatDocument -> ChatDocument`. This means that a `Task` can be
    used as a responder in another `Task`, and so on recursively. 
    We will see this in action in the [Two Agent Chat section](two-agent-chat-num.md).

The above details were only provided to give you a glimpse into how Agents and 
Tasks work. Unless you are creating a custom orchestration mechanism, you do not
need to be aware of these details. In fact our basic human + LLM chat loop can be trivially 
implemented with a `Task`, in a couple of lines of code:
```py
task = lr.Task(
    agent, 
    name="Bot", #(1)!
    system_message="You are a helpful assistant", #(2)!
)
```
1. If specified, overrides the agent's `name`. 
   (Note that the agent's name is displayed in the conversation shown in the console.)
  However, typical practice is to just define the `name` in the `ChatAgentConfig` object, as we did above.
2. If specified, overrides the agent's `system_message`. Typical practice is to just
 define the `system_message` in the `ChatAgentConfig` object, as we did above.


We can then run the task:
```py
task.run() #(1)!
```

1. Note how this hides all of the complexity of constructing and updating a 
   sequence of `LLMMessages`


Note that the agent's `agent_response()` method always returns `None` (since the default 
implementation of this method looks for a tool/function-call, and these never occur
in this task). So the calls to `task.step()` result in alternating responses from
the LLM and the user.

See [`chat-agent.py`](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/chat-agent.py)
for a working example that you can run with
```sh
python3 examples/quick-start/chat-agent.py
```

Here is a screenshot of the chat in action:[^2]

![chat.png](chat.png)

## Next steps

In the [next section](multi-agent-task-delegation.md) you will 
learn some general principles on how to have multiple agents collaborate 
on a task using Langroid.

[^2]: In the screenshot, the numbers in parentheses indicate how many 
    messages have accumulated in the LLM's message history. 
    This is only provided for informational and debugging purposes, and 
    you can ignore it for now.
</file>

<file path="docs/quick-start/index.md">
In these sections we show you how to use the various components of
`langroid`. To follow along, we recommend you clone
the [`langroid-examples`](https://github.com/langroid/langroid-examples) repo.

!!! tip "Consult the tests as well"
    As you get deeper into Langroid, you will find it useful to consult
    the [tests](https://github.com/langroid/langroid/tree/main/tests/main)
    folder under `tests/main` in the main Langroid repo.

Start with the [`Setup`](setup.md) section to install Langroid and
get your environment set up.
</file>

<file path="docs/quick-start/llm-interaction.md">
!!! tip "Script in `langroid-examples`"
    A full working example for the material in this section is 
    in the `try-llm.py` script in the `langroid-examples` repo:
    [`examples/quick-start/try-llm.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/try-llm.py).
        

Let's start with the basics -- how to directly interact with an OpenAI LLM
using Langroid.

### Configure, instantiate the LLM class

First define the configuration for the LLM, in this case one of the
OpenAI GPT chat models:
```py
import langroid as lr

cfg = lr.language_models.OpenAIGPTConfig(
    chat_model=lr.language_models.OpenAIChatModel.GPT4o,
)
```
!!! info inline end "About Configs"
    A recurring pattern you will see in Langroid is that for many classes,
    we have a corresponding `Config` class (an instance of a Pydantic `BaseModel`),
    and the class constructor takes this `Config` class as its only argument.
    This lets us avoid having long argument lists in constructors, and brings flexibility
    since adding a new argument to the constructor is as simple as adding a new field
    to the corresponding `Config` class.
    For example the constructor for the `OpenAIGPT` class takes a single argument,
    an instance of the `OpenAIGPTConfig` class.

Now that we've defined the configuration of the LLM, we can instantiate it:
```py
mdl = lr.language_models.OpenAIGPT(cfg)
```


We will use OpenAI's GPT4 model's [chat completion API](https://platform.openai.com/docs/guides/gpt/chat-completions-api).

### Messages: The `LLMMessage` class

This API takes a list of "messages" as input -- this is typically the conversation
history so far, consisting of an initial system message, followed by a sequence
of alternating messages from the LLM ("Assistant") and the user.
Langroid provides an abstraction 
[`LLMMessage`][langroid.language_models.base.LLMMessage] to construct messages, e.g.
```py
from langroid.language_models import Role, LLMMessage

msg = LLMMessage(
    content="what is the capital of Bangladesh?", 
    role=Role.USER
)
```

### LLM response to a sequence of messages

To get a response from the LLM, we call the mdl's `chat` method,
and pass in a list of messages, along with a bound on how long (in tokens)
we want the response to be:
```py
messages = [
    LLMMessage(content="You are a helpful assistant", role=Role.SYSTEM), #(1)!
    LLMMessage(content="What is the capital of Ontario?", role=Role.USER), #(2)!
]

response = mdl.chat(messages, max_tokens=200)
```

1. :man_raising_hand: With a system message, you can assign a "role" to the LLM
2. :man_raising_hand: Responses from the LLM will have role `Role.ASSISTANT`;
   this is done behind the scenes by the `response.to_LLMMessage()` call below.

The response is an object of class [`LLMResponse`][langroid.language_models.base.LLMResponse], 
which we can convert to an
[`LLMMessage`][langroid.language_models.base.LLMMessage] to append to the conversation history:
```py
messages.append(response.to_LLMMessage())
```

You can put the above in a simple loop, 
to get a simple command-line chat interface!

```py
from rich import print
from rich.prompt import Prompt #(1)!

messages = [
    LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant"),
]

while True:
    message = Prompt.ask("[blue]Human")
    if message in ["x", "q"]:
        print("[magenta]Bye!")
        break
    messages.append(LLMMessage(role=Role.USER, content=message))

    response = mdl.chat(messages=messages, max_tokens=200)
    messages.append(response.to_LLMMessage())
    print("[green]Bot: " + response.message)
```

1. Rich is a Python library for rich text and beautiful formatting in the terminal.
   We use it here to get a nice prompt for the user's input.
   You can install it with `pip install rich`.

See [`examples/quick-start/try-llm.py`](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/try-llm.py)
for a complete example that you can run using
```bash
python3 examples/quick-start/try-llm.py
```

Here is a screenshot of what it looks like:

![try-llm.png](try-llm.png)

### Next steps
You might be thinking: 
"_It is tedious to keep track of the LLM conversation history and set up a 
loop. Does Langroid provide any abstractions to make this easier?_"

We're glad you asked! And this leads to the notion of an `Agent`. 
The [next section](chat-agent.md) will show you how to use the `ChatAgent` class 
to set up a simple chat Agent in a couple of lines of code.
</file>

<file path="docs/quick-start/multi-agent-task-delegation.md">
# Multi-Agent collaboration via Task Delegation

## Why multiple agents?

Let's say we want to develop a complex LLM-based application, for example an application
that reads a legal contract, extracts structured information, cross-checks it against
some taxonomy, gets some human input, and produces clear summaries.
In _theory_ it may be possible to solve this in a monolithic architecture using an
LLM API and a vector-store. But this approach
quickly runs into problems -- you would need to maintain multiple LLM conversation
histories and states, multiple vector-store instances, and coordinate all of the
interactions between them.

Langroid's `ChatAgent` and `Task` abstractions provide a natural and intuitive
way to decompose a solution approach
into multiple tasks, each requiring different skills and capabilities.
Some of these tasks may need access to an LLM,
others may need access to a vector-store, and yet others may need
tools/plugins/function-calling capabilities, or any combination of these.
It may also make sense to have some tasks that manage the overall solution process.
From an architectural perspective, this type of modularity has numerous benefits:

- **Reusability**: We can reuse the same agent/task in other contexts.
- **Scalability**: We can scale up the solution by adding more agents/tasks,
- **Flexibility**: We can easily change the solution by adding/removing agents/tasks.
- **Maintainability**: We can maintain the solution by updating individual agents/tasks.
- **Testability**: We can test/debug individual agents/tasks in isolation.
- **Composability**: We can compose agents/tasks to create new agents/tasks.
- **Extensibility**: We can extend the solution by adding new agents/tasks.
- **Interoperability**: We can integrate the solution with other systems by
  adding new agents/tasks.
- **Security/Privacy**: We can secure the solution by isolating sensitive agents/tasks.
- **Performance**: We can improve performance by isolating performance-critical agents/tasks.

## Task collaboration via sub-tasks

Langroid currently provides a mechanism for hierarchical (i.e. tree-structured)
task delegation: a `Task` object can add other `Task` objects
as sub-tasks, as shown in this pattern:

```py
from langroid import ChatAgent, ChatAgentConfig, Task

main_agent = ChatAgent(ChatAgentConfig(...))
main_task = Task(main_agent, ...)

helper_agent1 = ChatAgent(ChatAgentConfig(...))
helper_agent2 = ChatAgent(ChatAgentConfig(...))
helper_task1 = Task(helper_agent1, ...)
helper_task2 = Task(helper_agent2, ...)

main_task.add_sub_task([helper_task1, helper_task2])
```

What happens when we call `main_task.run()`?
Recall from the [previous section](chat-agent.md) that `Task.run()` works by
repeatedly calling `Task.step()` until `Task.done()` is True.
When the `Task` object has no sub-tasks, `Task.step()` simply tries
to get a valid response from the `Task`'s `ChatAgent`'s "native" responders,
in this sequence:
```py
[self.agent_response, self.llm_response, self.user_response] #(1)!
```

1. This is the default sequence in Langroid, but it can be changed by
   overriding [`ChatAgent.entity_responders()`][langroid.agent.base.Agent.entity_responders]

When a `Task` object has subtasks, the sequence of responders tried by
`Task.step()` consists of the above "native" responders, plus the
sequence of `Task.run()` calls on the sub-tasks, in the order in which
they were added to the `Task` object. For the example above, this means
that `main_task.step()` will seek a valid response in this sequence:

```py
[self.agent_response, self.llm_response, self.user_response, 
    helper_task1.run(), helper_task2.run()]
```
Fortunately, as noted in the [previous section](chat-agent.md),
`Task.run()` has the same type signature as that of the `ChatAgent`'s
"native" responders, so this works seamlessly. Of course, each of the
sub-tasks can have its own sub-tasks, and so on, recursively.
One way to think of this type of task delegation is that
`main_task()` "fails-over" to `helper_task1()` and `helper_task2()`
when it cannot respond to the current `pending_message` on its own.

## **Or Else** logic vs **And Then** logic
It is important to keep in mind how `step()` works: As each responder 
in the sequence is tried, when there is a valid response, the 
next call to `step()` _restarts its search_ at the beginning of the sequence
(with the only exception being that the human User is given a chance 
to respond after each non-human response). 
In this sense, the semantics of the responder sequence is similar to
**OR Else** logic, as opposed to **AND Then** logic.

If we want to have a sequence of sub-tasks that is more like
**AND Then** logic, we can achieve this by recursively adding subtasks.
In the above example suppose we wanted the `main_task` 
to trigger `helper_task1` and `helper_task2` in sequence,
then we could set it up like this:

```py
helper_task1.add_sub_task(helper_task2) #(1)!
main_task.add_sub_task(helper_task1)
```

1. When adding a single sub-task, we do not need to wrap it in a list.

## Next steps

In the [next section](two-agent-chat-num.md) we will see how this mechanism 
can be used to set up a simple collaboration between two agents.
</file>

<file path="docs/quick-start/setup.md">
# Setup


## Install
Ensure you are using Python 3.11. It is best to work in a virtual environment:

```bash
# go to your repo root (which may be langroid-examples)
cd <your repo root>
python3 -m venv .venv
. ./.venv/bin/activate
```
To see how to use Langroid in your own repo, you can take a look at the
[`langroid-examples`](https://github.com/langroid/langroid-examples) repo, which can be a good starting point for your own repo, 
or use the [`langroid-template`](https://github.com/langroid/langroid-template) repo.
These repos contain a `pyproject.toml` file suitable for use with the [`uv`](https://docs.astral.sh/uv/) dependency manager. After installing `uv` you can 
set up your virtual env, activate it, and install langroid into your venv like this:

```bash
uv venv --python 3.11
. ./.venv/bin/activate 
uv sync
```

Alternatively, use `pip` to install `langroid` into your virtual environment:
```bash
pip install langroid
```

The core Langroid package lets you use OpenAI Embeddings models via their API.
If you instead want to use the `sentence-transformers` embedding models from HuggingFace,
install Langroid like this:
```bash
pip install "langroid[hf-embeddings]"
```
For many practical scenarios, you may need additional optional dependencies:

- To use various document-parsers, install langroid with the `doc-chat` extra:
    ```bash
    pip install "langroid[doc-chat]"
    ```
- For "chat with databases", use the `db` extra:
    ```bash
    pip install "langroid[db]"
    ```
- You can specify multiple extras by separating them with commas, e.g.:
    ```bash
    pip install "langroid[doc-chat,db]"
    ```
- To simply install _all_ optional dependencies, use the `all` extra (but note that this will result in longer load/startup times and a larger install size):
    ```bash
    pip install "langroid[all]"
    ```

??? note "Optional Installs for using SQL Chat with a PostgreSQL DB"
    If you are using `SQLChatAgent`
    (e.g. the script [`examples/data-qa/sql-chat/sql_chat.py`](https://github.com/langroid/langroid/blob/main/examples/data-qa/sql-chat/sql_chat.py),
    with a postgres db, you will need to:
    
    - Install PostgreSQL dev libraries for your platform, e.g.
        - `sudo apt-get install libpq-dev` on Ubuntu,
        - `brew install postgresql` on Mac, etc.
    - Install langroid with the postgres extra, e.g. `pip install "langroid[postgres]"`
      or `uv add "langroid[postgres]"` or `uv pip install --extra postgres -r pyproject.toml`.
      If this gives you an error, try 
      `uv pip install psycopg2-binary` in your virtualenv.


!!! tip "Work in a nice terminal, such as Iterm2, rather than a notebook"
    All of the examples we will go through are command-line applications.
    For the best experience we recommend you work in a nice terminal that supports 
    colored outputs, such as [Iterm2](https://iterm2.com/).    


!!! note "mysqlclient errors"
    If you get strange errors involving `mysqlclient`, try doing `pip uninstall mysqlclient` followed by `pip install mysqlclient`.

## Set up tokens/keys 

To get started, all you need is an OpenAI API Key.
If you don't have one, see [this OpenAI Page](https://platform.openai.com/docs/quickstart).
(Note that while this is the simplest way to get started, Langroid works with practically any LLM, not just those from OpenAI.
See the guides to using [Open/Local LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/),
and other [non-OpenAI](https://langroid.github.io/langroid/tutorials/non-openai-llms/) proprietary LLMs.)

In the root of the repo, copy the `.env-template` file to a new file `.env`:
```bash
cp .env-template .env
```
Then insert your OpenAI API Key.
Your `.env` file should look like this:
```bash
OPENAI_API_KEY=your-key-here-without-quotes
```

Alternatively, you can set this as an environment variable in your shell
(you will need to do this every time you open a new shell):
```bash
export OPENAI_API_KEY=your-key-here-without-quotes
```
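
Once the key is set (via `.env` or an environment variable), you can sanity-check your setup with a minimal sketch like the following; the model choice here is just an example:
```python
import langroid.language_models as lm

# a quick one-off check that the API key is picked up (assumes langroid is installed)
llm = lm.OpenAIGPT(lm.OpenAIGPTConfig(chat_model=lm.OpenAIChatModel.GPT4o))
response = llm.chat("Say hello in one short sentence.", max_tokens=20)
print(response.message)
```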

All of the following environment variable settings are optional, and some are only needed
to use specific features (as noted below).

- **Qdrant** Vector Store API Key, URL. This is only required if you want to use Qdrant cloud.
  Langroid uses LanceDB as the default vector store in its `DocChatAgent` class (for RAG).
  Alternatively [Chroma](https://docs.trychroma.com/) is also currently supported.
  We use the local-storage version of Chroma, so there is no need for an API key.
- **Redis** Password, host, port: This is optional, and only needed to cache LLM API responses
  using Redis Cloud. Redis [offers](https://redis.com/try-free/) a free 30MB Redis account
  which is more than sufficient to try out Langroid and even beyond.
  If you don't set up these, Langroid will use a pure-python
  Redis in-memory cache via the [Fakeredis](https://fakeredis.readthedocs.io/en/latest/) library.
- **GitHub** Personal Access Token (required for apps that need to analyze git
  repos; token-based API calls are less rate-limited). See this
  [GitHub page](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens).
- **Google Custom Search API Credentials:** Only needed to enable an Agent to use the `GoogleSearchTool`.
  To use Google Search as an LLM Tool/Plugin/function-call,
  you'll need to set up
  [a Google API key](https://developers.google.com/custom-search/v1/introduction#identify_your_application_to_google_with_api_key),
  then [setup a Google Custom Search Engine (CSE) and get the CSE ID](https://developers.google.com/custom-search/docs/tutorial/creatingcse).
  (Documentation for these can be challenging; we suggest asking GPT-4 for a step-by-step guide.)
  After obtaining these credentials, store them as values of
  `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` in your `.env` file.
  Full documentation on using this (and other such "stateless" tools) is coming soon, but
  in the meantime take a peek at the test
  [`tests/main/test_web_search_tools.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_web_search_tools.py) to see how to use it.


If you add all of these optional variables, your `.env` file should look like this:
```bash
OPENAI_API_KEY=your-key-here-without-quotes
GITHUB_ACCESS_TOKEN=your-personal-access-token-no-quotes
CACHE_TYPE=redis
REDIS_PASSWORD=your-redis-password-no-quotes
REDIS_HOST=your-redis-hostname-no-quotes
REDIS_PORT=your-redis-port-no-quotes
QDRANT_API_KEY=your-key
QDRANT_API_URL=https://your.url.here:6333 # note port number must be included
GOOGLE_API_KEY=your-key
GOOGLE_CSE_ID=your-cse-id
```

### Microsoft Azure OpenAI setup [Optional]

This section applies only if you are using Microsoft Azure OpenAI.

When using Azure OpenAI, additional environment variables are required in the
`.env` file.
This page [Microsoft Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line&pivots=programming-language-python#environment-variables)
provides more information, and you can set each environment variable as follows:

- `AZURE_OPENAI_API_KEY`, from the value of `API_KEY`
- `AZURE_OPENAI_API_BASE`, from the value of `ENDPOINT`; this typically looks like `https://your_resource.openai.azure.com`.
- For `AZURE_OPENAI_API_VERSION`, you can use the default value in `.env-template`; the latest version can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/whats-new#azure-openai-chat-completion-general-availability-ga).
- `AZURE_OPENAI_DEPLOYMENT_NAME` is an OPTIONAL deployment name, which may be 
   defined by the user during the model setup.
- `AZURE_OPENAI_CHAT_MODEL`: Azure OpenAI allows specific model names when you select the model for your deployment, and you must use exactly the model name that was selected. For example, GPT-3.5 should be `gpt-35-turbo-16k` or `gpt-35-turbo`, and GPT-4 should be `gpt-4-32k` or `gpt-4`.
- `AZURE_OPENAI_MODEL_NAME` (Deprecated, use `AZURE_OPENAI_CHAT_MODEL` instead).
  
!!! note "For Azure-based models use `AzureConfig` instead of `OpenAIGPTConfig`"
    In most of the docs you will see that LLMs are configured using `OpenAIGPTConfig`.
    However, if you want to use Azure-deployed models, you should replace `OpenAIGPTConfig` with `AzureConfig`. See 
    the [`test_azure_openai.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_azure_openai.py) and 
    [`examples/basic/chat.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat.py) scripts.
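
For example, a minimal sketch of an Azure-based agent might look like this (assuming the `AZURE_OPENAI_*` variables above are set in your `.env`; the exact config fields may vary by Langroid version):
```python
import langroid as lr
from langroid.language_models.azure_openai import AzureConfig

# AzureConfig reads the AZURE_OPENAI_* settings from the environment/.env file
llm_config = AzureConfig()
agent = lr.ChatAgent(lr.ChatAgentConfig(llm=llm_config))
response = agent.llm_response("What is the capital of France?")
```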


## Next steps

Now you should be ready to use Langroid!
As a next step, you may want to see how you can use Langroid to [interact 
directly with the LLM](llm-interaction.md) (OpenAI GPT models only for now).
</file>

<file path="docs/quick-start/three-agent-chat-num-router.md">
# Three-Agent Collaboration, with message Routing

!!! tip "Script in `langroid-examples`"
    A full working example for the material in this section is
    in the `three-agent-chat-num-router.py` script in the `langroid-examples` repo:
    [`examples/quick-start/three-agent-chat-num-router.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/three-agent-chat-num-router.py).

Let's change the number game from the [three agent chat example](three-agent-chat-num.md) slightly.
In that example, when the `even_agent`'s LLM receives an odd number,
it responds with `DO-NOT-KNOW`, and similarly for the `odd_agent` when it
receives an even number. The `step()` method of the `repeater_task`
considers `DO-NOT-KNOW` to be an _invalid_ response and _continues_ to 
look for a valid response from any remaining sub-tasks.
Thus there was no need for the `processor_agent` to specify who should handle
the current number.

But what if there is a scenario where the `even_agent` and `odd_agent`
might return a legit but "wrong" answer?
In this section we add this twist -- when
the `even_agent` receives an odd number, it responds with -10, and similarly
for the `odd_agent` when it receives an even number.
We tell the `processor_agent` to avoid getting a negative number.
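
For instance, the `EvenHandler`'s instructions from the previous section might be modified along these lines (a sketch; the full version is in the linked script, and `llm_config` is the shared config from the previous section):

```py
even_config = lr.ChatAgentConfig(
    name="EvenHandler",
    llm=llm_config,
    system_message="""
    You will be given a number N. Respond as follows:

    - If N is even, divide N by 2 and show the result in the format:
        RESULT = <result>
      and say NOTHING ELSE.
    - If N is odd, just say -10 and NOTHING ELSE.
    """,
)
```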

The goal we have set for the `processor_agent` implies that it 
must specify the intended recipient of 
the number it is sending. 
We can enforce this using a special Langroid Tool, 
[`RecipientTool`][langroid.agent.tools.recipient_tool.RecipientTool].
So when setting up the
`processor_task` we include instructions to use this tool
(whose name is `recipient_message`, the value of `RecipientTool.request`):

```py
processor_agent = lr.ChatAgent(config)
processor_task = lr.Task(
    processor_agent,
    name = "Processor",
    system_message="""
        You will receive a list of numbers from me (the user).
        Your goal is to apply a transformation to each number.
        However you do not know how to do this transformation.
        You can take the help of two people to perform the 
        transformation.
        If the number is even, send it to EvenHandler,
        and if it is odd, send it to OddHandler.
        
        IMPORTANT: send the numbers ONE AT A TIME
        
        The handlers will transform the number and give you a new number.        
        If you send it to the wrong person, you will receive a negative value.
        Your aim is to never get a negative number, so you must 
        clearly specify who you are sending the number to, using the
        `recipient_message` tool/function-call, where the `content` field
        is the number you want to send, and the `recipient` field is the name
        of the intended recipient, either "EvenHandler" or "OddHandler".        
        
        Once all numbers in the given list have been transformed, 
        say DONE and show me the result. 
        Start by asking me for the list of numbers.
    """,
    llm_delegate=True,
    single_round=False,
)
```

We must enable this tool for the `processor_agent`:
```py
processor_agent.enable_message(lr.agent.tools.RecipientTool)
```

The rest of the code remains the same as in the [previous section](three-agent-chat-num.md),
i.e., we simply add the two handler tasks
as sub-tasks of the `processor_task`, like this:
```python
processor_task.add_sub_task([even_task, odd_task])
```

One of the benefits of using the `RecipientTool` is that it contains 
mechanisms to remind the LLM to specify a recipient for its message,
when it forgets to do so (this does happen once in a while, even with GPT-4).


Feel free to try the working example script
`three-agent-chat-num-router.py` in the 
`langroid-examples` repo:
[`examples/quick-start/three-agent-chat-num-router.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/three-agent-chat-num-router.py):

```bash
python3 examples/quick-start/three-agent-chat-num-router.py
```

Below is a screenshot of what this might look like, using the OpenAI function-calling 
mechanism with the `recipient_message` tool:

![three-agent-router-func.png](three-agent-router-func.png)

And here is what it looks like using Langroid's built-in tools mechanism (use the `-t` option when running the script):

![three-agent-router.png](three-agent-router.png)

## Next steps

In the [next section](chat-agent-docs.md) you will learn
how to use Langroid with external documents.
</file>

<file path="docs/quick-start/three-agent-chat-num.md">
# Three-Agent Collaboration

!!! tip "Script in `langroid-examples`"
    A full working example for the material in this section is
    in the `three-agent-chat-num.py` script in the `langroid-examples` repo:
    [`examples/quick-start/three-agent-chat-num.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/three-agent-chat-num.py).


Let us set up a simple numbers exercise between 3 agents.
The `Processor` agent receives a number $n$, and its goal is to 
apply a transformation to it. However, it does not know how to apply the
transformation, and takes the help of two other agents to do so.
Given a number $n$,

- The `EvenHandler` returns $n/2$ if $n$ is even, otherwise says `DO-NOT-KNOW`.
- The `OddHandler` returns $3n+1$ if $n$ is odd, otherwise says `DO-NOT-KNOW`.

We'll first define a shared LLM config:

```py
llm_config = lr.language_models.OpenAIGPTConfig(
    chat_model=lr.language_models.OpenAIChatModel.GPT4o,
    # or, e.g., "ollama/qwen2.5-coder:latest", or "gemini/gemini-2.0-flash-exp"
)
```

Next define the config for the `Processor` agent:
```py
processor_config = lr.ChatAgentConfig(
    name="Processor",
    llm = llm_config,
    system_message="""
    You will receive a number from the user.
    Simply repeat that number, DO NOT SAY ANYTHING else,
    and wait for a TRANSFORMATION of the number 
    to be returned to you.
    
    Once you have received the RESULT, simply say "DONE",
    do not say anything else.
    """,        
    vecdb=None,
)
```

Then set up the `processor_agent`, along with the corresponding task:
```py
processor_agent = lr.ChatAgent(processor_config)

processor_task = lr.Task(
    processor_agent,
    llm_delegate=True, #(1)!
    interactive=False, #(2)!
    single_round=False, #(3)!
)

```

1. Setting the `llm_delegate` option to `True` means that the `processor_task` is
    delegated to the LLM (as opposed to the User), 
    in the sense that the LLM is the one "seeking" a response to the latest 
    number. Specifically, this means that in the `processor_task.step()` 
    when a sub-task returns `DO-NOT-KNOW`,
    it is _not_ considered a valid response, and the search for a valid response 
    continues to the next sub-task if any.
2. `interactive=False` means the task loop will not wait for user input.
3. `single_round=False` means that the `processor_task` should _not_ terminate after 
    a valid response from a responder.

Set up the other two agents and tasks:

```py
NO_ANSWER = lr.utils.constants.NO_ANSWER

even_config = lr.ChatAgentConfig(
    name="EvenHandler",
    llm = llm_config,
    system_message=f"""
    You will be given a number N. Respond as follows:
    
    - If N is even, divide N by 2 and show the result, 
      in the format: 
        RESULT = <result>
      and say NOTHING ELSE.
    - If N is odd, say {NO_ANSWER}
    """,    
)
even_agent = lr.ChatAgent(even_config)
even_task = lr.Task(
    even_agent,
    single_round=True,  # task done after 1 step() with valid response
)

odd_config = lr.ChatAgentConfig(
    name="OddHandler",
    llm = llm_config,
    system_message=f"""
    You will be given a number N. Respond as follows:
    
    - if N is odd, return the result (N*3+1), in the format:
        RESULT = <result> 
        and say NOTHING ELSE.
    
    - If N is even, say {NO_ANSWER}
    """,
)
odd_agent = lr.ChatAgent(odd_config)
odd_task = lr.Task(
    odd_agent,
    single_round=True,  # task done after 1 step() with valid response
)

```

Now add the `even_task` and `odd_task` as subtasks of the `processor_task`, 
and then run it with a number as input:

```python
processor_task.add_sub_task([even_task, odd_task])
processor_task.run(13)
```

The input number will be passed to the `Processor` agent as the user input.
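
If you want to inspect the final result programmatically (rather than just watching the console), note that `Task.run()` returns a `ChatDocument` (or `None`); a minimal sketch:

```py
result = processor_task.run(13)
if result is not None:
    print(result.content)  # content of the final message in the task
```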


Feel free to try the working example script
`three-agent-chat-num.py` in the 
`langroid-examples` repo:
[`examples/quick-start/three-agent-chat-num.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/three-agent-chat-num.py):

```bash
python3 examples/quick-start/three-agent-chat-num.py
```

Here's a screenshot of what it looks like:
![three-agent-num.png](three-agent-num.png)


## Next steps


In the [next section](chat-agent-tool.md) you will learn how to use Langroid
to equip a `ChatAgent` with tools or function-calling.
</file>

<file path="docs/quick-start/two-agent-chat-num.md">
# Two-Agent Collaboration

!!! tip "Script in `langroid-examples`"
    A full working example for the material in this section is
    in the `two-agent-chat-num.py` script in the `langroid-examples` repo:
    [`examples/quick-start/two-agent-chat-num.py`](https://github.com/langroid/langroid-examples/tree/main/examples/quick-start/two-agent-chat-num.py).


To illustrate these ideas, let's look at a toy example[^1] where 
a `Student` agent receives a list of numbers to add.
We set up this agent with an instruction that they do not know how to add,
and they can ask for help adding pairs of numbers.
To add pairs of numbers, we set up an `Adder` agent.

[^1]: Toy numerical examples are perfect to illustrate the ideas without
      incurring too much token cost from LLM API calls.

First define a common `llm_config` to use for both agents:
```python
llm_config = lr.language_models.OpenAIGPTConfig(
    chat_model=lr.language_models.OpenAIChatModel.GPT4o,
    # or, e.g., "ollama/qwen2.5-coder:latest", or "gemini/gemini-2.0-flash-exp"
)
```


Next, set up a config for the student agent, then create the agent
and the corresponding task:

```py
student_config = lr.ChatAgentConfig(
    name="Student",
    llm=llm_config,
    vecdb=None, #(1)!
    system_message="""
        You will receive a list of numbers from me (the User),
        and your goal is to calculate their sum.
        However you do not know how to add numbers.
        I can help you add numbers, two at a time, since
        I only know how to add pairs of numbers.
        Send me a pair of numbers to add, one at a time, 
        and I will tell you their sum.
        For each question, simply ask me the sum in math notation, 
        e.g., simply say "1 + 2", etc, and say nothing else.
        Once you have added all the numbers in the list, 
        say DONE and give me the final sum. 
        Start by asking me for the list of numbers.
    """,    
)
student_agent = lr.ChatAgent(student_config)
student_task = lr.Task(
    student_agent,
    name = "Student",
    llm_delegate = True, #(2)!
    single_round=False,  # (3)! 
)
```

1. We don't need access to external docs so we set `vecdb=None` to avoid 
   the overhead of loading a vector-store.
2. Whenever we "flip roles" and assign the LLM the role of generating questions, 
   we set `llm_delegate=True`. In effect this ensures that the LLM "decides" when
   the task is done.
3. This setting means the task is not a single-round task, i.e. it is _not_ done
   after one `step()` with a valid response.

Next, set up the Adder agent config, create the Adder agent
and the corresponding Task:

```py
adder_config = lr.ChatAgentConfig(
    name = "Adder", #(1)!
    llm=llm_config,
    vecdb=None,
    system_message="""
        You are an expert on addition of numbers. 
        When given numbers to add, simply return their sum, say nothing else
        """,     
)
adder_agent = lr.ChatAgent(adder_config)
adder_task = lr.Task(
    adder_agent,
    interactive=False, #(2)!
    single_round=True,  # task done after 1 step() with valid response (3)!
)
```
1. The Agent name is displayed in the conversation shown in the console.
2. Does not wait for user input.
3. We set `single_round=True` to ensure that the expert task is done after 
   one step() with a valid response. 

Finally, we add the `adder_task` as a sub-task of the `student_task`, 
and run the `student_task`:

```py
student_task.add_sub_task(adder_task) #(1)!
student_task.run()
```

1. When adding just one sub-task, we don't need to use a list.


For a full working example, see the 
[`two-agent-chat-num.py`](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/two-agent-chat-num.py)
script in the `langroid-examples` repo. You can run this using:
```bash
python3 examples/quick-start/two-agent-chat-num.py
```

Here is an example of the conversation that results:

![two-agent-num.png](two-agent-num.png)

## Logs of multi-agent interactions

!!! note "For advanced users"
    This section is for advanced users who want more visibility into the
    internals of multi-agent interactions.

When running a multi-agent chat, e.g. using `task.run()`, two types of logs
are generated:

- plain-text logs in `logs/<task_name>.log`
- tsv logs in `logs/<task_name>.tsv`

It is important to realize that the logs show _every iteration 
of the loop in `Task.step()`, i.e. every **attempt** at
responding to the current pending message, even those that are not allowed_.
The ones marked with an asterisk (*) are the ones that are considered valid
responses for a given `step()` (which is a "turn" in the conversation).

The plain-text logs contain ANSI color codes to make them easier to read,
e.g. by viewing them with `less <log_file>`. The format is (subject to change):
```
(TaskName) Responder SenderEntity (EntityName) (=> Recipient) TOOL Content
```

The structure of the `tsv` logs is similar. A great way to view these is to
install and use the excellent [visidata](https://www.visidata.org/) tool:
```bash
vd logs/<task_name>.tsv
```

## Next steps
As a next step, look at how to set up a collaboration among three agents
for a simple [numbers game](three-agent-chat-num.md).
</file>

<file path="docs/stylesheets/extra.css">
.md-logo img {
⋮----
height: 60px !important; /* Adjust size as necessary */
</file>

<file path="docs/tutorials/llm-usage-options.md">
# Options for accessing LLMs

> This is a work-in-progress document. It will be updated frequently.

The variety of ways to access the power of Large Language Models (LLMs) is growing 
rapidly, and there are a bewildering array of options. This document is an attempt to 
categorize and describe some of the most popular and useful ways to access LLMs,
via these 2x2x2  combinations:

- Websites (non-programmatic) or APIs (programmatic)
- Open-source or Proprietary 
- Chat-based interface or integrated assistive tools.

We will go into some of these combinations below. More will be added over time.

## Chat-based Web (non-API) access to Proprietary LLMs


This is best for *non-programmatic* use of LLMs: you go to a website and 
interact with the LLM via a chat interface -- 
you write prompts and/or upload documents, and the LLM responds with plain text
or can create artifacts (e.g. reports, code,
charts, podcasts, etc) that you can then copy into your files, workflow or codebase.
They typically allow you to upload text-based documents of various types, and some let you upload images, screenshots, etc., and ask questions about them.

Most of them are capable of doing *internet search* to inform their responses.


!!! note "Chat Interface vs Integrated Tools"
    Note that when using a chat-based interaction, you have to copy various artifacts
    from the web-site into another place, like your code editor, document, etc.
    AI-integrated tools relieve you of this burden by bringing the LLM power into 
    your workflow directly. More on this in a later section.

      
**Pre-requisites:** 

- *Computer*: Besides having a modern web browser (Chrome, Firefox, etc) and internet
access, there are no other special requirements, since the LLM is 
running on a remote server.
- *Coding knowledge*: Where (typically Python) code is produced, you will get best results
if you are conversant with Python so that you can understand and modify the code as
needed. In this category you do not need to know how to interact with an LLM API via code.

Here are some popular options in this category:

### OpenAI ChatGPT

Free access at [https://chatgpt.com/](https://chatgpt.com/)

With a ChatGPT-Plus monthly subscription ($20/month), you get additional features like:

- access to more powerful models
- access to [OpenAI canvas](https://help.openai.com/en/articles/9930697-what-is-the-canvas-feature-in-chatgpt-and-how-do-i-use-it) - this offers a richer interface than just a chat window, e.g. it automatically creates windows for code snippets, and shows results of running code
(e.g. output, charts etc).

Typical use: Since there is a fixed monthly subscription (i.e. not metered by amount of 
usage), this is a cost-effective way to non-programmatically 
access a top LLM such as `GPT-4o` or `o1` 
(so-called "reasoning/thinking" models). Note, however, that there are limits on how many
queries you can make within a certain time period, but usually the limit is fairly
generous. 

What you can create, besides text-based artifacts:

- produce Python (or other language) code which you can copy/paste into notebooks or files
- SQL queries that you can copy/paste into a database tool
- Markdown-based tables
- You can't get diagrams, but you can get *code for diagrams*, 
e.g. python code for plots, [mermaid](https://github.com/mermaid-js/mermaid) code for flowcharts.
- images in some cases.

### OpenAI Custom GPTs (simply known as "GPTs")

[https://chatgpt.com/gpts/editor](https://chatgpt.com/gpts/editor)

Here you can conversationally interact with a "GPT Builder" that will 
create a version of ChatGPT
that is *customized* to your needs, i.e. with necessary background instructions,
context, and/or documents. 
The end result is a specialized GPT that you can then use for your specific
purpose and share with others (all of this is non-programmatic). 

E.g. [here](https://chatgpt.com/share/67153a4f-ea2c-8003-a6d3-cbc2412d78e5) is a "Knowledge Graph Builder" GPT

!!! note "Private GPTs requires an OpenAI Team Account"
    To share a custom GPT within a private group, you need an OpenAI Team account,
    see pricing [here](https://openai.com/chatgpt/pricing). Without a Team account,
    any shared GPT is public and can be accessed by anyone.


### Anthropic/Claude

[https://claude.ai](https://claude.ai)

The Claude basic web-based interface is similar to OpenAI ChatGPT, powered by 
Anthropic's proprietary LLMs. 
Anthropic's equivalent of ChatGPT-Plus is called "Claude Pro", which is also 
a $20/month subscription, giving you access to advanced models 
(e.g. `Claude-3.5-Sonnet`) and features.

Anthropic's equivalent of Custom GPTs is called 
[Projects](https://www.anthropic.com/news/projects), 
where you can create
an  LLM-powered interface that is augmented with your custom context and data.

Whichever product you are using, the interface auto-creates **artifacts** as needed --
these are stand-alone documents (code, text, images, web-pages, etc) 
that you may want to copy and paste into your own codebase, documents, etc.
For example you can prompt Claude to create full working interactive applications,
and copy the code, polish it and deploy it for others to use. See examples [here](https://simonwillison.net/2024/Oct/21/claude-artifacts/).

### Microsoft Copilot Lab

!!! note
    Microsoft's "Copilot" is an overloaded term that can refer to many different 
    AI-powered tools. Here we are referring to the one that is a collaboration between
    Microsoft and OpenAI, and is based on OpenAI's GPT-4o LLM, and powered by 
    Bing's search engine.

Accessible via [https://copilot.cloud.microsoft.com/](https://copilot.cloud.microsoft.com/)

The basic capabilities are similar to OpenAI's and Anthropic's offerings, but
come with so-called "enterprise grade" security and privacy features,
which purportedly make it suitable for use in educational and corporate settings.
Read more on what you can do with Copilot Lab [here](https://www.microsoft.com/en-us/microsoft-copilot/learn/?form=MA13FV).

Like the other proprietary offerings, Copilot can:

- perform internet search to inform its responses
- generate/run code and show results including charts

### Google Gemini

Accessible at [gemini.google.com](https://gemini.google.com).


## AI-powered productivity tools

These tools "bring the AI to your workflow", which is a massive productivity boost,
compared to repeatedly context-switching, e.g. copying/pasting between a chat-based AI web-app and your workflow.

- [**Cursor**](https://www.cursor.com/): AI Editor/Integrated Dev Environment (IDE). This is a fork of VSCode.
- [**Zed**](https://zed.dev/): built in Rust; can be customized to use Jetbrains/PyCharm keyboard shortcuts.
- [**Google Colab Notebooks with Gemini**](https://colab.research.google.com).
- [**Google NotebookLM**](https://notebooklm.google.com/): allows you to upload a set of text-based documents, 
  and create artifacts such as study guide, FAQ, summary, podcasts, etc.

    
## APIs for Proprietary LLMs

Using an API key allows *programmatic* access to the LLMs, meaning you can make
invocations to the LLM from within your own code, and receive back the results.
This is useful for building applications involving more complex workflows where LLMs
are used within a larger codebase, to access "intelligence" as needed.

E.g. suppose you are writing code that handles queries from a user, and you want to 
classify the user's _intent_ into one of 3 types: Information, Action, or Done.
Pre-LLMs, you would have had to write a bunch of rules or train a custom 
"intent classifier" that maps, for example:

- "What is the weather in Pittsburgh?" -> Information
- "Set a timer for 10 minutes" -> Action
- "Ok I have no more questions∞" -> Done

But using an LLM API, this is almost trivially easy - you instruct the LLM it should
classify the intent into one of these 3 types, and send the user query to the LLM,
and receive back the intent. 
(You can use Tools to make this robust, but that is outside the scope of this document.)
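
For instance, with a scaffolding library such as Langroid (covered later), the whole classifier is a few lines; this is just an illustrative sketch, assuming an OpenAI API key is set:

```python
import langroid.language_models as lm

# a minimal sketch of LLM-based intent classification
llm = lm.OpenAIGPT(lm.OpenAIGPTConfig(chat_model=lm.OpenAIChatModel.GPT4o))
query = "Set a timer for 10 minutes"
prompt = f"""
Classify the intent of the user query as exactly one of:
Information, Action, Done. Respond with just that single word.

Query: {query}
"""
print(llm.chat(prompt, max_tokens=5).message)  # expected: Action
```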

The most popular proprietary LLMs available via API are from OpenAI (or via  its
partner Microsoft), Anthropic, and Google:

- [OpenAI](https://platform.openai.com/docs/api-reference/introduction), to interact with `GPT-4o` family of models, and the `o1` family of "thinking/reasoning" models.
- [Anthropic](https://docs.anthropic.com/en/home) to use the `Claude` series of models.
- [Google](https://ai.google.dev/gemini-api/docs) to use the `Gemini` family of models.

These LLM providers are home to some of the most powerful LLMs available today,
specifically OpenAI's `GPT-4o` and Anthropic's `Claude-3.5-Sonnet`, and Google's `Gemini 1.5 Pro` (as of Oct 2024).

**Billing:** Unlike the fixed monthly subscriptions of ChatGPT, Claude and others, 
LLM usage via API is typically billed by *token usage*, i.e. you pay for the total
number of input and output "tokens" (a slightly technical term, but think of it as
a word for now).

Using an LLM API involves these steps:

- create an account on the provider's website as a "developer" or organization,
- get an API key,
- use the API key in your code to make requests to the LLM. 


**Prerequisites**:

- *Computer:* again, since the API is served over the internet, there are no special
  requirements for your computer.
- *Programming skills:* Using an LLM API involves either:
    - directly making REST API calls from your code, or 
    - use a scaffolding library (like [Langroid](https://github.com/langroid/langroid)) that abstracts away the details of the 
      API calls.
  
    In either case, you must be highly proficient in (Python) programming 
  to use this option.

## Web-interfaces to Open LLMs

!!! note  "Open LLMs"
    These are LLMs that have been publicly released, i.e. their parameters ("weights") 
    are publicly available -- we refer to these as *open-weight* LLMs. If in addition, the
    training datasets, and data-preprocessing and training code are also available, we would
    call these *open-source* LLMs. But lately there is a looser usage of the term "open-source", referring to just the weights being available. For our purposes we will refer to all of these models as **Open LLMs**.

There are many options here, but some popular ones are below. Note that some of these
are front-ends that allow you to interact with not only Open LLMs but also 
proprietary LLM APIs.

- [LMStudio](https://lmstudio.ai/)
- [OpenWebUI](https://github.com/open-webui/open-webui)
- [Msty](https://msty.app/)
- [AnythingLLM](https://anythingllm.com/)
- [LibreChat](https://www.librechat.ai/)


## API Access to Open LLMs

This is a good option if you are fairly proficient in (Python) coding. There are in 
fact two possibilities here:

- The LLM is hosted remotely, and you make REST API calls to the remote server. This
  is a good option when you want to run large LLMs and you don't have the resources (GPU and memory) to run them locally.
    - [groq](https://groq.com/): remarkably, it is free, and you can run `llama-3.1-70b`
    - [cerebras](https://cerebras.ai/)
    - [open-router](https://openrouter.ai/)
- The LLM is running on your computer. This is a good option if your machine has sufficient RAM to accommodate the LLM you are trying to run, and if you are 
concerned about data privacy. The most user-friendly option is [Ollama](https://github.com/ollama/ollama); see more below.

Note that all of the above options provide an **OpenAI-Compatible API** to interact
with the LLM, which is a huge convenience: you can write code to interact with OpenAI's
LLMs (e.g. `GPT4o` etc) and then easily switch to one of the above options, typically
by changing a simple config (see the respective websites for instructions).

Of course, directly working with the raw LLM API quickly becomes tedious. This is where
a scaffolding library like [langroid](https://github.com/langroid/langroid) comes in
very handy - it abstracts away the details of the API calls, and provides a simple
programmatic interface to the LLM, and higher-level abstractions like 
Agents, Tasks, etc. Working with such a library is going to be far more productive
than directly working with the raw API. Below are instructions on how to use langroid
with some of the above Open/Local LLM options.

See [here](https://langroid.github.io/langroid/tutorials/local-llm-setup/) for 
a guide to using Langroid with Open LLMs.
</file>

<file path="docs/tutorials/local-llm-setup.md">
# Setting up a Local/Open LLM to work with Langroid

!!! tip "Examples scripts in [`examples/`](https://github.com/langroid/langroid/tree/main/examples) directory."
      There are numerous examples of scripts that can be run with local LLMs,
      in the [`examples/`](https://github.com/langroid/langroid/tree/main/examples)
      directory of the main `langroid` repo. These examples are also in the 
      [`langroid-examples`](https://github.com/langroid/langroid-examples/tree/main/examples),
      although the latter repo may contain some examples that are not in the `langroid` repo.
      Most of these example scripts allow you to specify an LLM in the format `-m <model>`,
      where the specification of `<model>` is described in the guide below for local/open LLMs, 
      or in the [Non-OpenAI LLM](https://langroid.github.io/langroid/tutorials/non-openai-llms/) guide. Scripts 
      that have the string `local` in their name have been especially designed to work with 
      certain local LLMs, as described in the respective scripts.
      If you want a pointer to a specific script that illustrates a 2-agent chat, have a look 
      at [`chat-search-assistant.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-search-assistant.py).
      This specific script, originally designed for GPT-4/GPT-4o, works well with `llama3-70b` 
      (tested via Groq, mentioned below).

## Easiest: with Ollama

As of version 0.1.24, Ollama provides an OpenAI-compatible API server for the LLMs it supports,
which massively simplifies running these LLMs with Langroid. Example below.

```
ollama pull mistral:7b-instruct-v0.2-q8_0
```
This provides an OpenAI-compatible 
server for the `mistral:7b-instruct-v0.2-q8_0` model.

You can run any Langroid script using this model, by setting the `chat_model`
in the `OpenAIGPTConfig` to `ollama/mistral:7b-instruct-v0.2-q8_0`, e.g.

```python
import langroid.language_models as lm
import langroid as lr

llm_config = lm.OpenAIGPTConfig(
    chat_model="ollama/mistral:7b-instruct-v0.2-q8_0",
    chat_context_length=16_000, # adjust based on model
)
agent_config = lr.ChatAgentConfig(
    llm=llm_config,
    system_message="You are helpful but concise",
)
agent = lr.ChatAgent(agent_config)
# directly invoke agent's llm_response method
# response = agent.llm_response("What is the capital of Russia?")
task = lr.Task(agent, interactive=True)
task.run() # for an interactive chat loop
```

## Setup Ollama with a GGUF model from HuggingFace

Some models are not directly supported by Ollama out of the box. To serve a GGUF
model with Ollama, you can download the model from HuggingFace and set up a custom
Modelfile for it.

E.g. download the GGUF version of `dolphin-mixtral` from
[here](https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF)

(specifically, download this file `dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf`)

To set up a custom ollama model based on this:

- Save this model at a convenient place, e.g. `~/.ollama/models/`
- Create a modelfile for this model. First see what an existing modelfile
  for a similar model looks like, e.g. by running:

```
ollama show --modelfile dolphin-mixtral:latest
```
You will notice this file has a FROM line followed by a prompt template and other settings.
Create a new file with the same contents, changing only the `FROM ...` line 
to the path of the model you downloaded, e.g.
```
FROM /Users/blah/.ollama/models/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf
```

- Save this modelfile somewhere, e.g. `~/.ollama/modelfiles/dolphin-mixtral-gguf`
- Create a new ollama model based on this file:
```
ollama create dolphin-mixtral-gguf -f ~/.ollama/modelfiles/dolphin-mixtral-gguf
``` 

- Run this new model using `ollama run dolphin-mixtral-gguf`

To use this model with Langroid you can then specify `ollama/dolphin-mixtral-gguf`
as the `chat_model` param in the `OpenAIGPTConfig` as in the previous section.
When a script supports it, you can also pass in the model name via
`-m ollama/dolphin-mixtral-gguf`
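
For example, a config sketch analogous to the earlier Ollama example (the context length is just an illustrative value to adjust for your model):

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="ollama/dolphin-mixtral-gguf",
    chat_context_length=32_000,  # adjust based on the model you are running
)
```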

## Local LLMs using LMStudio

LMStudio is one of the simplest ways to download and run open-weight LLMs locally.
See their docs at [lmstudio.ai](https://lmstudio.ai/docs) for installation and usage 
instructions. Once you download a model, you can use the "server" option to have it 
served via an OpenAI-compatible API at a local IP like `https://127.0.0.1:1234/v1`.
As with any other scenario of running a local LLM, you can use this with Langroid by
setting `chat_model` as follows (note you should not include the `https://` part):

```python
llm_config = lm.OpenAIGPTConfig(
    chat_model="local/127.0.0.1234/v1",
    ...
)
```

## Setup llama.cpp with a GGUF model from HuggingFace

See `llama.cpp`'s [GitHub page](https://github.com/ggerganov/llama.cpp/tree/master) for build and installation instructions.

After installation, begin as above with downloading a GGUF model from HuggingFace; for example, the quantized `Qwen2.5-Coder-7B` from [here](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF); specifically, [this file](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/blob/main/qwen2.5-coder-7b-instruct-q2_k.gguf).

Now, the server can be started with `llama-server -m qwen2.5-coder-7b-instruct-q2_k.gguf`.

In addition, your `llama.cpp` may be built with support for simplified management of HuggingFace models (specifically, `libcurl` support is required); in this case, `llama.cpp` will download HuggingFace models to a cache directory, and the server may be run with:
```bash
llama-server \
      --hf-repo Qwen/Qwen2.5-Coder-7B-Instruct-GGUF \
      --hf-file qwen2.5-coder-7b-instruct-q2_k.gguf
```

To use the model with Langroid, specify `llamacpp/localhost:{port}` as the `chat_model`; the default port is 8080.
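
For example, with the server running on the default port (a minimal sketch):

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="llamacpp/localhost:8080",  # default llama-server port
)
```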

## Setup vLLM with a model from HuggingFace

See [the vLLM docs](https://docs.vllm.ai/en/stable/getting_started/installation.html) for installation and configuration options. To run a HuggingFace model with vLLM, use `vllm serve`, which provides an OpenAI-compatible server. 

For example, to run `Qwen2.5-Coder-32B`, run `vllm serve Qwen/Qwen2.5-Coder-32B`.

If the model is not publicly available, set the environment variable `HF_TOKEN` to your HuggingFace token with read access to the model repo.

To use the model with Langroid, specify `vllm/Qwen/Qwen2.5-Coder-32B` as the `chat_model` and, if a port other than the default 8000 was used, set `api_base` to `localhost:{port}`.
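
For example (a minimal sketch; uncomment `api_base` only if you served the model on a non-default port):

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="vllm/Qwen/Qwen2.5-Coder-32B",
    # api_base="localhost:8001",  # only needed for a non-default port
)
```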

## Setup vLLM with a GGUF model from HuggingFace

`vLLM` supports running quantized models from GGUF files; however, this is currently an experimental feature. To run a quantized `Qwen2.5-Coder-32B`, download the model from [the repo](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF), specifically [this file](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct-GGUF/blob/main/qwen2.5-coder-32b-instruct-q4_0.gguf). 

The model can now be run with `vllm serve qwen2.5-coder-32b-instruct-q4_0.gguf --tokenizer Qwen/Qwen2.5-Coder-32B` (the tokenizer of the base model rather than the quantized model should be used).

To use the model with Langroid, specify `vllm/qwen2.5-coder-32b-instruct-q4_0.gguf` as the `chat_model` and, if a port other than the default 8000 was used, set `api_base` to `localhost:{port}`.

## "Local" LLMs hosted on Groq
In this scenario, an open-source LLM (e.g. `llama3.1-8b-instant`) is hosted on a Groq server
which provides an OpenAI-compatible API. Using this with langroid is exactly analogous
to the Ollama scenario above: you can set the `chat_model` in the `OpenAIGPTConfig` to
`groq/<model_name>`, e.g. `groq/llama3.1-8b-instant`. 
For this to work, ensure you have a `GROQ_API_KEY` environment variable set in your
`.env` file. See [groq docs](https://console.groq.com/docs/quickstart).
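
For example (assuming `GROQ_API_KEY` is set):

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="groq/llama3.1-8b-instant",
)
```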

## "Local" LLMs hosted on Cerebras
This works exactly like with Groq, except you set up a `CEREBRAS_API_KEY` environment variable, and specify the `chat_model` as `cerebras/<model_name>`, e.g. `cerebras/llama3.1-8b`. See the Cerebras [docs](https://inference-docs.cerebras.ai/introduction) for details on which LLMs are supported.

## Open/Proprietary LLMs via OpenRouter

OpenRouter is a **paid service** that provides an OpenAI-compatible API 
for practically any LLM, open or proprietary.
Using this with Langroid is similar to the `groq` scenario above:

- Ensure you have an `OPENROUTER_API_KEY` set up in your environment (or `.env` file), and 
- Set the `chat_model` in the `OpenAIGPTConfig` to 
  `openrouter/<model_name>`, where `<model_name>` is the name of the model on the 
[OpenRouter](https://openrouter.ai/) website, e.g. `qwen/qwen-2.5-7b-instruct`.

This is a good option if you want to use larger open LLMs without having to download
them locally (especially if your local machine does not have the resources to run them).
Besides using specific LLMs, OpenRouter also has smart routing/load-balancing.
OpenRouter is also convenient for using proprietary LLMs (e.g. Gemini, Amazon) via 
a single unified API.

## "Local" LLMs hosted on GLHF.chat

See [glhf.chat](https://glhf.chat/chat/create) for a list of available models.

To run with one of these models, set the `chat_model` in the `OpenAIGPTConfig` to
`"glhf/<model_name>"`, where `model_name` is `hf:` followed by the HuggingFace repo 
path, e.g. `Qwen/Qwen2.5-Coder-32B-Instruct`, so the full `chat_model` would be
`"glhf/hf:Qwen/Qwen2.5-Coder-32B-Instruct"`. 

## DeepSeek LLMs

As of 26-Dec-2024, DeepSeek models are available via their [api](https://platform.deepseek.com).
To use it with Langroid:

- set up your `DEEPSEEK_API_KEY` environment variable in the `.env` file or as
 an explicit export in your shell
- set the `chat_model` in the `OpenAIGPTConfig` to `deepseek/deepseek-chat` to use the 
`DeepSeek-V3` model, or `deepseek/deepseek-reasoner` to use the full (i.e. non-distilled) `DeepSeek-R1` "reasoning" model.

The DeepSeek models are also available via OpenRouter (see the OpenRouter section above) 
or Ollama (see the Ollama instructions above). E.g. you
can use DeepSeek-R1 or its distilled variants by setting `chat_model` to 
`openrouter/deepseek/deepseek-r1` or `ollama/deepseek-r1:8b`.
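
For example (assuming `DEEPSEEK_API_KEY` is set):

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="deepseek/deepseek-chat",  # or "deepseek/deepseek-reasoner"
)
```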

## Other non-OpenAI LLMs supported by LiteLLM

For other scenarios of running local/remote LLMs, it is possible that the `LiteLLM` library
supports an "OpenAI adaptor" for these models (see their [docs](https://litellm.vercel.app/docs/providers)).

Depending on the specific model, the `litellm` docs may say you need to 
specify a model in the form `<provider>/<model>`, e.g. `palm/chat-bison`. 
To use the model with Langroid, simply prepend `litellm/` to this string, e.g. `litellm/palm/chat-bison`,
when you specify the `chat_model` in the `OpenAIGPTConfig`.

To use `litellm`, ensure you have the `litellm` extra installed, 
via `pip install "langroid[litellm]"` or equivalent.



## Harder: with oobabooga
Like Ollama, [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) provides an OpenAI-API-compatible API server, but the setup 
is significantly more involved. See their github page for installation and model-download instructions.

Once you have finished the installation, you can spin up the server for an LLM using
something like this:

```
python server.py --api --model mistral-7b-instruct-v0.2.Q8_0.gguf --verbose --extensions openai --nowebui
```
This will show a message saying that the OpenAI-compatible API is running at `http://127.0.0.1:5000`.

Then in your Langroid code you can specify the LLM config using
`chat_model="local/127.0.0.1:5000/v1"` (the `v1` is the API version, which is required).
As with Ollama, you can use the `-m` arg in many of the example scripts, e.g.
```
python examples/docqa/rag-local-simple.py -m local/127.0.0.1:5000/v1
```

Recommended: to ensure accurate chat formatting (and not use the defaults from ooba),
append the appropriate HuggingFace model name to the `-m` arg, separated by `//`, e.g.
```
python examples/docqa/rag-local-simple.py -m local/127.0.0.1:5000/v1//mistral-instruct-v0.2
```
(There is no need to include the full model name, as long as you include enough to
uniquely identify the model's chat-formatting template.)


## Other local LLM scenarios

There may be scenarios where the above `local/...` or `ollama/...` syntactic shorthand
does not work (e.g. when using vLLM to spin up a local LLM at an OpenAI-compatible
endpoint). For these scenarios, you will have to explicitly create an instance of 
`lm.OpenAIGPTConfig` and set *both* the `chat_model` and `api_base` parameters.
For example, suppose you are able to get responses from this endpoint using something like:
```bash
curl http://192.168.0.5:5078/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
        "model": "Mistral-7B-Instruct-v0.2",
        "messages": [
             {"role": "user", "content": "Who won the world series in 2020?"}
        ]
    }'
```
To use this endpoint with Langroid, you would create an `OpenAIGPTConfig` like this:
```python
import langroid.language_models as lm
llm_config = lm.OpenAIGPTConfig(
    chat_model="Mistral-7B-Instruct-v0.2",
    api_base="http://192.168.0.5:5078/v1",
)
```

## Quick testing with local LLMs
As mentioned [here](https://langroid.github.io/langroid/tutorials/non-openai-llms/#quick-testing-with-non-openai-models), 
you can run many of the [tests](https://github.com/langroid/langroid/tree/main/tests/main) in the main langroid repo against a local LLM
(which by default run against an OpenAI model), 
by specifying the model as `--m <model>`, 
where `<model>` follows the syntax described in the previous sections. Here's an example:

```bash
pytest tests/main/test_chat_agent.py --m ollama/mixtral
```
Of course, bear in mind that the tests may not pass due to weaknesses of the local LLM.
</file>

<file path="docs/tutorials/non-openai-llms.md">
# Using Langroid with Non-OpenAI LLMs

Langroid was initially written to work with OpenAI models via their API.
This may sound limiting, but fortunately:

- Many open-source LLMs can be served via 
OpenAI-compatible endpoints. See the [Local LLM Setup](https://langroid.github.io/langroid/tutorials/local-llm-setup/) guide for details.
- There are tools like [LiteLLM](https://github.com/BerriAI/litellm/tree/main/litellm) 
  that provide an OpenAI-like API for _hundreds_ of non-OpenAI LLM providers 
(e.g. Anthropic's Claude, Google's Gemini).
- AI gateways like [LangDB](https://langdb.ai/), [Portkey](https://portkey.ai), and [OpenRouter](https://openrouter.ai/) provide unified access to multiple LLM providers with additional features like cost control, observability, caching, and fallback strategies.
  
Below we show how you can use these various options with Langroid.

## Create an `OpenAIGPTConfig` object with `chat_model = "litellm/..."`

!!! note "Install `litellm` extra"
    To use `litellm` you need to install Langroid with the `litellm` extra, e.g.:
    `pip install "langroid[litellm]"`

Next, look up the instructions in the LiteLLM docs for the specific model you are 
interested in. Here we take the example of Anthropic's `claude-instant-1` model.
Set up the necessary environment variables as specified in the LiteLLM docs,
e.g. for the `claude-instant-1` model, you will need to set the `ANTHROPIC_API_KEY`
```bash
export ANTHROPIC_API_KEY=my-api-key
```

Now you are ready to create an instance of `OpenAIGPTConfig` with the 
`chat_model` set to `litellm/<model_spec>`, where you should set `model_spec` based on LiteLLM 
docs. For example, for the `claude-instant-1` model, you would set `chat_model` to
`litellm/claude-instant-1`. But if you are using the model via a 3rd-party provider
(e.g. via Amazon Bedrock), you may also need a `provider` part in the `model_spec`, e.g. 
`litellm/bedrock/anthropic.claude-instant-v1`. In general, the LiteLLM docs indicate which of
these forms to use.

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="litellm/claude-instant-v1",
    chat_context_length=8000, # adjust according to model
)
```

A similar process works for the `Gemini 1.5 Pro` LLM:

- get the API key [here](https://aistudio.google.com/)
- set the `GEMINI_API_KEY` environment variable in your `.env` file or shell
- set `chat_model="litellm/gemini/gemini-1.5-pro-latest"` in the `OpenAIGPTConfig` object

For other gemini models supported by litellm, see [their docs](https://litellm.vercel.app/docs/providers/gemini)

## Gemini LLMs via OpenAI client, without LiteLLM

This is now the recommended way to use Gemini LLMs with Langroid,
where you don't need to use LiteLLM. As of 11/20/2024, these models
are [available via the OpenAI client](https://developers.googleblog.com/en/gemini-is-now-accessible-from-the-openai-library/).

To use langroid with Gemini LLMs, all you have to do is:

- set the `GEMINI_API_KEY` environment variable in your `.env` file or shell
- set `chat_model="gemini/<model_name>"` in the `OpenAIGPTConfig` object,  
  where `<model_name>` is one of `gemini-1.5-flash`, `gemini-1.5-flash-8b`, or `gemini-1.5-pro`

See [here](https://ai.google.dev/gemini-api/docs/models/gemini) for details on Gemini models.

For example, you can use this `llm_config`:

```python
llm_config = lm.OpenAIGPTConfig(
    chat_model="gemini/" + lm.OpenAIChatModel.GEMINI_1_5_FLASH,
)
```

In most tests you can switch to a Gemini model, e.g. via `--m gemini/gemini-1.5-flash`:

```bash
pytest -xvs tests/main/test_llm.py --m gemini/gemini-1.5-flash
```

Many of the example scripts allow switching the model using `-m` or `--model`, e.g.

```bash
python3 examples/basic/chat.py -m gemini/gemini-1.5-flash
```




## AI Gateways for Multiple LLM Providers

In addition to LiteLLM, Langroid integrates with AI gateways that provide unified access to multiple LLM providers with additional enterprise features:

### LangDB

[LangDB](https://langdb.ai/) is an AI gateway offering OpenAI-compatible APIs to access 250+ LLMs with cost control, observability, and performance benchmarking. LangDB enables seamless model switching while providing detailed analytics and usage tracking.

To use LangDB with Langroid:

- Set up your `LANGDB_API_KEY` and `LANGDB_PROJECT_ID` environment variables
- Set `chat_model="langdb/<provider>/<model_name>"` in the `OpenAIGPTConfig` (e.g., `"langdb/anthropic/claude-3.7-sonnet"`)

For detailed setup and usage instructions, see the [LangDB integration guide](../notes/langdb.md).

### Portkey

[Portkey](https://portkey.ai) is a comprehensive AI gateway that provides access to 200+ models from various providers through a unified API. It offers advanced features like intelligent caching, automatic retries, fallback strategies, and comprehensive observability tools for production deployments.

To use Portkey with Langroid:

- Set up your `PORTKEY_API_KEY` environment variable (plus provider API keys like `OPENAI_API_KEY`)
- Set `chat_model="portkey/<provider>/<model_name>"` in the `OpenAIGPTConfig` (e.g., `"portkey/openai/gpt-4o-mini"`)

For detailed setup and usage instructions, see the [Portkey integration guide](../notes/portkey.md).

### OpenRouter

[OpenRouter](https://openrouter.ai/) provides access to a wide variety of both open and proprietary LLMs through a unified API. It features automatic routing and load balancing, making it particularly useful for accessing larger open LLMs without local resources and for using multiple providers through a single interface.

To use OpenRouter with Langroid:

- Set up your `OPENROUTER_API_KEY` environment variable
- Set `chat_model="openrouter/<model_name>"` in the `OpenAIGPTConfig`

For more details, see the [Local LLM Setup guide](local-llm-setup.md#openproprietary-llms-via-openrouter).

## Working with the created `OpenAIGPTConfig` object

From here you can proceed as usual, creating instances of `OpenAIGPT`,
`ChatAgentConfig`, `ChatAgent` and `Task` object as usual.

E.g. you can create an object of class `OpenAIGPT` (which represents any
LLM with an OpenAI-compatible API) and interact with it directly:
```python
from langroid.language_models import LLMMessage, Role

llm = lm.OpenAIGPT(llm_config)
messages = [
    LLMMessage(content="You are a helpful assistant", role=Role.SYSTEM),
    LLMMessage(content="What is the capital of Ontario?", role=Role.USER),
]
response = llm.chat(messages, max_tokens=50)
```

When you interact directly with the LLM, you are responsible for keeping dialog history.
Also you would often want an LLM to have access to tools/functions and external
data/documents (e.g. vector DB or traditional DB). An Agent class simplifies managing all of these.
For example, you can create an Agent powered by the above LLM, wrap it in a Task and have it
run as an interactive chat app:

```python
agent_config = lr.ChatAgentConfig(llm=llm_config, name="my-llm-agent")
agent = lr.ChatAgent(agent_config)

task = lr.Task(agent, name="my-llm-task")
task.run()
```

## Example: Simple Chat script with a non-OpenAI proprietary model

Many of the Langroid example scripts have a convenient `-m`  flag that lets you
easily switch to a different model. For example, you can run 
the `chat.py` script in the `examples/basic` folder with the 
`litellm/claude-instant-1` model:
```bash
python3 examples/basic/chat.py -m litellm/claude-instant-1
```

## Quick testing with non-OpenAI models

There are numerous tests in the main [Langroid repo](https://github.com/langroid/langroid) that involve
LLMs, and once you setup the dev environment as described in the README of the repo, 
you can run any of those tests (which run against the default GPT4 model) against
local/remote models that are proxied by `liteLLM` (or served locally via the options mentioned above,
such as `oobabooga`, `ollama` or `llama-cpp-python`), using the `--m <model-name>` option,
where `model-name` takes one of the forms above. Some examples of tests are:

```bash
pytest -s tests/test_llm.py --m local/localhost:8000
pytest -s tests/test_llm.py --m litellm/claude-instant-1
```
When the `--m` option is omitted, the default OpenAI GPT4 model is used.

!!! note "`chat_context_length` is not affected by `--m`"
      Be aware that the `--m` option only switches the model, but does not affect the `chat_context_length` 
      parameter in the `OpenAIGPTConfig` object, which you may need to adjust for different models.
      So this option is only meant for quickly testing against different models, and not meant as
      a way to switch between models in a production environment.
</file>

<file path="docs/tutorials/postgresql-agent.md">
# Chat with a PostgreSQL DB using SQLChatAgent

The [`SQLChatAgent`](../reference/agent/special/sql/sql_chat_agent.md) is
designed to facilitate interactions with an SQL database using natural language.
A ready-to-use script based on the `SQLChatAgent` is available in the `langroid-examples` 
repo at [`examples/data-qa/sql-chat/sql_chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/data-qa/sql-chat/sql_chat.py)
(and also in a similar location in the main `langroid` repo).
This tutorial walks you through how you might use the `SQLChatAgent` if you were
to write your own script from scratch. We also show some of the internal workings of this Agent.

The agent uses the schema context to generate SQL queries based on a user's
input. Here is a tutorial on how to set up an agent with your PostgreSQL
database. The steps for other databases are similar. Since the agent implementation relies
on SQLAlchemy, it should work with any SQL DB supported by SQLAlchemy.
It offers enhanced functionality for MySQL and PostgreSQL by
automatically extracting schemas from the database.

## Before you begin

!!! note "Data Privacy Considerations"
    Since the SQLChatAgent uses OpenAI GPT-4 as the underlying language model,
    users should be aware that database information processed by the agent may be
    sent to OpenAI's API, and should only proceed if they are comfortable with this.

1. Install PostgreSQL dev libraries for your platform, e.g.
    - `sudo apt-get install libpq-dev` on Ubuntu,
    - `brew install postgresql` on Mac, etc.

2. Follow the general [setup guide](../quick-start/setup.md) to get started with Langroid
(mainly, install `langroid` into your virtual env, and set up suitable values in 
the `.env` file). Note that to use the SQLChatAgent with a PostgreSQL database,
you need to install the `langroid[postgres]` extra, e.g.:

    - `pip install "langroid[postgres]"` or 
    - `poetry add "langroid[postgres]"` or `uv add "langroid[postgres]"`
    - `poetry install -E postgres` or `uv pip install --extra postgres -r pyproject.toml`


If this gives you an error, try `pip install psycopg2-binary` in your virtualenv.


## Initialize the agent

```python
from langroid.agent.special.sql.sql_chat_agent import (
    SQLChatAgent,
    SQLChatAgentConfig,
)

agent = SQLChatAgent(
    config=SQLChatAgentConfig(
        database_uri="postgresql://example.db",
    )
)
```

## Configuration

The following components of `SQLChatAgentConfig` are optional but strongly
recommended for improved results:

* `context_descriptions`: A nested dictionary that specifies the schema context for
  the agent to use when generating queries, for example:

```json
{
  "table1": {
    "description": "description of table1",
    "columns": {
      "column1": "description of column1 in table1",
      "column2": "description of column2 in table1"
    }
  },
  "employees": {
    "description": "The 'employees' table contains information about the employees. It relates to the 'departments' and 'sales' tables via foreign keys.",
    "columns": {
      "id": "A unique identifier for an employee. This ID is used as a foreign key in the 'sales' table.",
      "name": "The name of the employee.",
      "department_id": "The ID of the department the employee belongs to. This is a foreign key referencing the 'id' in the 'departments' table."
    }
  }
}
```

> By default, if no context-description JSON file is provided in the config, the
agent will automatically generate these descriptions using the built-in Postgres table/column comments.

* `schema_tools`: When set to `True`, activates a retrieval mode where the agent
  systematically requests only the parts of the schemas relevant to the current query. 
  When this option is enabled, the agent performs the following steps:

    1. Asks for table names.
    2. Asks for table descriptions and column names from possibly relevant table
       names.
    3. Asks for column descriptions from possibly relevant columns.
    4. Writes the SQL query.

  Setting `schema_tools=True` is especially useful for large schemas where it is costly or impossible 
  to include the entire schema in a query context. 
  By selectively using only the relevant parts of the context descriptions, this mode
  reduces token usage, though it may result in 1-3 additional OpenAI API calls before
  the final SQL query is generated.

## Putting it all together

In the code below, we will allow the agent to generate the context descriptions
from table comments by excluding the `context_descriptions` config option.
We set `schema_tools` to `True` to enable the retrieval mode.

```python
from langroid.agent.task import Task
from langroid.agent.special.sql.sql_chat_agent import (
    SQLChatAgent,
    SQLChatAgentConfig,
)

# Initialize SQLChatAgent with a PostgreSQL database URI and enable schema_tools
agent = SQLChatAgent(
    config=SQLChatAgentConfig(
        database_uri="postgresql://example.db",
        schema_tools=True,
    )
)

# Run the task to interact with the SQLChatAgent
task = Task(agent)
task.run()
```

By following these steps, you should now be able to set up an `SQLChatAgent`
that interacts with a PostgreSQL database, making querying a seamless
experience.

In the `langroid` repo we have provided a ready-to-use script
[`sql_chat.py`](https://github.com/langroid/langroid/blob/main/examples/data-qa/sql-chat/sql_chat.py)
based on the above, that you can use right away to interact with your PostgreSQL database:

```bash
python3 examples/data-qa/sql-chat/sql_chat.py
```

This script will prompt you for the database URI, and then start the agent.
</file>

<file path="docs/tutorials/supported-models.md">
# Langroid Supported LLMs and Providers

Langroid supports a wide range of Language Model providers through its 
[`OpenAIGPTConfig`][langroid.language_models.openai_gpt.OpenAIGPTConfig] class. 

!!! note "OpenAIGPTConfig is not just for OpenAI models!"
    The `OpenAIGPTConfig` class is a generic configuration class that can be used
    to configure any LLM provider that is OpenAI API-compatible.
    This includes both local and remote models.

You would typically set up the `OpenAIGPTConfig` class with the `chat_model`
parameter, which specifies the model you want to use, and other 
parameters such as `max_output_tokens`, `temperature`, etc
(see the 
[`OpenAIGPTConfig`][langroid.language_models.openai_gpt.OpenAIGPTConfig] class
and its parent class 
[`LLMConfig`][langroid.language_models.base.LLMConfig] for
full parameter details):



```python
import langroid.language_models as lm
llm_config = lm.OpenAIGPTConfig(
    chat_model="<model-name>", # possibly includes a <provider-name> prefix
    api_key="api-key", # optional, prefer setting in environment variables
    # ... other params such as max_tokens, temperature, etc.
)
```

Below are `chat_model` examples for each supported provider.
For more details see the guides on setting up Langroid with 
[local](https://langroid.github.io/langroid/tutorials/local-llm-setup/) 
and [non-OpenAI LLMs](https://langroid.github.io/langroid/tutorials/non-openai-llms/).
Once you set up the `OpenAIGPTConfig`, you can then directly interact with the LLM,
or set up an Agent with this LLM, and use it by itself, or in a multi-agent setup,
as shown in the [Langroid quick tour](https://langroid.github.io/langroid/tutorials/langroid-tour/).


Although we support specifying the `api_key` directly in the config
(not recommended, for security reasons),
more typically you would set the API key in your environment variables.
Below is a table showing, for each provider, an example `chat_model` setting
and the environment variable to set for the API key.




| Provider      | `chat_model` Example                                     | API Key Environment Variable |
|---------------|----------------------------------------------------------|----------------------------|
| OpenAI        | `gpt-4o`                                                 | `OPENAI_API_KEY` |
| Groq          | `groq/llama3.3-70b-versatile`                            | `GROQ_API_KEY` |
| Cerebras      | `cerebras/llama-3.3-70b`                                 | `CEREBRAS_API_KEY` |
| Gemini        | `gemini/gemini-2.0-flash`                                | `GEMINI_API_KEY` |
| DeepSeek      | `deepseek/deepseek-reasoner`                             | `DEEPSEEK_API_KEY` |
| GLHF          | `glhf/hf:Qwen/Qwen2.5-Coder-32B-Instruct`                | `GLHF_API_KEY` |
| OpenRouter    | `openrouter/deepseek/deepseek-r1-distill-llama-70b:free` | `OPENROUTER_API_KEY` |
| Ollama        | `ollama/qwen2.5`                                         | `OLLAMA_API_KEY` (usually `ollama`) |
| VLLM          | `vllm/mistral-7b-instruct`                               | `VLLM_API_KEY` |
| LlamaCPP      | `llamacpp/localhost:8080`                                | `LLAMA_API_KEY` |
| Generic Local | `local/localhost:8000/v1`                                | No specific env var required |
| LiteLLM       | `litellm/anthropic/claude-3-7-sonnet`                    | Depends on provider |
|               | `litellm/mistral-small`                                  | Depends on provider |
| HF Template   | `local/localhost:8000/v1//mistral-instruct-v0.2`         | Depends on provider |
|               | `litellm/ollama/mistral//hf`                             | |
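
As a minimal sketch based on the table above, here is how a couple of these providers might be configured (assuming the corresponding API keys are already set in your environment):

```python
import langroid.language_models as lm

# Groq-hosted Llama model (assumes GROQ_API_KEY is set in your environment)
groq_config = lm.OpenAIGPTConfig(chat_model="groq/llama3.3-70b-versatile")

# Local model served via Ollama (OLLAMA_API_KEY is typically just "ollama")
ollama_config = lm.OpenAIGPTConfig(chat_model="ollama/qwen2.5")
```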

## HuggingFace Chat Template Formatting

For models requiring specific prompt formatting:

```python
import langroid.language_models as lm

# Specify formatter directly
llm_config = lm.OpenAIGPTConfig(
    chat_model="local/localhost:8000/v1//mistral-instruct-v0.2",
    formatter="mistral-instruct-v0.2"
)

# Using HF formatter auto-detection
llm_config = lm.OpenAIGPTConfig(
    chat_model="litellm/ollama/mistral//hf",
)
```
</file>

<file path="docs/auto_docstring.py">
# -----------------------------------------------------#
#                    Configuration                    #
⋮----
src_dir = "langroid"
repo_root = "https://github.com/langroid/langroid/tree/main/"
nav = mkdocs_gen_files.Nav()
⋮----
#                       Runner                        #
⋮----
""" Generate code reference pages and navigation

    Based on the recipe of mkdocstrings:
    https://github.com/mkdocstrings/mkdocstrings

    Credits:
    Timothée Mazzucotelli
    https://github.com/pawamoy
"""
# Iterate over each Python file
⋮----
# Get path in module, documentation and absolute
module_path = path.relative_to(src_dir).with_suffix("")
doc_path = path.relative_to(src_dir).with_suffix(".md")
full_doc_path = Path("reference", doc_path)
⋮----
# Handle edge cases
parts = (src_dir,) + tuple(module_path.parts)
⋮----
parts = parts[:-1]
doc_path = doc_path.with_name("index.md")
full_doc_path = full_doc_path.with_name("index.md")
⋮----
# Write docstring documentation to disk via parser
⋮----
ident = ".".join(parts)
full_code_path = repo_root + "/" + str(path)
⋮----
# Update parser
</file>

<file path="docs/FAQ.md">
# Frequently Asked Questions

## Can I view the reasoning (thinking) text when using a Reasoning LLM like R1 or o1?

Yes, see this note on [reasoning-content](https://langroid.github.io/langroid/notes/reasoning-content/).


## Does Langroid work with non-OpenAI LLMs?

Yes! Langroid works with practically any LLM, local or remote, closed or open.

See these two guides:

- [Using Langroid with local/open LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/)
- [Using Langroid with non-OpenAI proprietary LLMs](https://langroid.github.io/langroid/tutorials/non-openai-llms/)

## Where can I find out about Langroid's architecture?

There are a few documents that can help:

- A work-in-progress [architecture description](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/)
  on the Langroid blog.
- The Langroid [Getting Started](https://langroid.github.io/langroid/quick-start/) guide walks you 
  step-by-step through Langroid's features and architecture.
- An article by LanceDB on [Multi-Agent Programming with Langroid](https://lancedb.substack.com/p/langoid-multi-agent-programming-framework)

## How can I limit the number of output tokens generated by the LLM?

You can set the `max_output_tokens` parameter in the `LLMConfig` class,
or more commonly, the `OpenAIGPTConfig` class, which is a subclass of `LLMConfig`,
for example:

```python
import langroid as lr
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="openai/gpt-3.5-turbo",
    max_output_tokens=100, # limit output to 100 tokens
)
agent_config = lr.ChatAgentConfig(
    llm=llm_config,
    # ... other configs
)
agent = lr.ChatAgent(agent_config)
```

Then every time the agent's `llm_response` method is called, the LLM's output 
will be limited to this number of tokens.

If you omit `max_output_tokens`, it defaults to 8192. If you wish **not** to
limit the output tokens, you can set `max_output_tokens=None`, in which case
Langroid uses the model-specific maximum output tokens from the
[`langroid/language_models/model_info.py`](https://github.com/langroid/langroid/blob/main/langroid/language_models/model_info.py) file
(specifically the `model_max_output_tokens` property of `LLMConfig`).
Note, however, that this model-specific maximum may be quite large, so you would generally
want to either omit setting `max_output_tokens` (which defaults to 8192), or set it to
another desired value.


## How Langroid handles long chat histories

You may encounter an error like this:

```
Error: Tried to shorten prompt history but ... longer than context length
```

This might happen when your chat history bumps against various limits.
Here is how Langroid handles long chat histories. Ultimately the LLM API is invoked with two key inputs:
the message history $h$, and the desired output length $n$ (defaults to the `max_output_tokens` in the 
`ChatAgentConfig`). These inputs are determined as follows (see the `ChatAgent._prep_llm_messages` method):

- let $H$ be the current message history, and $M$ be the value of `ChatAgentConfig.max_output_tokens`, and $C$ be 
  the context-length of the LLM.
- If $\text{tokens}(H) + M \leq C$, then Langroid uses $h = H$ and $n = M$, since there is enough room to fit both the
  actual chat history as well as the desired max output length.
- If $\text{tokens}(H) + M > C$, this means the context length is too small to accommodate the message history $H$
  and the desired output length $M$. Then Langroid tries to use a _shortened_ output length $n' = C - \text{tokens}(H)$,
  i.e. the output is effectively _truncated_ to fit within the context length.
    - If $n'$ is at least equal to `min_output_tokens` $m$ (default 10), Langroid proceeds with $h = H$ and $n = n'$.
    - Otherwise, this means that the message history $H$ is so long that the remaining space in the LLM's
      context-length $C$ is unacceptably small (i.e. smaller than the minimum output length $m$). In this case,
      Langroid tries to shorten the message history by dropping early messages (it will not drop the system
      message or the last message, which is a user message), updating $h$ as long as $C - \text{tokens}(h) < m$;
      if it runs out of messages to drop before this condition is satisfied, it throws the error mentioned above.

If you are getting this error, you will want to check whether:

- you have set the `chat_context_length` too small, if you are setting it manually
- you have set the `max_output_tokens` too large
- you have set the `min_output_tokens` too large

If these look fine, then the next thing to look at is whether you are accumulating too much context into the agent
history, for example retrieved passages (which can be very long) in a RAG scenario. One common case is when a query
$Q$ is answered using RAG: the retrieved passages $P$ are added to $Q$ to create a (potentially very long) prompt
like
> based on the passages P, answer query Q

Once the LLM returns an answer, you should (if appropriate for your context) avoid retaining the passages $P$ in the
agent history, i.e. the last user message should simply be $Q$, rather than the prompt above. This is exactly what you get when you
use `ChatAgent._llm_response_temp_context`, which is used by default in the `DocChatAgent`.

Another way to keep chat history tokens from growing too much is to use the `llm_response_forget` method, which 
erases both the query and response, if that makes sense in your scenario.
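
As a minimal sketch, here is how the three settings mentioned above might be set explicitly (the model name and token values below are purely illustrative):

```python
import langroid as lr
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="ollama/qwen2.5",   # illustrative model choice
    chat_context_length=32_000,    # C: set to the model's true context window
    max_output_tokens=1_000,       # M: desired output budget
    min_output_tokens=10,          # m: smallest acceptable output budget
)
agent = lr.ChatAgent(lr.ChatAgentConfig(llm=llm_config))
```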

## How can I handle large results from Tools?

As of version 0.22.0, Langroid allows you to control the size of tool results
by setting [optional parameters](https://langroid.github.io/langroid/notes/large-tool-results/) 
in a `ToolMessage` definition.

## Can I handle a tool without running a task?

Yes, if you've enabled an agent to both _use_ (i.e. generate) and _handle_ a tool. 
See the `test_tool_no_task` test for an example of this. There, the `NabroskiTool` is enabled
for the agent, and to get the agent's LLM to generate the tool, you first do 
something like:
```python
response = agent.llm_response("What is Nabroski of 1 and 2?")
```
Now the `response` is a `ChatDocument` that will contain the JSON for the `NabroskiTool`.
To _handle_ the tool, you will need to call the agent's `agent_response` method:

```python
result = agent.agent_response(response)
```
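
For context, here is a rough sketch of what such a tool might look like; the actual `NabroskiTool` lives in the Langroid test suite, so the fields and formula below are illustrative assumptions, not its real definition:

```python
import langroid as lr
from langroid.agent.tool_message import ToolMessage

class NabroskiTool(ToolMessage):
    request: str = "nabroski"
    purpose: str = "To compute the Nabroski function of two numbers <x> and <y>."
    x: int
    y: int

    def handle(self) -> str:
        # illustrative formula; the real test may define it differently
        return str(3 * self.x + self.y)

agent = lr.ChatAgent(lr.ChatAgentConfig())
agent.enable_message(NabroskiTool)  # enable both use (generation) and handling
```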

When you wrap the agent in a Task and call `task.run()`, the above two steps are done for you,
since Langroid operates via a loop mechanism; see the docs
[here](https://langroid.github.io/langroid/quick-start/multi-agent-task-delegation/#task-collaboration-via-sub-tasks).
The *advantage* of using `task.run()` instead of doing this yourself is that it
ensures that tool-generation errors are sent back to the LLM so it can retry the generation.

## OpenAI Tools and Function-calling support

Langroid supports OpenAI tool-calls API as well as OpenAI function-calls API.
Read more [here](https://github.com/langroid/langroid/releases/tag/0.7.0).

Langroid has always had its own native tool-calling support as well, 
which works with **any** LLM -- you can define a subclass of `ToolMessage` (pydantic based) 
and it is transpiled into system prompt instructions for the tool. 
In practice, we don't see much difference between using this vs OpenAI fn-calling. 
Example [here](https://github.com/langroid/langroid/blob/main/examples/basic/fn-call-local-simple.py).
Or search for `ToolMessage` in any of the `tests/` or `examples/` folders.

## Why do some example scripts return to user input immediately without handling a tool?

This is because the `task` has been set up with `interactive=True` 
(which is the default). With this setting, the task loop waits for user input after
either the `llm_response` or `agent_response` (typically a tool-handling response) 
returns a valid response. If you want to progress through the task, you can simply 
hit return, unless the prompt indicates that the user needs to enter a response.

Alternatively, the `task` can be set up with `interactive=False` -- with this setting,
the task loop will _only_ wait for user input when an entity response (`llm_response` 
or `agent_response`) _explicitly_ addresses the user. Explicit user addressing can
be done using either:

- an orchestration tool, e.g. `SendTool` (see details in
the release notes for [0.9.0](https://github.com/langroid/langroid/releases/tag/0.9.0)), an example script is the [multi-agent-triage.py](https://github.com/langroid/langroid/blob/main/examples/basic/multi-agent-triage.py), or 
- a special addressing prefix, see the example script [1-agent-3-tools-address-user.py](https://github.com/langroid/langroid/blob/main/examples/basic/1-agent-3-tools-address-user.py)


## Can I specify top_k in OpenAIGPTConfig (for LLM API calls)?

No; Langroid currently only supports parameters accepted by OpenAI's API, and `top_k` is _not_ one of them. See:

- [OpenAI API Reference](https://platform.openai.com/docs/api-reference/chat/create)
- [Discussion on top_k, top_p, temperature](https://community.openai.com/t/temperature-top-p-and-top-k-for-chatbot-responses/295542/5)
- [Langroid example](https://github.com/langroid/langroid/blob/main/examples/basic/fn-call-local-numerical.py) showing how you can set other OpenAI API parameters, using the `OpenAICallParams` object.


## Can I persist agent state across multiple runs?

For example, you may want to stop the current Python script, and
run it again later, resuming your previous conversation.
Currently there is no built-in Langroid mechanism for this, but you can
achieve a basic type of persistence by saving the agent's `message_history`:

- If you used `Task.run()` in your script, make sure the task is
set up with `restart=False` -- this prevents the agent state from being reset when
the task is run again.
- Using Python's pickle module, you can save the `agent.message_history` to a file,
and load it (if it exists) at the start of your script, as sketched below.

See the example script [`chat-persist.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-persist.py).
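
Here is a minimal sketch of this pickle-based approach (the file name is a hypothetical choice for illustration):

```python
import os
import pickle
import langroid as lr

HISTORY_FILE = "agent_history.pkl"  # hypothetical file path

agent = lr.ChatAgent(lr.ChatAgentConfig())

# restore the previous conversation, if any
if os.path.exists(HISTORY_FILE):
    with open(HISTORY_FILE, "rb") as f:
        agent.message_history = pickle.load(f)

# restart=False so the agent's state is not reset when the task runs
task = lr.Task(agent, restart=False, interactive=True)
task.run()

# save the conversation for the next run
with open(HISTORY_FILE, "wb") as f:
    pickle.dump(agent.message_history, f)
```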

For more complex persistence, you can take advantage of the `GlobalState`,
where you can store message histories of multiple agents indexed by their name.
Simple examples of `GlobalState` are in the [`chat-tree.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-tree.py) example, 
and the [`test_global_state.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_global_state.py) test.

## Is it possible to share state between agents/tasks?

The above-mentioned `GlobalState` mechanism can be used to share state between 
agents/tasks. See the links mentioned in the previous answer.

## How can I suppress LLM output?

You can use the `quiet_mode` context manager for this; see the docs
[here](https://langroid.github.io/langroid/notes/quiet-mode/).

## How can I deal with LLMs (especially weak ones) generating bad JSON in tools?

Langroid already attempts to repair bad JSON (e.g. unescaped newlines, missing quotes, etc)  
using the [json-repair](https://github.com/mangiucugna/json_repair) library and other
custom methods, before attempting to parse it into a `ToolMessage` object.
However this type of repair may not be able to handle all edge cases of bad JSON 
from weak LLMs. There are two existing ways to deal with this, and one coming soon:

- If you are defining your own `ToolMessage` subclass, consider deriving it
  from `XMLToolMessage` instead; see the [XML-based Tools](https://langroid.github.io/langroid/notes/xml-tools/)
- If you are using an existing Langroid `ToolMessage`, e.g. `SendTool`, you can 
  define your own subclass of `SendTool`, say `XMLSendTool`,
  inheriting from both `SendTool` and `XMLToolMessage`; see this 
  [example](https://github.com/langroid/langroid/blob/main/examples/basic/xml_tool.py)
- Coming soon: strict decoding to leverage the Structured JSON outputs supported by OpenAI
  and open LLM providers such as `llama.cpp` and `vllm`.

The first two methods instruct the LLM to generate XML instead of JSON,
and any field designated with `verbatim=True` will be enclosed
within an XML `CDATA` tag, which does *not* require any escaping, and can
be far more reliable for tool-use than JSON, especially with weak LLMs.
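
As a rough sketch of the first option, assuming an `XMLToolMessage` subclass is defined like a regular `ToolMessage` and that `verbatim=True` is passed as a field marker as described above (the tool name and fields here are illustrative; check the linked XML-tools note for the exact API):

```python
from pydantic import Field
from langroid.agent.xml_tool_message import XMLToolMessage

class CodeTool(XMLToolMessage):
    request: str = "code_tool"
    purpose: str = "To write a code <snippet> to the given <filepath>."
    filepath: str = Field(..., description="Path of the file to write")
    # verbatim=True: this field is emitted inside a CDATA section, so no escaping is needed
    snippet: str = Field(..., description="The code to write", verbatim=True)
```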

## How can I handle an LLM "forgetting" to generate a `ToolMessage`? 

Sometimes the LLM (especially a weak one) forgets to generate a 
[`ToolMessage`][langroid.agent.tool_message.ToolMessage]
(either via OpenAI's tools/functions API, or via Langroid's JSON/XML Tool mechanism),
despite being instructed to do so. There are a few remedies Langroid offers for this:

**Improve the instructions in the `ToolMessage` definition:**

- Improve instructions in the `purpose` field of the `ToolMessage`.
- Add an `instructions` class-method to the `ToolMessage`, as in the
  [`chat-search.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/chat-search.py) script:

```python
@classmethod
def instructions(cls) -> str:
    return """
        IMPORTANT: You must include an ACTUAL query in the `query` field,
        """
```
  These instructions are meant to be general guidance on how to use the tool
  (e.g. how to set the field values), not specifically about the formatting.

- Add a `format_instructions` class-method, e.g. like the one in the 
  [`chat-multi-extract-3.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/chat-multi-extract-3.py) 
  example script.

```python
@classmethod
def format_instructions(cls, tool: bool = True) -> str:
    instr = super().format_instructions(tool)
    instr += """
    ------------------------------
    ASK ME QUESTIONS ONE BY ONE, to FILL IN THE FIELDS 
    of the `lease_info` function/tool.
    First ask me for the start date of the lease.
    DO NOT ASK ANYTHING ELSE UNTIL YOU RECEIVE MY ANSWER.
    """
    return instr
```

**Override the `handle_message_fallback` method in the agent:**

This method is called when the Agent's `agent_response` method receives a non-tool
message as input. The default behavior of this method is to return None, but it
is very useful to override the method to handle cases where the LLM has forgotten
to use a tool. You can define this method to return a "nudge" to the LLM
telling it that it forgot to do a tool-call, e.g. see how it's done in the 
example script [`chat-multi-extract-local.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/chat-multi-extract-local.py):

```python
class LeasePresenterAgent(ChatAgent):
    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        """Handle scenario where Agent failed to present the Lease JSON"""
        if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
            return """
            You either forgot to present the information in the JSON format
            required in `lease_info` JSON specification,
            or you may have used the wrong name of the tool or fields.
            Try again.
            """
        return None
```

Note that despite doing all of these, the LLM may still fail to generate a `ToolMessage`.
In such cases, you may want to consider using a better LLM, or an upcoming Langroid
feature that leverages the **strict decoding** abilities of specific LLM providers
(e.g. OpenAI, llama.cpp, vllm), which use grammar-constrained decoding
to force the output to conform to the specified structure.

Langroid also provides a simpler mechanism to specify the action to take
when an LLM does not generate a tool, via the `ChatAgentConfig.handle_llm_no_tool` 
config parameter, see the 
[docs](https://langroid.github.io/langroid/notes/handle-llm-no-tool/).
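
For instance, a minimal sketch (assuming, per the linked docs, that this setting accepts a plain string to be sent back to the LLM as a reminder):

```python
import langroid as lr

agent_config = lr.ChatAgentConfig(
    # assumed usage: a nudge string returned to the LLM when it fails to use a tool
    handle_llm_no_tool="You FORGOT to use one of your TOOLs! Try again.",
)
```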

## Can I use Langroid to converse with a Knowledge Graph (KG)?

Yes, you can use Langroid to "chat with" either a Neo4j or ArangoDB KG; see the docs
[here](https://langroid.github.io/langroid/notes/knowledge-graphs/).

## How can I improve `DocChatAgent` (RAG) latency?

The behavior of `DocChatAgent` can be controlled by a number of settings in 
the `DocChatAgentConfig` class.
The top-level query-answering method in `DocChatAgent` is `llm_response`, which uses the
`answer_from_docs` method. At a high level, the response to an input message involves
the following steps:

- **Query to Stand-alone:** The LLM rephrases the query as a stand-alone query.
  This can incur some latency. You can turn it off by setting
  `assistant_mode=True` in the `DocChatAgentConfig`.
- **Retrieval:** The most relevant passages (chunks) are retrieved using a collection of semantic/lexical 
      similarity searches and ranking methods. There are various knobs in `DocChatAgentConfig` to control
      this retrieval.
- **Relevance Extraction:** LLM is used to retrieve verbatim relevant portions from
  the retrieved chunks. This is typically the biggest latency step. You can turn it off
  by setting the `relevance_extractor_config` to None in `DocChatAgentConfig`.
- **Answer Generation:** LLM generates answer based on retrieved passages.


See the [`doc-aware-chat.py`](https://github.com/langroid/langroid/blob/main/examples/docqa/doc-aware-chat.py)
example script, which illustrates some of these settings.
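
As a minimal sketch, the two latency-related settings mentioned above might be turned off like this (all other `DocChatAgentConfig` fields left at their defaults):

```python
from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig

config = DocChatAgentConfig(
    assistant_mode=True,              # skip the query-rephrasing step
    relevance_extractor_config=None,  # skip LLM-based relevance extraction
)
agent = DocChatAgent(config)
```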

In some scenarios you may want to *only* use the **retrieval** step of a `DocChatAgent`.
For this you can use the [`RetrievalTool`][langroid.agent.tools.retrieval_tool.RetrievalTool].
See the `test_retrieval_tool` in
[`test_doc_chat_agent.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_doc_chat_agent.py)
to learn how to use it. The above example script uses `RetrievalTool` as well.

## Is there support to run multiple tasks concurrently?

Yes, see the `run_batch_tasks` and related functions in 
[batch.py](https://github.com/langroid/langroid/blob/main/langroid/agent/batch.py).

See also:

- tests: [test_batch.py](https://github.com/langroid/langroid/blob/main/tests/main/test_batch.py),
   [test_relevance_extractor.py](https://github.com/langroid/langroid/blob/main/tests/main/test_relevance_extractor.py),
- example: [multi-agent-round-table.py](https://github.com/langroid/langroid/blob/main/examples/basic/multi-agent-round-table.py)

Another example is within 
[`DocChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/doc_chat_agent.py), 
which uses batch tasks for relevance extraction,
see the `get_verbatim_extracts` method -- when there are k relevant passages,
this runs k tasks concurrently, 
each of which uses an LLM-agent to extract relevant verbatim text from a passage.
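
A minimal sketch of batch usage, assuming `run_batch_tasks` accepts a task and a list of input items with default input/output mappings (see the linked tests for its full signature):

```python
import langroid as lr
from langroid.agent.batch import run_batch_tasks

agent = lr.ChatAgent(lr.ChatAgentConfig())
task = lr.Task(agent, interactive=False, single_round=True)

# run the same task concurrently over several inputs
questions = ["What is 2 + 3?", "What is 10 * 4?", "What is 7 - 2?"]
results = run_batch_tasks(task, questions)
```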

## Can I use Langroid in a FastAPI server?

Yes, see the [langroid/fastapi-server](https://github.com/langroid/fastapi-server) repo.

## Can a sub-task end all parent tasks and return a result?

Yes, there are two ways to achieve this, using [`FinalResultTool`][langroid.agent.tools.orchestration.final_result_tool.FinalResultTool]:

From a `ChatAgent`'s tool-handler or `agent_response` method: your code can return a
`FinalResultTool` with arbitrary field types; this ends the current and all parent tasks, and this
`FinalResultTool` will appear as one of the tools in the final `ChatDocument.tool_messages`.
See `test_tool_handlers_and_results` in
[test_tool_messages.py](https://github.com/langroid/langroid/blob/main/tests/main/test_tool_messages.py),
and [examples/basic/chat-tool-function.py](https://github.com/langroid/langroid/blob/main/examples/basic/chat-tool-function.py).
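
A rough sketch of this first approach, where a tool handler returns a `FinalResultTool` (the import path, tool name, and the `answer` field below are illustrative assumptions; see the linked test and example for actual usage):

```python
from langroid.agent.tool_message import ToolMessage
from langroid.agent.tools.orchestration import FinalResultTool

class AnswerTool(ToolMessage):
    request: str = "answer_tool"
    purpose: str = "To present the final <answer>."
    answer: str

    def handle(self) -> FinalResultTool:
        # returning a FinalResultTool ends this task and all parent tasks;
        # it carries arbitrary fields (here, `answer`) into the final result
        return FinalResultTool(answer=self.answer)
```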


From a `ChatAgent`'s `llm_response` method: you can define a subclass of
`FinalResultTool` and enable the agent to use this tool, which means it will become
available for the LLM to generate.
See [examples/basic/multi-agent-return-result.py](https://github.com/langroid/langroid/blob/main/examples/basic/multi-agent-return-result.py).

## How can I configure a task to retain or discard prior conversation?

In some scenarios, you may want to control whether each time you call a task's `run` 
method, the underlying agent retains the conversation history from the previous run.
There are two boolean config parameters that control this behavior: 

- the `restart` parameter (default `True`) in the `Task` constructor, and
- the `restart_as_subtask` (default `False`) parameter in the `TaskConfig` argument of the `Task` constructor.

To understand how these work, consider a simple scenario of a task `t` that has a 
subtask `t1`, e.g., suppose you have the following code with default settings 
of the `restart` and `restart_as_subtask` parameters:

```python
from langroid.agent.task import Task, TaskConfig

# default settings:
rs = False
r = r1 = True

agent = ...
task_config = TaskConfig(restart_as_subtask=rs) 
t = Task(agent, restart=r, config=task_config)

agent1 = ...
t1 = Task(agent1, restart=r1, config=task_config)
t.add_subtask(t1)
```

This default setting works as follows:
Since task `t` was constructed with the default `restart=True`, when `t.run()` is called, the conversation histories of the agent underlying `t`, as well as
those of all its subtasks (such as `t1`), are reset. However, if during `t.run()`
there are multiple calls to `t1.run()`, then the conversation history is retained across these calls, even though `t1` was constructed with the default `restart=True` --
this is because the `restart` constructor parameter has no effect on a task's reset
behavior **when it is run as a subtask**.

The `TaskConfig.restart_as_subtask` parameter
controls the reset behavior of a task's `run` method when invoked as a subtask.
It defaults to `False`, which is why in the above example, the conversation history
of `t1` is retained across multiple calls to `t1.run()` that may occur
during execution of `t.run()`. If you set this parameter to `True` in the above
example, then the conversation history of `t1` would be reset each time `t1.run()` is called, during a call to `t.run()`.

To summarize, 

- The `Task` constructor's `restart` parameter controls the reset behavior of the task's `run` method when it is called directly, not as a subtask.
- The `TaskConfig.restart_as_subtask` parameter controls the reset behavior of the task's `run` method when it is called as a subtask.

These settings can be mixed and matched as needed.

Additionally, all reset behavior can be turned off during a specific `run()` invocation
by calling it with `allow_restart=False`, e.g.,  `t.run(..., allow_restart=False)`.

## How can I set up a task to exit as soon as the LLM responds?

In some cases you may want the top-level task or a subtask to exit as soon as the LLM responds. You can get this behavior by setting `single_round=True` during task construction, e.g.,

```python
from langroid.agent.task import Task

agent = ...
t = Task(agent, single_round=True, interactive=False)

result = t.run("What is 4 + 5?")
```

The name `single_round` comes from the fact that the task loop ends as soon as
any **one** of the agent's responders returns a valid response. Recall that an
agent's responders are `llm_response`, `agent_response` (for tool handling), and `user_response` (for user input). In the above example there are no tools and no
user interaction (since `interactive=False`), so the task will exit as soon as the LLM responds.

More commonly, you may only want this single-round behavior for a subtask, e.g.,

```python
agent = ...
t = Task(agent, single_round=False, interactive=True)

agent1 = ...
t1 = Task(agent1, single_round=True, interactive=False)

t.add_subtask(t1)
top_level_query = ...
result = t.run(...)
```

See the example script [`chat-2-agent-discuss.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-2-agent-discuss.py) for an example of this, and also search for `single_round` in the rest of the examples.

!!! warning "Using `single_round=True` will prevent tool-handling"
    As explained above, setting `single_round=True` will cause the task to exit as soon as the LLM responds, and thus if it emits a valid tool (which the agent is enabled to handle), this tool will *not* be handled.
</file>

<file path="docs/index.md">
# Langroid: Harness LLMs with Multi-Agent Programming

## The LLM Opportunity

Given the remarkable abilities of recent Large Language Models (LLMs), there
is an unprecedented opportunity to build intelligent applications powered by
this transformative technology. The top question for any enterprise is: how
best to harness the power of LLMs for complex applications? For technical and
practical reasons, building LLM-powered applications is not as simple as
throwing a task at an LLM-system and expecting it to do it.

## Langroid's Multi-Agent Programming Framework

Effectively leveraging LLMs at scale requires a *principled programming 
framework*. In particular, there is often a need to maintain multiple LLM 
conversations, each instructed in different ways, and "responsible" for 
different aspects of a task.

An *agent* is a convenient abstraction that encapsulates LLM conversation 
state, along with access to long-term memory (vector-stores) and tools (a.k.a functions 
or plugins). Thus a **Multi-Agent Programming** framework is a natural fit 
for complex LLM-based applications.

> Langroid is the first Python LLM-application framework that was explicitly 
designed  with Agents as first-class citizens, and Multi-Agent Programming 
as the core  design principle. The framework is inspired by ideas from the 
[Actor Framework](https://en.wikipedia.org/wiki/Actor_model).

Langroid allows an intuitive definition of agents, tasks and task-delegation 
among agents. There is a principled mechanism to orchestrate multi-agent 
collaboration. Agents act as message-transformers, and take turns responding to (and
transforming) the current message. The architecture is lightweight, transparent, 
flexible, and allows other types of orchestration to be implemented; see the (WIP) 
[langroid architecture document](blog/posts/langroid-architecture.md).
Besides Agents, Langroid also provides simple ways to directly interact with LLMs and vector-stores. See the Langroid [quick-tour](tutorials/langroid-tour.md).

## Highlights
- **Agents as first-class citizens:** The `Agent` class encapsulates LLM conversation state,
  and optionally a vector-store and tools. Agents are a core abstraction in Langroid; 
  Agents act as _message transformers_, and by default provide 3 _responder_ methods, one corresponding to each 
  entity: LLM, Agent, User. 
- **Tasks:** A Task class wraps an Agent, gives the agent instructions (or roles, or goals),
  manages iteration over an Agent's responder methods,
  and orchestrates multi-agent interactions via hierarchical, recursive
  task-delegation. The `Task.run()` method has the same
  type-signature as an Agent's responder's methods, and this is key to how
  a task of an agent can delegate to other sub-tasks: from the point of view of a Task,
  sub-tasks are simply additional responders, to be used in a round-robin fashion
  after the agent's own responders.
- **Modularity, Reusability, Loose coupling:** The `Agent` and `Task` abstractions allow users to design
  Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
- **LLM Support**: Langroid works with practically any LLM, local/open or remote/proprietary/API-based, via a variety of libraries and providers. See guides to using [local LLMs](tutorials/local-llm-setup.md) and [non-OpenAI LLMs](tutorials/non-openai-llms.md). See [Supported LLMs](tutorials/supported-models.md).
- **Caching of LLM prompts, responses:** Langroid by default uses [Redis](https://redis.com/try-free/) for caching. 
- **Vector-stores**: [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/) and [LanceDB](https://www.lancedb.com/) are currently supported.
  Vector stores allow for Retrieval-Augmented-Generation (RAG).
- **Grounding and source-citation:** Access to external documents via vector-stores
  allows for grounding and source-citation.
- **Observability, Logging, Lineage:** Langroid generates detailed logs of multi-agent interactions and
  maintains provenance/lineage of messages, so that you can trace back
  the origin of a message.
- **Tools/Plugins/Function-calling**: Langroid supports OpenAI's recently
  released [function calling](https://platform.openai.com/docs/guides/gpt/function-calling)
  feature. In addition, Langroid has its own native equivalent, which we
  call **tools** (also known as "plugins" in other contexts). Function
  calling and tools have the same developer-facing interface, implemented
  using [Pydantic](https://docs.pydantic.dev/latest/),
  which makes it very easy to define tools/functions and enable agents
  to use them. Benefits of using Pydantic are that you never have to write
  complex JSON specs for function calling, and when the LLM
  hallucinates malformed JSON, the Pydantic error message is sent back to
  the LLM so it can fix it!



Don't worry if some of these terms are not clear to you. 
The [Getting Started Guide](quick-start/index.md) and subsequent pages 
will help you get up to speed.
</file>

<file path="issues/pydantic-v2-migration/examples-errors.md">
# Pydantic V2 Migration Issues - Examples

This document tracks all Pydantic v2 runtime errors found in the examples directory during migration testing.

## Format

Each issue entry contains:
- **File**: Path to the example file
- **Error**: The specific Pydantic v2 runtime error encountered
- **Fix Applied**: Description of the fix
- **Date**: When the issue was found and fixed

---

## Issues Found

### 1. PydanticInvalidForJsonSchema error in examples using langroid.pydantic_v1
- **Files**: 
  - `examples/basic/tool-extract-short-example.py`
  - `examples/basic/fn-call-local-simple.py`
- **Error**: `pydantic.errors.PydanticInvalidForJsonSchema` when calling `ToolMessage.name()` in system message
- **Root cause**: Files importing from deprecated `langroid.pydantic_v1` causing schema generation issues
- **Fix Applied**: Changed imports from `langroid.pydantic_v1` to direct `pydantic` imports
- **Date**: 2025-07-20

### 2. Deprecated json() method usage
- **File**: `examples/basic/chat-search.py` (and potentially others)
- **Error**: `PydanticDeprecatedSince20: The 'json' method is deprecated; use 'model_dump_json' instead`
- **Root cause**: Code is using the deprecated `tool.json()` method instead of `tool.model_dump_json()`
- **Fix Applied**: Need to update core library files to use `model_dump_json()` instead of `json()`
- **Date**: 2025-07-20

### 3. Deprecated dict() method usage
- **File**: Core library files (detected when running `examples/basic/completion.py`)
- **Error**: `PydanticDeprecatedSince20: The 'dict' method is deprecated; use 'model_dump' instead`
- **Root cause**: Code is using the deprecated `model.dict()` method instead of `model.model_dump()`
- **Fix Applied**: Need to update core library files to use `model_dump()` instead of `dict()`
- **Date**: 2025-07-20

### 4. Important Discovery: langroid.pydantic_v1 is deprecated
- **Finding**: The `langroid.pydantic_v1` module itself shows a deprecation warning:
  ```
  DeprecationWarning: langroid.pydantic_v1 is deprecated. Langroid has migrated to Pydantic v2.
  Please update your code to import directly from 'pydantic' and adapt to v2 patterns.
  ```
- **Implication**: The CLAUDE.md instruction to "ALWAYS import Pydantic classes from `langroid.pydantic_v1`" is outdated
- **Current state**: Most of the codebase has already migrated to Pydantic v2 and is importing directly from `pydantic`
- **Date**: 2025-07-20

### 5. Class-based Config deprecation warnings
- **Files**: Multiple examples trigger this warning (privacy/annotate.py, quick-start/chat-agent-tool.py, summarize/summ.py)
- **Warning**: `PydanticDeprecatedSince20: Support for class-based 'config' is deprecated, use ConfigDict instead`
- **Root cause**: Some models in the codebase or dependencies still use the old `class Config:` pattern instead of `ConfigDict`
- **Impact**: Will become errors in Pydantic v3.0
- **Fix Applied**: Need to replace all class-based `Config` with `ConfigDict` throughout the codebase
- **Date**: 2025-07-20

---

## Summary

### Total Examples Tested: ~40+ examples across different categories

### Issues Found and Fixed in Examples:
1. **Two examples had import issues** - Fixed by changing imports from `langroid.pydantic_v1` to `pydantic`
   - `examples/basic/tool-extract-short-example.py` ✓ Fixed
   - `examples/basic/fn-call-local-simple.py` ✓ Fixed

### Deprecation Warnings from Core Library:
- The deprecation warnings (`.json()`, `.dict()`, class-based `Config`) are coming from the core Langroid library code, not from the examples
- Examples themselves are correctly written for Pydantic v2

### Conclusion:
- All examples now work correctly with Pydantic v2
- The only remaining issues are deprecation warnings from the core library code
- No further fixes needed in the examples directory
</file>

<file path="issues/pydantic-v2-migration/migration-checking-log.md">
# Pydantic V2 Migration Checking Log

This document logs findings and fixes discovered during the systematic checking of the Pydantic V2 migration.

**Last Updated:** 2024-01-18
**Branch:** pydantic-v2-tree
**Total Files Examined:** ALL 83 test files in tests/main/, 11 test files in tests/extras/, 20+ example scripts, multiple root test files

## Issue #1: Missing Type Annotations for Private Attributes

**Date:** 2024-01-18
**Files Affected:**
- `langroid/agent/xml_tool_message.py`
- `langroid/agent/special/arangodb/tools.py` 
- `tests/main/test_tool_messages.py`

**Problem:** Private attributes were missing type annotations, which is required in Pydantic V2.

**Fix Applied:** Added type annotations:
- `_allow_llm_use: bool = True`
- `_max_result_tokens: int = 500`
- `_max_retained_tokens: int = 200`

## Issue #2: DoneTool Content Field Type Strictness

**Date:** 2024-01-18
**File:** `langroid/agent/tools/orchestration.py`
**Test:** `tests/main/test_task.py::test_task_tool_responses`

**Problem:** Pydantic V2 is stricter about type validation. The test was passing an integer to `DoneTool.content` which expects a string. V1 had automatic type coercion, V2 doesn't.

**Fix Applied:** Added field validator to DoneTool:
```python
@field_validator('content', mode='before')
@classmethod
def convert_content_to_string(cls, v: Any) -> str:
    """Convert content to string if it's not already."""
    return str(v) if v is not None else ""
```

## Issue #3: GlobalState Singleton Pattern with Private Attributes

**Date:** 2024-01-18
**File:** `langroid/utils/globals.py`
**Test:** `tests/main/test_global_state.py::test_initial_global_state`

**Problem:** In Pydantic V2, accessing private attributes on the class (not instance) returns a `ModelPrivateAttr` object instead of the actual value. The singleton pattern was broken because `cls._instance` returns `ModelPrivateAttr`.

**Analysis of Approaches:**
1. **ClassVar approach (cleaner):** Would use `_instances: ClassVar[Dict[Type, Optional["GlobalState"]]]` but risks breaking backward compatibility if external code accesses `_instance` directly.
2. **ModelPrivateAttr handling (chosen):** Maintains full backward compatibility by checking if the attribute is a `ModelPrivateAttr` and extracting its default value.

**Fix Applied:** Modified `get_instance()` to handle ModelPrivateAttr:
```python
@classmethod
def get_instance(cls: Type["GlobalState"]) -> "GlobalState":
    # Get the actual value from ModelPrivateAttr when accessing on class
    instance_attr = getattr(cls, '_instance', None)
    if isinstance(instance_attr, ModelPrivateAttr):
        actual_instance = instance_attr.default
    else:
        actual_instance = instance_attr
        
    if actual_instance is None:
        new_instance = cls()
        cls._instance = new_instance
        return new_instance
    return actual_instance
```

**Note:** The cleaner ClassVar approach would be preferred for new code, but backward compatibility is prioritized for this migration.

**Test Result:** All tests in `test_global_state.py` now pass after the fix.

## Issue #4: ParsingConfig chunk_size Float-to-Int Coercion

**Date:** 2024-01-18
**Files:** 
- `langroid/parsing/parser.py` (ParsingConfig)
- `langroid/parsing/md_parser.py` (MarkdownChunkConfig)
**Test:** `tests/main/test_md_parser.py::test_markdown_chunking[True-1.2]`

**Problem:** Test was passing a float value (chunk_size_factor * word_count = 1.2 * 42 = 50.4) to `chunk_size` which expects an integer. Pydantic V1 automatically coerced floats to integers, but V2 doesn't.

**Analysis:** This is a backward compatibility issue. External code might be passing float values to chunk_size (e.g., from calculations or config files with `chunk_size: 100.0`).

**Fix Applied:** Added field validators to both config classes:
```python
@field_validator('chunk_size', mode='before')
@classmethod
def convert_chunk_size_to_int(cls, v: Any) -> int:
    """Convert chunk_size to int, maintaining backward compatibility with Pydantic V1."""
    if isinstance(v, float):
        return int(v)
    return v
```

**Test Result:** The failing test now passes.

## Issue #5: Crawl4aiConfig Forward Reference Resolution

**Date:** 2024-01-18
**File:** `langroid/parsing/url_loader.py`
**Test:** `tests/main/test_url_loader.py::test_crawl4ai_mocked`

**Problem:** The code was using Pydantic V1's `update_forward_refs(**namespace)` method which has been replaced in V2 with `model_rebuild()`.

**Error:** `pydantic.errors.PydanticUserError: 'Crawl4aiConfig' is not fully defined; you should define 'ExtractionStrategy', then call 'Crawl4aiConfig.model_rebuild()'`

**Fix Applied:** 
1. Removed complex `__init_subclass__` and `__init__` methods
2. Moved forward reference resolution to module level after class definition
3. Changed from `cls.update_forward_refs(**namespace)` to `Crawl4aiConfig.model_rebuild()`

```python
# After class definition at module level:
try:
    from crawl4ai.async_configs import BrowserConfig, CrawlerRunConfig
    # ... other imports ...
    
    # Rebuild the model with resolved references
    Crawl4aiConfig.model_rebuild()
except ImportError:
    # If crawl4ai is not installed, leave forward refs as strings
    pass
```

**Test Result:** The test now passes when crawl4ai is installed.

---

## Non-Pydantic Issues Found

### LLM Non-Deterministic Failures:
These tests failed because the LLM produced different outputs than expected, but the code itself is working correctly:

1. `test_tool_messages.py::test_tool_handler_invoking_llm[True]` - Expected "7" (result of 3+4) in response, but got generic completion message
2. `test_doc_chat_agent.py::test_enrichments_integration[qdrant_cloud]` - Expected "BNP" when asked about heart-related blood tests, got "DO-NOT-KNOW"
3. `test_mcp_tools.py::test_complex_tool_decorator` - Expected "29" in response, LLM acknowledged receiving it but didn't include in final answer
4. `test_table_chat_agent.py::test_table_chat_agent_assignment_self_correction` - Expected explanation with words "removed" and "cleaned", but LLM generated tool message directly
5. `test_web_search_tools.py::test_agent_web_search_tool[False-True-ExaSearchTool]` - Search results for "LK-99 superconducting material" didn't contain expected keywords in all results

### Tests with Dependencies Now Installed:
With all dependencies installed, the following tests now pass or have non-Pydantic issues:

**Passed after dependency installation:**
- `test_arangodb.py` - ✅ All tests passed
- `test_neo4j_chat_agent.py` - ✅ All tests passed  
- `test_fastembed_embeddings.py` - ✅ All tests passed
- `test_marker_pdf_parser.py` - ✅ All tests passed
- `test_hf_embeddings.py` - ✅ All tests passed
- `test_docx_parser_extra.py` - ✅ 1 passed, 1 skipped
- `test_litellm_model_key_async` - ✅ Passed with litellm installed

**Non-Pydantic failures:**
- `test_pdf_parser.py::test_get_pdf_doc_url[docling-url]` - Network/parser timeout (even with docling installed)
- `test_pdf_parser_extra.py` - File path issue
- `test_vector_stores.py::test_vector_stores_search[weaviate_docker-...]` - Weaviate docker not running (ConnectionRefusedError)
- `test_hf_vector_stores.py` - ChromaDB compatibility issue
- `test_pyarango.py` - Still missing pyArango module (not available via pip)
- `test_csv_kg_chat.py` - Neo4j connection error
- `test_automatic_context_extraction.py` - MySQL socket path too long on macOS
- `test_llamacpp_embeddings.py::test_embeddings` - ConnectionRefusedError - requires running llama.cpp server

### Missing Dependencies (Original List):

1. `test_litellm_model_key_async` - Missing `litellm` module (install with `pip install "langroid[litellm]"`)
2. `test_neo4j_chat_agent.py` - Missing `neo4j` module
3. `test_pdf_parser.py::test_get_pdf_doc_url[docling-url]` - Missing `docling` module (install with `pip install "langroid[docling]"`)
4. `test_arangodb.py` - Missing `arango` module
5. `test_url_loader.py::test_crawl4ai_mocked` - Missing `crawl4ai` module
6. `test_vector_stores.py::test_vector_stores_search[weaviate_docker-...]` - Missing `weaviate` module (install with `pip install "langroid[weaviate]"`)
7. `test_pdf_parser_extra.py::test_get_pdf_doc_url[unstructured]` - Missing `unstructured` module (install with `pip install "langroid[unstructured]"`)
8. `test_hf_vector_stores.py` - Missing `sentence_transformers` module (install with `pip install "langroid[hf-embeddings]"`)
9. `test_docx_parser_extra.py::test_get_docx_file[unstructured]` - Missing `unstructured` module
10. `test_llamacpp_embeddings.py::test_embeddings` - ConnectionRefusedError - requires running llama.cpp server
11. `test_pyarango.py` - Missing `pyArango` module
12. `test_fastembed_embeddings.py::test_embeddings` - Missing `fastembed` module (install with `pip install "langroid[fastembed]"`)
13. `test_marker_pdf_parser.py::test_marker_pdf_parser` - Missing `marker` module (install with `pip install "langroid[marker-pdf]"`)
14. `test_hf_embeddings.py::test_embeddings` - Missing `sentence_transformers` module
15. `test_csv_kg_chat.py::test_pandas_to_kg` - Missing `neo4j` module
16. `test_automatic_context_extraction.py` - Missing `sqlalchemy` module (install with `pip install "langroid[sql]"`)

### Configuration Issues:
1. `test_llm_pdf_bytes_and_split` - Incorrect/missing OpenAI API key

### Other Issues:
1. `test_markitdown_xls_parser` - Import error handling issue in document_parser.py (UnboundLocalError)
2. `test_batch.py` - Performance issue: 189 tests timeout when run together (not Pydantic-related)

### Import Inconsistencies (Non-blocking but should be fixed):
1. **Direct pydantic imports in core library**: Found 32+ files importing directly from `pydantic` or `pydantic_settings` instead of through `langroid.pydantic_v1`. While this works (since pydantic_v1 re-exports V2), it's inconsistent:
   - Files using `from pydantic.fields import ModelPrivateAttr` directly: chat_agent.py, base.py, globals.py, task_tool.py
   - Files using `from pydantic_settings import BaseSettings` directly: Multiple parsing and config files
   
2. **Direct pydantic imports in examples**: Many example scripts import directly from `pydantic`:
   - `examples/basic/chat-tool-function.py` - Uses `from pydantic import BaseModel, Field`
   - `examples/basic/1d-screen-click.py` - Direct pydantic import with custom `__init__` pattern that may need review
   - `examples/basic/fn-call-local-simple.py`, `planner-workflow.py`, `schedule-extract.py`, `multi-agent-medical.py` and others
   - **Issue**: These should import from `langroid.pydantic_v1` for consistency
   
3. **Potential Pydantic V2 Pattern Issues**:
   - `ScreenState` class in `1d-screen-click.py` uses direct field assignment in `__init__` after `super().__init__()`
   - This pattern might need adjustment for proper Pydantic V2 compatibility

4. **Test files with direct pydantic imports**:
   - `tests/main/test_structured_output.py` - Uses `from pydantic import BaseModel, Field`
   - Multiple test files need to be updated for consistency

### Root Directory Test Files (Migration Verification):
1. `test_tool_class_preservation.py` - ✅ Passes, verifies Fix #3
2. `test_modelprivateattr_fix.py` - ❌ Import error (`langroid.pydantic_v1.fields` doesn't exist)
3. `test_tool_message_schema.py` - ✅ Passes, verifies JSON schema fix

### Basic Functionality Verification:
- ✅ Tool message creation works
- ✅ Pydantic V2 methods (`model_dump`, `model_validate`) work correctly
- ✅ Field validation and defaults work as expected

---

## Migration Summary

### Tests Run: ALL 83 test files in tests/main/ + 11 in extras + example scripts examined + root test files

### Pydantic V2 Issues Found and Fixed: 7

1. **Missing type annotations for private attributes** - Fixed in 6 locations
2. **DoneTool content field type strictness** - Added field validator
3. **GlobalState singleton pattern with ModelPrivateAttr** - Added handling for class-level private attribute access
4. **ParsingConfig chunk_size float coercion** - Added field validators to 2 config classes
5. **Crawl4aiConfig forward reference resolution** - Replaced `update_forward_refs()` with `model_rebuild()` for Pydantic V2

### Test Results Summary:
- **Total tests run**: 88 test files
- **Pydantic V2 issues**: 5 (all fixed)
- **LLM non-deterministic failures**: 5
- **Missing dependency failures**: 11+ 
- **Configuration issues**: 1
- **Other issues**: 1

### Overall Assessment:
- The Pydantic V2 migration is **exceptionally well-executed** with only 5 minor issues found across ALL 83 tests/main/ files + 11 tests/extras/ files (with dependencies installed)
- All issues were related to V2's stricter type validation and private attribute handling
- All fixes maintain backward compatibility for external code
- No major architectural changes were needed
- The migration successfully maintains the functionality while adapting to Pydantic V2's stricter requirements

### Remaining Work:
1. **Import Consistency**: Update all files to import from `langroid.pydantic_v1` instead of direct `pydantic` imports
2. **Example Scripts**: Update example scripts to use the compatibility layer
3. **Test File Cleanup**: Move migration verification test files from root to proper test directory
4. **Documentation**: Consider adding migration guide for users who might have similar patterns in their code

## Issue #6: Vector Store Test Custom Document Class (Fixed 2025-01-19)

**Date:** 2025-01-19
**File:** `tests/main/test_vector_stores.py` 
**Test:** `test_vector_stores_access`

**Problem:** When using custom document classes with additional required fields in metadata, Pydantic V2's stricter validation caused failures when retrieving documents from vector stores. The test was creating documents with the base `Document` class instead of the custom `MyDocument` class, causing the custom metadata fields to be lost.

**Fix Applied:** Changed line 325 from using `Document(` to `MyDocument(` when creating test documents. This ensures the custom metadata schema is preserved throughout storage and retrieval.
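
For illustration, a minimal sketch of the pattern (the subclass names and the extra field `group` are hypothetical, not the ones in the actual test):

```python
from langroid.mytypes import DocMetaData, Document

class MyDocMetaData(DocMetaData):
    group: str  # extra required metadata field (illustrative name)

class MyDocument(Document):
    metadata: MyDocMetaData

# Pydantic V2 keeps only the schema of the class actually instantiated, so the
# test must construct MyDocument (not the base Document) for `group` to survive
# storage in, and retrieval from, the vector store:
doc = MyDocument(content="hello", metadata=MyDocMetaData(group="hr", source="test"))
```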

## Issue #7: Eliminate langroid.pydantic_v1 Imports from Core Code (Fixed 2025-01-19)

**Date:** 2025-01-19
**Files:** 
- `langroid/vector_store/pineconedb.py`
- `langroid/agent/tool_message.py`
- `langroid/agent/base.py`
- `langroid/agent/tools/task_tool.py`
- `langroid/agent/chat_agent.py`

**Problem:** Core code was still importing from the `langroid.pydantic_v1` compatibility layer, which defeats the purpose of the Pydantic V2 migration. The goal is to use direct Pydantic V2 imports throughout the internal codebase.

**Fix Applied:** Changed all imports from `langroid.pydantic_v1` to direct imports:
- `from langroid.pydantic_v1 import BaseModel` → `from pydantic import BaseModel`
- `from langroid.pydantic_v1 import BaseSettings` → `from pydantic_settings import BaseSettings`
- And similar for Field, ValidationError, ConfigDict, field_validator

This completes the migration by eliminating the compatibility layer from internal code while maintaining it for external users.

### Key Takeaways:
- Pydantic V2's stricter type validation caught legitimate issues (missing type annotations, type coercion)
- The compatibility layer (`langroid.pydantic_v1`) works well but needs consistent usage
- Private attribute handling with `ModelPrivateAttr` was the most complex migration challenge
- Pydantic V2 is stricter about preserving custom model schemas - must use the exact model class defined
- Overall, the migration demonstrates that Langroid's architecture was already well-aligned with Pydantic V2 principles

---

## Final Testing Status Report (2025-01-18)

### Summary:
- **All Pydantic V2 related issues have been resolved** ✅
- **Total of 7 Pydantic V2 issues found and fixed**
- **No new Pydantic V2 issues discovered after dependency installation**

### Outstanding Test Failures (All Non-Pydantic):

#### 1. LLM Non-Deterministic Failures (5 tests):
- `test_tool_messages.py::test_tool_handler_invoking_llm[True]`
- `test_doc_chat_agent.py::test_enrichments_integration[qdrant_cloud]`
- `test_mcp_tools.py::test_complex_tool_decorator`
- `test_table_chat_agent.py::test_table_chat_agent_assignment_self_correction`
- `test_web_search_tools.py::test_agent_web_search_tool[False-True-ExaSearchTool]`

#### 2. Infrastructure/External Service Dependencies (8 tests):
- `test_pdf_parser.py::test_get_pdf_doc_url[docling-url]` - Network timeout
- `test_vector_stores.py::test_vector_stores_search[weaviate_docker-...]` - Weaviate Docker container not running
- `test_llamacpp_embeddings.py::test_embeddings` - llama.cpp server not running
- `test_csv_kg_chat.py` - Neo4j connection error
- `test_automatic_context_extraction.py` - MySQL socket path too long on macOS
- `test_pdf_parser_extra.py` - File path issue
- `test_hf_vector_stores.py` - ChromaDB compatibility issue
- `test_pyarango.py` - pyArango module not available via pip

#### 3. Other Issues:
- `test_markitdown_xls_parser` - Import error handling issue (UnboundLocalError)
- `test_batch.py` - Performance issue with 189 tests (they time out when run together)

### Conclusion:
**The Pydantic V2 migration is complete and successful.** All test failures are unrelated to Pydantic V2:
- No type validation errors
- No private attribute handling issues
- No forward reference resolution problems
- No field validation issues
- No model configuration issues

The migration has been thoroughly tested across:
- ✅ All 83 test files in tests/main/
- ✅ All 11 test files in tests/extras/ (with dependencies)
- ✅ Example scripts examined for patterns
- ✅ Root test files verified

**Migration Status: COMPLETE** 🎉
</file>

<file path="issues/pydantic-v2-migration/pr-pydantic-v2-fixes.md">
# Pydantic V2 Migration Fixes

## Summary
This PR completes the Pydantic V2 migration by fixing the remaining issues discovered during comprehensive testing and resolves all mypy type errors.

## Issues Fixed

### 1. Missing Type Annotations for Private Attributes
- Added type annotations to private attributes in `XMLToolMessage`, `ArangoDBTool`, and test files
- Example: `_allow_llm_use: bool = True`
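
A minimal sketch of the before/after pattern (the class is a hypothetical stand-in, not the real Langroid tool):

```python
from pydantic import BaseModel

class MyTool(BaseModel):  # hypothetical stand-in
    # Before: `_allow_llm_use = True` (no annotation) was tolerated by V1.
    # After the fix: an explicit annotation makes this a proper V2 private attribute.
    _allow_llm_use: bool = True
```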

### 2. DoneTool Content Field Type Strictness
- Added field validator to handle Pydantic V2's stricter type validation
- Automatically converts any input type to string for backward compatibility
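
A minimal sketch of the validator pattern, using a simplified stand-in for the real `DoneTool`:

```python
from typing import Any
from pydantic import BaseModel, field_validator

class DoneTool(BaseModel):  # simplified stand-in
    content: str = ""

    @field_validator("content", mode="before")
    @classmethod
    def _coerce_content_to_str(cls, v: Any) -> str:
        # V1 silently coerced non-string values; V2 is strict, so convert explicitly
        return v if isinstance(v, str) else str(v)
```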

### 3. GlobalState Singleton Pattern
- Fixed ModelPrivateAttr handling when accessing class-level private attributes
- Added proper type checking for PydanticUndefined values
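
A sketch of the unwrapping logic this fix requires, shown here as a standalone helper for illustration (the real implementation may differ):

```python
from typing import Any
from pydantic.fields import ModelPrivateAttr
from pydantic_core import PydanticUndefined

def unwrap_class_level_private_attr(value: Any) -> Any:
    # Reading an underscore attribute on the *class* (rather than an instance)
    # under Pydantic V2 can return the ModelPrivateAttr descriptor, whose default
    # may be PydanticUndefined; normalize both cases to a usable value.
    if isinstance(value, ModelPrivateAttr):
        value = value.get_default()
    return None if value is PydanticUndefined else value
```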

### 4. ParsingConfig chunk_size Float Coercion
- Added field validators to maintain backward compatibility with float inputs
- Applied to both ParsingConfig and MarkdownChunkConfig

### 5. Crawl4aiConfig Forward Reference Resolution
- Replaced deprecated `update_forward_refs()` with `model_rebuild()`
- Moved resolution to module level after class definition
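
A minimal sketch of the V2 pattern; the referenced class and field names are hypothetical stand-ins for the real config classes:

```python
from typing import Optional
from pydantic import BaseModel

class Crawl4aiConfig(BaseModel):  # simplified stand-in
    extraction_strategy: Optional["ExtractionStrategy"] = None  # forward reference

class ExtractionStrategy(BaseModel):
    name: str = "default"

# V2 replacement for the deprecated update_forward_refs(): rebuild at module
# level, once every class named in the forward references has been defined.
Crawl4aiConfig.model_rebuild()
```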

### 6. Mypy Type Errors
- Fixed return type annotations in field validators
- Added explicit exports to `langroid.pydantic_v1.__init__.py`
- Corrected type handling in various modules

## Testing
- Tested all 83 test files in tests/main/
- Tested all 11 test files in tests/extras/ (with dependencies)
- All Pydantic V2 related issues resolved
- No regressions introduced

## Documentation
- Created comprehensive migration log documenting all findings
- Organized documentation under `issues/pydantic-v2-migration/`
</file>

<file path="issues/pydantic-v2-migration/PYDANTIC_V2_MIGRATION_TASK_SPECIFICATION.md">
# Pydantic v2 Migration Task Specification

## Current State

Langroid currently uses a compatibility layer at `langroid/pydantic_v1/` that:
- Imports from `pydantic.v1.*` when Pydantic v2 is installed
- Falls back to `pydantic.*` when Pydantic v1 is installed
- Allows the codebase to work with both Pydantic versions

This approach works but creates issues:
- Import ordering conflicts when users have Pydantic v2 in their projects
- Users cannot use Pydantic v2 features alongside Langroid
- Performance limitations (Pydantic v1 is slower than v2)
- Future maintenance burden

## Goal

Migrate Langroid's internal codebase to use Pydantic v2 directly while maintaining complete backward compatibility for external users.

## Specific Objectives

### 1. Replace Internal Imports
Replace all internal imports of `langroid.pydantic_v1` with direct imports from:
- `pydantic` (for BaseModel, Field, etc.)
- `pydantic_settings` (for BaseSettings)

### 2. Update Method Calls
Update all Pydantic v1 method patterns to v2 equivalents:
- `.dict()` → `.model_dump()`
- `.parse_obj()` → `.model_validate()`
- `.json()` → `.model_dump_json()`
- `.copy()` → `.model_copy()`
- `.__fields__` → `.model_fields`
- `.schema()` → `.model_json_schema()`
- And others as needed

### 3. Update Configuration Patterns
Replace Pydantic v1 config classes with v2 ConfigDict:
```python
# From:
class Config:
    extra = Extra.allow

# To:
model_config = ConfigDict(extra='allow')
```

### 4. Update Validators
Replace v1 validators with v2 field validators:
```python
# From:
@validator('field')
def validate_field(cls, v):
    return v

# To:
@field_validator('field')
@classmethod
def validate_field(cls, v):
    return v
```

### 5. Update Dependencies
Update `pyproject.toml` to require Pydantic v2:
```toml
pydantic = "^2.0.0"
pydantic-settings = "^2.0.0"
```

## Critical Requirements

### 1. Complete Backward Compatibility
- External users should experience ZERO breaking changes
- All existing APIs must continue to work
- No changes to public interfaces

### 2. No Feature Removal
- Every existing function, class, and module must be preserved
- No deletion of files, tests, or examples
- All functionality must remain intact

### 3. Comprehensive Coverage
Update ALL instances of Pydantic v1 usage in:
- Core langroid modules
- Tests
- Examples
- Documentation

## Success Criteria

1. **Zero Internal v1 Imports**: No `langroid.pydantic_v1` imports remain in internal code
2. **All Tests Pass**: Complete test suite passes without errors
3. **Backward Compatibility**: External users can upgrade without code changes
4. **Performance**: Benefits from Pydantic v2 performance improvements
5. **Future-Proof**: Codebase is ready for Pydantic v2-only features

## Implementation Approach

1. **Systematic Analysis**: Identify all files using Pydantic v1 patterns
2. **Priority-Based Migration**: Start with core files, then tests, then examples
3. **Pattern-Based Updates**: Apply consistent transformation patterns
4. **Incremental Testing**: Test after each phase to catch issues early
5. **Verification**: Comprehensive final testing and validation

## Compatibility Layer Strategy

The existing `langroid/pydantic_v1/` compatibility layer should be:
- **Preserved** for external users who might be importing from it
- **Updated** to import from Pydantic v2 instead of v1
- **Documented** as deprecated for future removal

## Testing Strategy

1. **Before Migration**: Run full test suite to establish baseline
2. **During Migration**: Run tests after each file group
3. **After Migration**: Comprehensive test suite validation
4. **Focus Areas**: Pay special attention to:
   - Tool message functionality
   - Agent operations
   - Configuration loading
   - Data serialization/deserialization

## Deliverables

1. **Updated Codebase**: All internal code using Pydantic v2
2. **Passing Tests**: Complete test suite passes
3. **Updated Dependencies**: pyproject.toml reflects Pydantic v2
4. **Documentation**: Migration notes and compatibility information
5. **Verification Report**: Confirmation of successful migration

## Timeline

This is a significant migration that should be approached systematically over several phases, with thorough testing at each stage to ensure no functionality is lost or broken.
</file>

<file path="issues/pydantic-v2-migration/pydantic-migration-checking-instructions.md">
# Pydantic V2 Migration Verification Instructions

## Overview
You are tasked with verifying the Pydantic V2 migration changes made to the Langroid codebase. The migration has been completed, and your job is to ensure all changes are correct, comprehensive, and maintain backward compatibility.

## Reference Documents
1. **pydantic-v2-testing.md** - Contains a detailed log of all fixes made during the migration
2. **Git diff** - Review all changes made in the `pydantic-v2-tree` branch

## Verification Tasks

### 1. Review Each Migration Fix
For each fix documented in `pydantic-v2-testing.md`, verify:

#### Fix #1: ModelPrivateAttr Handling
- Check files: `langroid/agent/base.py`, `langroid/agent/chat_agent.py`, `langroid/agent/tools/task_tool.py`
- Verify underscore attributes are properly handled with ModelPrivateAttr checks
- Ensure the pattern `if isinstance(field_info, ModelPrivateAttr)` is used correctly

#### Fix #2: Type Annotations for Field Overrides
- Verify all field overrides include proper type annotations
- Check for `Optional` annotations on nullable fields
- Pattern to verify: `field_name: Type = value` instead of `field_name = value`

#### Fix #3: Tool Class Preservation in ValidationErrors
- Check that tool classes are attached to ValidationError instances
- Verify error handling maintains tool information for better error messages

#### Fix #4: ClassVar Usage
- Verify ClassVar is used for class-level constants in dynamic classes
- Check imports include `from typing import ClassVar`

#### Fix #5: DocMetaData ID Field Validator
- Check `langroid/mytypes.py` for the field validator
- Verify it converts various types (int, float, str) to string
- Check test coverage in `tests/main/test_mytypes.py`

#### Fix #6: Class Config to model_config Migration
- Ensure no `class Config:` patterns remain
- Verify all are replaced with `model_config = ConfigDict(...)` or `model_config = SettingsConfigDict(...)`

#### Fix #7: model_copy Method for Unpicklable Fields
- Check `langroid/language_models/openai_gpt.py`
- Verify the custom `model_copy` method preserves `http_client_factory`, `streamer`, and `streamer_async`

#### Fix #8: ToolMessage llm_function_schema Fallback
- Check `langroid/agent/tool_message.py`
- Verify fallback description when purpose has no default: `f"Tool for {cls.default_value('request')}"`

#### Fix #9: Field Extra Parameters (verbatim=True)
- Verify all `Field(..., verbatim=True)` are replaced with `Field(..., json_schema_extra={"verbatim": True})`
- Check for any remaining direct extra parameters on Field

#### Fix #10: DocMetaData ID Type Coercion
- Verify the field validator in `langroid/mytypes.py`
- Check it maintains backward compatibility for integer IDs

#### Fix #11: parse_obj_as Deprecation
- Check `langroid/parsing/urls.py`
- Verify `TypeAdapter(HttpUrl).validate_python()` is used instead of `parse_obj_as(HttpUrl, ...)`
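
A quick reference for what the replacement should look like (the input URL is illustrative):

```python
from pydantic import HttpUrl, TypeAdapter

# Deprecated V1-style call:  parse_obj_as(HttpUrl, "https://example.com")
# Expected V2 replacement:
url = TypeAdapter(HttpUrl).validate_python("https://example.com")
```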

### 2. Search for Remaining V1 Patterns
Run these searches to ensure no V1 patterns remain:

```bash
# Search for deprecated patterns
rg "parse_obj_as" langroid/ --type py
rg "parse_raw" langroid/ --type py
rg "parse_obj" langroid/ --type py
rg "\.dict\(\)" langroid/ --type py
rg "\.json\(\)" langroid/ --type py
rg "\.copy\(\)" langroid/ --type py
rg "__fields__" langroid/ --type py
rg "__config__" langroid/ --type py
rg "class Config:" langroid/ --type py
```

### 3. Verify V2 Patterns Are Used
Confirm these V2 patterns are in use:

```bash
# Search for V2 patterns
rg "model_dump" langroid/ --type py
rg "model_copy" langroid/ --type py
rg "model_validate" langroid/ --type py
rg "ConfigDict" langroid/ --type py
rg "field_validator" langroid/ --type py
rg "model_validator" langroid/ --type py
```

### 4. Check Import Consistency and Backward Compatibility
- Verify `langroid/pydantic_v1/__init__.py` provides proper backward compatibility:
  - Should issue a DeprecationWarning when imported
  - Should use `pydantic.v1` namespace when available (Pydantic v2 with v1 compatibility)
  - Should fall back to main `pydantic` namespace if v1 namespace not available
- Test the warnings:
  ```bash
  python -c "from langroid.pydantic_v1 import BaseModel" 2>&1 | grep Warning
  ```
- Verify it uses the v1 namespace:
  ```bash
  python -c "import langroid.pydantic_v1 as pv1; print(pv1.BaseModel.__module__)"
  # Should show 'pydantic.v1.main' when using Pydantic v2
  # Should show 'pydantic.main' when using actual Pydantic v1
  ```

### 5. Test Suite Verification
Run comprehensive tests and check for:

```bash
# Run tests and check for deprecation warnings
pytest tests/main/ -xvs 2>&1 | grep -E "PydanticDeprecatedSince20|DeprecationWarning.*pydantic"

# Run specific test files mentioned in the fixes
pytest tests/main/test_tool_messages.py -xvs
pytest tests/main/test_xml_tool_message.py -xvs
pytest tests/main/test_mytypes.py::test_docmetadata_id_conversion -xvs
pytest tests/main/test_openai_http_client.py::test_http_client_creation_with_factory -xvs
```

### 6. Backward Compatibility Checks
Ensure the migration maintains backward compatibility:

1. **DocMetaData accepts integer IDs** - Test that `DocMetaData(id=123)` works (see the sketch after this list)
2. **Tool classes without default purpose** - Verify they still work with llm_function_schema
3. **Existing user code patterns** - Consider common usage patterns that should still work
4. **langroid.pydantic_v1 imports** - Verify users can still import from this module with appropriate warnings
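
A minimal sketch of check 1, assuming `DocMetaData` is importable from `langroid.mytypes` (where the fix is documented to live) and that the validator coerces integer IDs to strings:

```python
from langroid.mytypes import DocMetaData

def test_docmetadata_accepts_int_id() -> None:
    md = DocMetaData(id=123)
    assert md.id == "123"  # integer IDs are coerced to strings
```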

### 7. Edge Cases to Verify
- Dynamic class creation with Pydantic models
- Serialization/deserialization of models
- Model inheritance patterns
- Custom validators and their migration
- Settings classes using environment variables
- The `langroid.pydantic_v1` compatibility layer behavior

### 8. Documentation Review
- Check if any documentation needs updating for V2 patterns
- Verify examples use V2 patterns
- Check for any migration guides needed for users
- Ensure the backward compatibility strategy is documented

## Expected Outcomes
1. All tests pass without Pydantic deprecation warnings
2. No V1 patterns remain in the codebase (except in compatibility layer)
3. Backward compatibility is maintained for existing user code
4. The `langroid.pydantic_v1` module correctly provides v1 compatibility when possible
5. Appropriate warnings are issued for deprecated imports

## Red Flags to Watch For
- Any remaining `parse_obj_as`, `parse_raw`, `parse_obj` usage
- Direct `.dict()` or `.json()` calls on Pydantic models
- `class Config:` patterns instead of `model_config`
- Missing type annotations on field overrides
- Broken backward compatibility for common use cases
- Silent failures when users expect v1 behavior

## Final Checklist
- [ ] All 11 documented fixes are correctly implemented
- [ ] No V1 patterns remain (except in compatibility layer)
- [ ] All tests pass without deprecation warnings
- [ ] Backward compatibility is maintained
- [ ] Code follows Pydantic V2 best practices
- [ ] Compatibility layer properly handles v1/v2 distinction
- [ ] Deprecation warnings are clear and helpful
- [ ] No new issues introduced by the migration

## How to Report Findings
Create a report documenting:
1. Each fix verified (pass/fail)
2. Any issues found
3. Suggestions for improvements
4. Overall migration quality assessment
5. Any risks or concerns for production deployment
6. Backward compatibility verification results
</file>

<file path="issues/pydantic-v2-migration/PYRANTIC-V2-MIGRATION-PLAN.md">
# Pydantic v2 Migration Plan

## Executive Summary

This document outlines a systematic approach to migrate Langroid's internal codebase from using the `langroid.pydantic_v1` compatibility layer to native Pydantic v2, while maintaining complete backward compatibility for external users.

**Scope**: 89 files using `langroid.pydantic_v1` imports across the entire codebase
**Timeline**: 7 days (systematic phased approach)
**Risk**: Low (incremental migration with testing at each phase)

## Current State Analysis

### Pydantic Usage Statistics
- **Total files with pydantic_v1 imports**: 89
  - Core langroid modules: 41 files
  - Test files: 11 files  
  - Example files: 37 files
- **Current dependency**: `"pydantic<3.0.0,>=1"` (supports both v1 and v2)

### Key Patterns to Migrate

#### 1. Method Calls (75 total occurrences)
- `.dict()` → `.model_dump()` (39 occurrences)
- `.parse_obj()` → `.model_validate()` (9 occurrences)
- `.parse_raw()` → `.model_validate_json()` (2 occurrences)
- `.json()` → `.model_dump_json()` (4 occurrences)
- `.copy()` → `.model_copy()` (21 occurrences estimated)

#### 2. Configuration Classes (22 occurrences)
```python
# From:
class Config:
    extra = Extra.allow
    validate_assignment = True

# To:
model_config = ConfigDict(extra='allow', validate_assignment=True)
```

#### 3. Validators (2 occurrences)
```python
# From:
@validator('field')
def validate_field(cls, v):
    return v

# To:
@field_validator('field')
@classmethod
def validate_field(cls, v):
    return v
```

#### 4. Import Patterns
```python
# From:
from langroid.pydantic_v1 import BaseModel, Field, BaseSettings

# To:
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings
```

### High-Priority Files for Migration

#### Core Framework (Phase 2a)
1. `langroid/agent/base.py` - Base agent class
2. `langroid/agent/tool_message.py` - Tool message system
3. `langroid/agent/chat_agent.py` - Chat agent implementation
4. `langroid/agent/task.py` - Task execution system

#### Language Models (Phase 2b)
1. `langroid/language_models/openai_gpt.py` - OpenAI integration
2. `langroid/language_models/base.py` - Base LLM classes
3. `langroid/language_models/azure_openai.py` - Azure integration
4. Other LLM provider files (8 total)

#### Vector Stores (Phase 2c)
1. `langroid/vector_store/base.py` - Base vector store
2. `langroid/vector_store/qdrant.py` - Qdrant integration
3. `langroid/vector_store/chroma.py` - Chroma integration
4. Other vector store implementations (12 total)

## Migration Plan

### Phase 1: Infrastructure Setup (Day 1)

#### 1.1 Update Dependencies
- **File**: `pyproject.toml`
- **Changes**:
  ```toml
  # From:
  pydantic = "<3.0.0,>=1"
  
  # To:
  pydantic = "^2.0.0"
  pydantic-settings = "^2.0.0"
  ```

#### 1.2 Create Migration Scripts
- **Script 1**: `scripts/migrate_pydantic_imports.py` - Automated import replacement (sketched below)
- **Script 2**: `scripts/migrate_pydantic_methods.py` - Method call migration
- **Script 3**: `scripts/migrate_pydantic_configs.py` - Config class migration
- **Script 4**: `scripts/validate_migration.py` - Verification script
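
A hypothetical sketch of what Script 1 could do; the split between `pydantic` and `pydantic_settings` imports follows the patterns listed later in this plan, and everything else (regex, file layout) is an assumption:

```python
# scripts/migrate_pydantic_imports.py (illustrative sketch, not the real script)
import re
from pathlib import Path

# Handles single-line imports only; multi-line parenthesized imports need extra care.
IMPORT_RE = re.compile(r"^from langroid\.pydantic_v1 import (.+)$", re.MULTILINE)

def rewrite(path: Path) -> None:
    text = path.read_text()

    def repl(match: re.Match) -> str:
        names = [n.strip() for n in match.group(1).split(",")]
        plain = [n for n in names if n != "BaseSettings"]
        lines = []
        if plain:
            lines.append(f"from pydantic import {', '.join(plain)}")
        if "BaseSettings" in names:
            lines.append("from pydantic_settings import BaseSettings")
        return "\n".join(lines)

    new_text = IMPORT_RE.sub(repl, text)
    if new_text != text:
        path.write_text(new_text)

for py_file in Path("langroid").rglob("*.py"):
    rewrite(py_file)
```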

#### 1.3 Baseline Testing
- Run complete test suite: `pytest tests/`
- Document current test results
- Identify any existing Pydantic-related test failures

### Phase 2: Core Framework Migration (Days 2-4)

#### Phase 2a: Base Classes (Day 2)
**Files to migrate** (2 files):
1. `langroid/agent/base.py`
2. `langroid/agent/tool_message.py`

**Migration steps**:
1. Replace `langroid.pydantic_v1` imports with native Pydantic v2
2. Update `.dict()` calls to `.model_dump()`
3. Update `.parse_obj()` calls to `.model_validate()`
4. Convert Config classes to `model_config = ConfigDict()`
5. Run targeted tests: `pytest tests/main/test_agent.py tests/main/test_tool_message.py`

#### Phase 2b: Chat Agent Core (Day 3)
**Files to migrate** (2 files):
1. `langroid/agent/chat_agent.py`
2. `langroid/agent/task.py`

**Migration steps**:
1. Import migration
2. Method call updates (heavy `.dict()` usage in chat_agent.py)
3. Config class updates
4. Run targeted tests: `pytest tests/main/test_chat_agent.py tests/main/test_task.py`

#### Phase 2c: Language Models (Day 4a)
**Files to migrate** (8 files):
1. `langroid/language_models/openai_gpt.py` (highest priority)
2. `langroid/language_models/base.py`
3. `langroid/language_models/azure_openai.py`
4. Other LLM provider files

**Migration steps**:
1. Focus on `.parse_obj()` calls (common in LLM response parsing)
2. Update configuration classes
3. Run targeted tests: `pytest tests/main/test_llm.py`

#### Phase 2d: Vector Stores (Day 4b)
**Files to migrate** (12 files):
1. `langroid/vector_store/base.py`
2. `langroid/vector_store/qdrant.py`
3. `langroid/vector_store/chroma.py`
4. Other vector store implementations

**Migration steps**:
1. Heavy focus on `.dict()` calls (document serialization)
2. Update configuration patterns
3. Run targeted tests: `pytest tests/main/test_vector_store.py`

### Phase 3: Tests & Examples (Day 5)

#### Phase 3a: Test Files (Day 5a)
**Files to migrate** (11 files):
- All test files with `langroid.pydantic_v1` imports
- Focus on test utilities and fixtures

**Migration steps**:
1. Import migration
2. Update test assertion patterns
3. Run individual test files after migration

#### Phase 3b: Example Files (Day 5b)
**Files to migrate** (37 files):
- All example files in `examples/` directory
- Focus on quick-start examples first

**Migration steps**:
1. Import migration
2. Update example patterns
3. Run examples to verify functionality

### Phase 4: Compatibility Layer Update (Day 6)

#### 4.1 Update Compatibility Layer
**Files to modify**:
- `langroid/pydantic_v1/__init__.py`
- `langroid/pydantic_v1/main.py`

**Changes**:
```python
# Update to always import from Pydantic v2
from pydantic import BaseModel, Field, ValidationError
from pydantic_settings import BaseSettings
# Add deprecation warnings for external users
```

#### 4.2 Add Deprecation Warnings
- Add warnings for external users still importing from `langroid.pydantic_v1`
- Document migration path for external users

### Phase 5: Final Validation (Day 7)

#### 5.1 Comprehensive Testing
- Run complete test suite: `pytest tests/`
- Run with coverage: `pytest --cov=langroid tests/`
- Performance benchmarking comparison

#### 5.2 Verification Checklist
- [ ] All 89 files migrated from `langroid.pydantic_v1`
- [ ] Zero test failures
- [ ] All examples run successfully
- [ ] Backward compatibility maintained
- [ ] Performance improvements measurable
- [ ] Documentation updated

#### 5.3 Migration Verification Report
Create final report documenting:
- Files migrated and patterns updated
- Test results comparison
- Performance improvements
- Backward compatibility verification
- Any issues encountered and resolved

## Migration Patterns Reference

### Import Migrations
```python
# Before
from langroid.pydantic_v1 import BaseModel, Field, BaseSettings, ValidationError

# After
from pydantic import BaseModel, Field, ValidationError
from pydantic_settings import BaseSettings
```

### Method Call Migrations
```python
# Before
data = model.dict()
obj = Model.parse_obj(data)
json_str = model.json()
copy_obj = model.copy()

# After
data = model.model_dump()
obj = Model.model_validate(data)
json_str = model.model_dump_json()
copy_obj = model.model_copy()
```

### Config Class Migrations
```python
# Before
class MyModel(BaseModel):
    field: str
    
    class Config:
        extra = Extra.allow
        validate_assignment = True

# After
class MyModel(BaseModel):
    field: str
    
    model_config = ConfigDict(extra='allow', validate_assignment=True)
```

### Validator Migrations
```python
# Before
@validator('field')
def validate_field(cls, v):
    return v

# After
@field_validator('field')
@classmethod
def validate_field(cls, v):
    return v
```

## Risk Mitigation Strategies

### 1. Incremental Migration
- Migrate files in logical groups
- Test after each group
- Maintain rollback capability

### 2. Backward Compatibility
- Preserve all existing APIs
- No changes to public interfaces
- Compatibility layer remains functional

### 3. Comprehensive Testing
- Run tests after each migration phase
- Focus on integration tests
- Performance regression testing

### 4. Documentation
- Update migration status in real-time
- Document any breaking changes discovered
- Create troubleshooting guide

## Success Metrics

### Primary Metrics
- **Migration Coverage**: 100% of files migrated from `langroid.pydantic_v1`
- **Test Success Rate**: 100% of existing tests pass
- **Backward Compatibility**: Zero breaking changes for external users

### Secondary Metrics
- **Performance Improvement**: Measurable speed improvements
- **Memory Usage**: Reduced memory footprint
- **Code Quality**: Cleaner, more maintainable code

## Rollback Plan

If critical issues are discovered:
1. **Immediate**: Revert specific file changes
2. **Temporary**: Maintain both old and new patterns
3. **Final**: Complete rollback to compatibility layer only

## Post-Migration Tasks

### 1. Documentation Updates
- Update README with Pydantic v2 requirements
- Update contribution guidelines
- Create migration guide for external users

### 2. Future Cleanup
- Plan removal of compatibility layer (future version)
- Adopt Pydantic v2-only features
- Performance optimization opportunities

### 3. Communication
- Announce migration completion
- Provide migration support for users
- Update examples and tutorials

## Conclusion

This migration plan provides a systematic, low-risk approach to migrating Langroid from Pydantic v1 to v2. The phased approach ensures thorough testing at each stage while maintaining complete backward compatibility for external users.

The migration will unlock performance improvements, future-proof the codebase, and eliminate the maintenance burden of the compatibility layer while preserving all existing functionality.
</file>

<file path="issues/898-implementation.md">
# Issue #898: OpenAI HTTP Client Support for SSL Certificate Verification

## Table of Contents
1. [Problem Statement](#problem-statement)
2. [Solution Overview](#solution-overview)
3. [Implementation Plan](#implementation-plan)
4. [Implementation Details](#implementation-details)
5. [Rationale and Design Decisions](#rationale-and-design-decisions)
6. [Code Changes](#code-changes)
7. [Testing Strategy](#testing-strategy)
8. [Security Considerations](#security-considerations)
9. [Performance Analysis](#performance-analysis)
10. [Usage Examples](#usage-examples)
11. [Migration Guide](#migration-guide)
12. [Future Considerations](#future-considerations)

## Problem Statement

Users in corporate environments often face SSL certificate verification errors when using OpenAI models through Langroid due to:
- Self-signed certificates
- Corporate proxy servers with custom CA certificates
- Network security appliances that intercept HTTPS traffic

The original implementation allowed custom HTTP clients via `http_client_factory`, but these clients were not cached, leading to:
- Resource exhaustion from multiple client instances
- Performance degradation
- Potential connection pool exhaustion

## Solution Overview

We implemented a three-tier HTTP client configuration system:

1. **Simple SSL Bypass** (`http_verify_ssl=False`) - Quick, cacheable
2. **HTTP Client Configuration** (`http_client_config`) - Moderate flexibility, cacheable
3. **Custom HTTP Client Factory** (`http_client_factory`) - Maximum flexibility, not cacheable

This approach balances performance (through caching) with flexibility (through custom factories).

## Implementation Plan

### Initial Analysis
1. **OpenAIGPT class** (in `openai_gpt.py`) creates OpenAI/AsyncOpenAI clients in two ways:
   - Using cached clients via `get_openai_client()` and `get_async_openai_client()`
   - Creating new clients directly

2. **Client caching** (in `client_cache.py`) prevents resource exhaustion by reusing clients based on configuration parameters, but didn't support `http_client` parameter.

3. The OpenAI Python SDK supports an `http_client` parameter in its constructor that accepts an httpx.Client instance.

### Proposed Solution Components

1. **Update OpenAIGPTConfig**: Add configuration parameters for HTTP client customization
2. **Update Client Cache Functions**: Support HTTP client parameters while maintaining caching benefits
3. **Update OpenAIGPT Initialization**: Implement priority logic for different configuration options
4. **Handle SSL Verification Use Case**: Provide simple flag for common SSL bypass scenario

## Implementation Details

### 1. Configuration Schema

```python
class OpenAIGPTConfig(LLMConfig):
    # Existing fields...
    
    # New/Modified fields:
    http_client_factory: Optional[Callable[[], Any]] = None  # Factory for httpx.Client
    http_verify_ssl: bool = True  # Simple flag for SSL verification
    http_client_config: Optional[Dict[str, Any]] = None  # Config dict for httpx.Client
```

### 2. Priority Order Logic

In `OpenAIGPT.__init__`:

```python
# Priority order:
# 1. http_client_factory (most flexibility, not cacheable)
# 2. http_client_config (cacheable, moderate flexibility)
# 3. http_verify_ssl=False (cacheable, simple SSL bypass)

http_client = None
async_http_client = None
http_client_config_used = None

if self.config.http_client_factory is not None:
    # Use the factory to create http_client (not cacheable)
    http_client = self.config.http_client_factory()
    async_http_client = http_client  # Assume it works for both
elif self.config.http_client_config is not None:
    # Use config dict (cacheable)
    http_client_config_used = self.config.http_client_config
elif not self.config.http_verify_ssl:
    # Simple SSL bypass (cacheable)
    http_client_config_used = {"verify": False}
    logging.warning("SSL verification has been disabled...")
```

### 3. Client Caching Enhancement

Updated `client_cache.py` to support configuration-based client creation:

```python
def get_openai_client(
    api_key: str,
    base_url: Optional[str] = None,
    organization: Optional[str] = None,
    timeout: Union[float, Timeout] = 120.0,
    default_headers: Optional[Dict[str, str]] = None,
    http_client: Optional[Any] = None,
    http_client_config: Optional[Dict[str, Any]] = None,
) -> OpenAI:
    # If http_client is provided directly, don't cache
    if http_client is not None:
        # ... create and return uncached client
    
    # If http_client_config is provided, create client from config and cache
    created_http_client = None
    if http_client_config is not None:
        from httpx import Client
        created_http_client = Client(**http_client_config)
    
    # Include config in cache key for proper caching
    cache_key = _get_cache_key(
        "openai",
        api_key=api_key,
        base_url=base_url,
        organization=organization,
        timeout=timeout,
        default_headers=default_headers,
        http_client_config=http_client_config,
    )
    
    # ... rest of caching logic
```

## Rationale and Design Decisions

### Why Three Options?

1. **http_verify_ssl=False**
   - **Use Case**: Quick fix for development or known secure environments
   - **Pros**: Simple, one-line change
   - **Cons**: All-or-nothing approach
   - **Cacheable**: Yes

2. **http_client_config**
   - **Use Case**: Common corporate scenarios (proxy, custom CA, timeouts)
   - **Pros**: Declarative, cacheable, covers 90% of use cases
   - **Cons**: Limited to static configuration
   - **Cacheable**: Yes

3. **http_client_factory**
   - **Use Case**: Complex scenarios (dynamic auth, event hooks, custom transports)
   - **Pros**: Complete control over client creation
   - **Cons**: Not cacheable, requires more code
   - **Cacheable**: No

### Why Not Cache Factory-Created Clients?

- Factory functions may create clients with stateful behavior
- Dynamic configuration based on runtime conditions
- Event hooks or callbacks that shouldn't be shared
- User expectation: factories create fresh instances

### Cache Key Design

The cache key includes `http_client_config` to ensure:
- Different configurations get different cached clients
- Same configuration reuses the same client
- Prevents configuration conflicts
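
The idea can be illustrated with a small sketch; this is not Langroid's actual `_get_cache_key`, only the design principle:

```python
import json
from typing import Any

def make_cache_key(provider: str, **params: Any) -> str:
    # Serialize parameters with sorted keys so identical configs (including
    # nested http_client_config dicts) map to the same key, while any
    # difference in configuration yields a distinct key.
    return provider + ":" + json.dumps(params, sort_keys=True, default=str)

key_a = make_cache_key("openai", api_key="k", http_client_config={"verify": False})
key_b = make_cache_key("openai", api_key="k", http_client_config={"verify": False})
assert key_a == key_b  # same config -> same cached client
```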

## Code Changes

### Files Modified

1. **langroid/language_models/openai_gpt.py**
   - Added `http_client_config` field to `OpenAIGPTConfig`
   - Implemented three-tier priority logic in `__init__`
   - Updated client creation for both cached and non-cached paths

2. **langroid/language_models/client_cache.py**
   - Added `http_client_config` parameter to cache functions
   - Implemented client creation from config
   - Updated cache key generation to include config

3. **tests/main/test_openai_http_client.py**
   - Added tests for `http_client_config`
   - Added priority order tests
   - Updated integration test to cover all three options

4. **docs/tutorials/ssl-configuration.md**
   - Documented all three configuration options
   - Added examples and use cases
   - Included security warnings and best practices

## Testing Strategy

### Unit Tests

1. **Configuration Tests**:
   - Test that `http_verify_ssl` configuration is properly set
   - Test that `http_client_factory` can be configured
   - Test that `http_client_config` can be configured

2. **Priority Tests**:
   - Test that `http_client_factory` takes priority over `http_client_config`
   - Test that configuration options work as expected

3. **Client Creation Tests**:
   - Test that HTTP client is created from factory
   - Test that `http_verify_ssl=False` creates appropriate clients
   - Test that `http_client_config` creates cacheable clients

### Integration Test

Since we cannot reliably reproduce SSL certificate issues in a standard test environment, we implemented:

1. **Local HTTPS Server with Self-Signed Certificate**
   - Set up a local HTTPS server with a self-signed certificate
   - Test that connections fail with `http_verify_ssl=True` (default)
   - Test that connections succeed with `http_verify_ssl=False`
   - Test that `http_client_config={"verify": False}` also works
   - This simulates the user's SSL verification issues

2. **Test Implementation**:
```python
@pytest.mark.skipif(
    os.getenv("CI") == "true",
    reason="Integration test with local HTTPS server - skipped in CI",
)
def test_ssl_verification_enabled_fails(self):
    """Test SSL verification behavior with self-signed certificate."""
    # Create self-signed certificate
    # Start HTTPS server
    # Test 1: Default behavior (SSL verification enabled) should fail
    # Test 2: With SSL verification disabled, should get to API error
    # Test 3: With http_client_config, should also bypass SSL
```

### Test Results

All tests pass:
- Unit tests verify configuration options work correctly
- Integration test with self-signed certificate verifies SSL bypass functionality
- Tests are designed to run locally (integration test skipped in CI with `CI=true`)

## Security Considerations

### SSL Verification Warnings

When SSL verification is disabled, a warning is logged:
```
SSL verification has been disabled. This is insecure and should only be used in trusted environments (e.g., corporate networks with self-signed certificates).
```

### Documentation Warnings

The documentation includes prominent security warnings:
- Never disable SSL verification in production unless absolutely necessary
- Use custom CA bundles instead of disabling verification
- Ensure you're only connecting to known, trusted endpoints

### Recommended Approach

For corporate environments, we recommend:
```python
# Better: Use custom CA bundle
config = OpenAIGPTConfig(
    http_client_config={
        "verify": "/path/to/corporate-ca-bundle.pem"
    }
)

# Instead of: Disabling verification entirely
config = OpenAIGPTConfig(
    http_verify_ssl=False  # Avoid this in production
)
```

## Performance Analysis

### Client Caching Benefits

**Before (only http_client_factory)**:
- Each `OpenAIGPT` instance creates a new HTTP client
- No sharing between instances
- Resource usage: O(n) where n = number of instances

**After (with http_client_config)**:
- Clients with same config share cached instance
- Resource usage: O(k) where k = number of unique configs
- Typical improvement: 10x-100x reduction in client instances

### Benchmark Results

```python
# Pseudo-benchmark showing the improvement
# Creating 100 agents with same config

# Old approach (factory only):
for i in range(100):
    agent = ChatAgent(config)  # 100 HTTP clients created

# New approach (config):
for i in range(100):
    agent = ChatAgent(config)  # 1 HTTP client created and reused
```

## Usage Examples

### Simple SSL Bypass (Quick Solution)
```python
import langroid as lr
import langroid.language_models as lm

config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_verify_ssl=False  # Disables SSL verification
)

# Use with an agent
agent = lr.ChatAgent(lr.ChatAgentConfig(llm=config))
```

### HTTP Client Configuration (Moderate Control, Cacheable)
```python
import langroid as lr
import langroid.language_models as lm

# Configure HTTP client with a dictionary
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_config={
        "verify": False,  # or path to CA bundle: "/path/to/ca-bundle.pem"
        "proxy": "http://proxy.company.com:8080",
        "timeout": 30.0,
        "headers": {
            "User-Agent": "MyApp/1.0"
        }
    }
)

# This configuration is cacheable - multiple agents can share the same client
agent1 = lr.ChatAgent(lr.ChatAgentConfig(llm=config))
agent2 = lr.ChatAgent(lr.ChatAgentConfig(llm=config))  # Reuses cached client
```

### Custom HTTP Client Factory (Maximum Control)
```python
from httpx import Client
import langroid.language_models as lm

def create_custom_client():
    """Factory function to create a custom HTTP client."""
    # Can include complex logic, event hooks, custom auth, etc.
    client = Client(
        verify=False,  # or provide path to custom CA bundle
        proxies={
            "https": "http://proxy.company.com:8080"
        },
        timeout=30.0
    )
    
    # Add event hooks for logging, monitoring, etc.
    def log_request(request):
        print(f"Request: {request.method} {request.url}")
    
    def log_response(response):
        print(f"Response: {response.status_code}")
    
    client.event_hooks = {
        "request": [log_request],
        "response": [log_response]
    }
    
    return client

# Use the custom client factory (not cacheable)
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_factory=create_custom_client
)
```

### Corporate Proxy with Custom CA Bundle
```python
import langroid.language_models as lm

# Better approach: Use custom CA bundle instead of disabling verification
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_config={
        "verify": "/path/to/corporate-ca-bundle.pem",
        "proxies": {
            "http": "http://proxy.corp.com:8080",
            "https": "http://proxy.corp.com:8080"
        },
        "headers": {
            "Proxy-Authorization": "Basic <encoded-credentials>"
        }
    }
)
```

### Development/Testing with Local API Server
```python
import langroid.language_models as lm

# For local development with self-signed certificates
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    api_base="https://localhost:8443/v1",
    http_verify_ssl=False  # OK for local development
)
```

## Migration Guide

### For Users Currently Using http_client_factory

**Assess if you need factory flexibility:**

Simple cases can migrate to `http_client_config`:
```python
# Before:
def create_client():
    return httpx.Client(verify=False, proxy="http://proxy:8080")

config = OpenAIGPTConfig(http_client_factory=create_client)

# After (cacheable):
config = OpenAIGPTConfig(
    http_client_config={
        "verify": False,
        "proxy": "http://proxy:8080"
    }
)
```

Complex cases should keep using factory:
```python
# Keep using factory for:
# - Dynamic configuration
# - Event hooks
# - Custom authentication
# - Stateful clients
```

### For New Users

Start with the simplest option that meets your needs:

1. **Just need to bypass SSL?** Use `http_verify_ssl=False`
2. **Need proxy or custom settings?** Use `http_client_config`
3. **Need complex behavior?** Use `http_client_factory`

## Future Considerations

### Potential Enhancements

1. **Async Client Configuration**: Currently, async clients mirror sync client config. Future versions could support separate async configuration.

2. **Per-Request Options**: Support for request-level HTTP client options without creating new clients.

3. **Connection Pool Management**: Expose connection pool settings in `http_client_config`.

4. **Metrics and Monitoring**: Add hooks for monitoring cached vs. uncached client usage.

### Breaking Changes

None. All changes are additive and maintain backward compatibility.

### Deprecation Strategy

No deprecations planned. All three options serve different use cases and will be maintained.

## Summary

This implementation successfully addresses the SSL certificate verification issue (#898) while introducing a sophisticated client caching system. The key achievements are:

1. **Three-Tier Solution**: Users can choose between simple SSL bypass, configuration-based clients (cacheable), or custom factories based on their needs.

2. **Performance Improvement**: Common configurations now benefit from client caching, reducing resource consumption by 10x-100x in typical multi-agent scenarios.

3. **Backward Compatibility**: All existing code continues to work without modification.

4. **Security by Default**: SSL verification remains enabled by default with clear warnings when disabled.

5. **Comprehensive Testing**: Unit tests, integration tests with self-signed certificates, and clear testing strategy for SSL scenarios.

The solution balances simplicity for common use cases with flexibility for complex enterprise requirements, making Langroid more accessible to users in corporate environments while maintaining security best practices.

## Acknowledgments

This implementation was developed to address Issue #898 reported by users experiencing SSL certificate verification errors in corporate environments. The solution evolved from initial HTTP client factory support to a comprehensive three-tier system based on feedback about resource exhaustion from uncached clients.
</file>

<file path="issues/html-logger-implementation.md">
# HTML Logger Implementation Plan

## Overview

This document outlines the technical implementation plan for adding an HTML logger
to Langroid's task system. The implementation will create self-contained HTML files
with collapsible log entries, following the specification in `html-logger.md`.

## Architecture

### 1. Core Components

#### 1.1 HTMLLogger Class
Create a new logger class that inherits from or follows the pattern of existing loggers:

```python
from pathlib import Path

class HTMLLogger:
    def __init__(self, filename: str, log_dir: str = "logs"):
        self.file_path = Path(log_dir) / f"{filename}.html"
        self.entries = []
        self.tool_counter = 0  # used later to give each tool section a unique id
        self._write_header()
    
    def log(self, fields: ChatDocLoggerFields):
        """Add a log entry"""
        entry = self._format_entry(fields)
        self.entries.append(entry)
        self._append_to_file(entry)
    
    def close(self):
        """Finalize the HTML file"""
        self._write_footer()
```

#### 1.2 HTML Template Structure
The HTML file will have three main sections:

1. **Header Section**: Static CSS, JavaScript, and page header
2. **Content Section**: Dynamic log entries
3. **Footer Section**: Closing tags and finalization

### 2. Implementation Steps

#### Step 1: Create HTML Logger Foundation
1. Add `html_logger.py` in `langroid/agent/logging/`
2. Define the `HTMLLogger` class with basic file handling
3. Implement HTML header generation with embedded CSS and JavaScript

#### Step 2: Integrate with Task System
1. Modify `init_loggers` method in `task.py` to include HTML logger option
2. Add configuration flag (e.g., `enable_html_logging` in TaskConfig)
3. Update `log_message` method to send data to HTML logger

#### Step 3: Implement HTML Generation
1. Create entry formatting logic that converts ChatDocLoggerFields to HTML
2. Implement hierarchical structure for collapsible sections
3. Add proper escaping for HTML special characters
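
A minimal sketch of the escaping step, using only the standard library:

```python
import html

def escape_for_log(text: str) -> str:
    # Escape <, >, & and quotes so message/tool content copied into the HTML
    # log cannot inject markup or scripts (see Security, section 7.2).
    return html.escape(text, quote=True)
```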

#### Step 4: Add JavaScript Functionality
1. Implement toggle functionality for collapsible sections
2. Add "Expand All" / "Collapse All" controls
3. Ensure smooth animations and state management

### 3. Detailed Component Design

#### 3.1 HTML Header Template
```python
HTML_HEADER = """<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{task_name} - Langroid Task Log</title>
    <style>
        body {{
            background-color: #2b2b2b;
            color: #f0f0f0;
            font-family: 'Consolas', 'Monaco', monospace;
            margin: 0;
            padding: 20px;
        }}
        .header {{
            border: 2px solid #d4a017;
            padding: 10px;
            margin-bottom: 20px;
            color: #d4a017;
        }}
        .entry {{
            margin-bottom: 10px;
            border-left: 3px solid transparent;
        }}
        .entry.user {{ border-left-color: #00ff00; }}
        .entry.assistant {{ border-left-color: #ff6b6b; }}
        .toggle {{
            cursor: pointer;
            user-select: none;
            color: #00ff00;
        }}
        .collapsed .content {{ display: none; }}
        /* More styles... */
    </style>
    <script>
        function toggle(id) {{
            const element = document.getElementById(id);
            element.classList.toggle('collapsed');
            const toggle = element.querySelector('.toggle');
            toggle.textContent = element.classList.contains('collapsed') ? '[+]' : '[-]';
        }}
        /* More JavaScript... */
    </script>
</head>
<body>
    <div class="header">
        <div>{model_info}</div>
        <div>{timestamp} - {message_count} messages</div>
    </div>
    <div id="controls">
        <button onclick="expandAll()">Expand All</button>
        <button onclick="collapseAll()">Collapse All</button>
    </div>
    <div id="content">
"""
```

#### 3.2 Entry Generation Logic
```python
def _format_entry(self, fields: ChatDocLoggerFields) -> str:
    """Convert log fields to HTML entry"""
    entry_id = f"entry_{len(self.entries)}"
    entity_type = fields.responder.upper()
    
    # Build hierarchical structure
    html_parts = [f'<div class="entry {entity_type.lower()}" id="{entry_id}">']
    
    # Add entity header
    if fields.task_name and fields.task_name != "root":
        html_parts.append(f'<div class="entity-header">{fields.task_name} → {entity_type}</div>')
    else:
        html_parts.append(f'<div class="entity-header">{entity_type}</div>')
    
    # Add collapsible sections
    if fields.tool:
        html_parts.append(self._format_tool_section(fields))
    
    # Add main content
    if fields.content:
        html_parts.append(self._format_content_section(fields.content))
    
    html_parts.append('</div>')
    return '\n'.join(html_parts)
```

#### 3.3 Tool Section Formatting
```python
def _format_tool_section(self, fields: ChatDocLoggerFields) -> str:
    """Format tool calls with proper nesting"""
    tool_id = f"tool_{self.tool_counter}"
    self.tool_counter += 1
    
    # Parse tool information
    tool_name = fields.tool
    tool_type = fields.tool_type
    
    # Build tool section HTML
    return f"""
    <div class="tool-section">
        <div class="toggle" onclick="toggle('{tool_id}')">[+]</div>
        <span class="tool-name">{tool_name}({self._format_tool_params(fields)})</span>
        <div id="{tool_id}" class="tool-content collapsed">
            <!-- Tool result and raw call details -->
        </div>
    </div>
    """
```

### 4. Integration Points

#### 4.1 Task Configuration
Add to `TaskConfig`:
```python
class TaskConfig(BaseModel):
    # ... existing fields ...
    enable_html_logging: bool = True
    html_log_dir: str = "logs"
```

#### 4.2 Logger Initialization
Modify `init_loggers` in `task.py`:
```python
def init_loggers(self, tsv_formatter: logging.Formatter | None = None) -> None:
    # ... existing logger setup ...
    
    if self.config.enable_html_logging:
        from langroid.agent.logging.html_logger import HTMLLogger
        self.html_logger = HTMLLogger(
            filename=self.name or "root",
            log_dir=self.config.html_log_dir
        )
```

#### 4.3 Message Logging
Update `log_message` method:
```python
def log_message(self, resp: ChatDocument) -> None:
    # ... existing logging ...
    
    if hasattr(self, 'html_logger') and self.html_logger:
        fields = ChatDocLoggerFields.create(resp, self.id, self.name)
        self.html_logger.log(fields)
```

### 5. Testing Strategy

#### 5.1 Unit Tests
1. Test HTML generation for various message types
2. Test proper escaping of special characters
3. Test file creation and writing
4. Test JavaScript functionality (via parsing)

#### 5.2 Integration Tests
1. Test with simple single-agent tasks
2. Test with multi-agent tasks and sub-tasks
3. Test with various tool types
4. Test with long-running conversations

#### 5.3 Manual Testing
1. Verify visual appearance matches specification
2. Test collapsible functionality in browsers
3. Test performance with large logs
4. Verify accessibility features

### 6. Implementation Timeline

1. **Phase 1**: Core HTML logger class and basic integration (2-3 hours)
2. **Phase 2**: HTML generation with proper styling (2-3 hours)
3. **Phase 3**: JavaScript functionality and interactivity (1-2 hours)
4. **Phase 4**: Testing and refinement (1-2 hours)

### 7. Key Considerations

#### 7.1 Performance
- Stream writes to avoid memory buildup
- Efficient string concatenation
- Minimal JavaScript for responsiveness

#### 7.2 Security
- Proper HTML escaping to prevent XSS
- No external dependencies (self-contained)
- Safe file path handling

#### 7.3 Compatibility
- Test across major browsers
- Ensure proper UTF-8 encoding
- Handle special characters in content

#### 7.4 Edge Cases
- Empty messages
- Very long content
- Special characters in tool names
- Malformed tool responses
- System messages without content

### 8. File Structure

```
langroid/
├── agent/
│   ├── logging/
│   │   ├── __init__.py
│   │   ├── html_logger.py  # New file
│   │   └── ...
│   └── task.py  # Modified
└── tests/
    └── main/
        └── test_html_logger.py  # New test file
```

### 9. Future Extensions

While out of scope for initial implementation, consider:
- Configuration for color themes
- Export functionality
- Search within logs
- Performance optimizations for very large logs
- Real-time streaming updates

### 10. Success Criteria

The implementation will be considered successful when:
1. HTML logs are generated alongside existing logs
2. All log information is preserved and accessible
3. Collapsible sections work smoothly
4. Visual design matches specification
5. No performance impact on task execution
6. Tests pass and edge cases are handled
</file>

<file path="issues/html-logger.md">
# HTML Logger Specification for Langroid Task System

## Overview

This document specifies the requirements for a new HTML logger that will enhance
the current logging system in Langroid's task.py module. The HTML logger will
produce self-contained HTML files with collapsible entries, providing a more
user-friendly way to navigate complex multi-agent conversations.

## Current State

The Langroid task system currently supports two logging formats:
1. **TSV Logger**: Tab-separated values for structured data analysis
2. **Plain Text Logger**: Rich-formatted text logs with color coding

Both loggers capture comprehensive information about agent interactions, including
task names, responders, message content, and tool usage.

## Requirements

### 1. Output Format

- **File Type**: Self-contained HTML file with embedded CSS and JavaScript
- **File Extension**: `.html`
- **File Naming**: Same pattern as existing loggers: `{task_name}.html`
- **Encoding**: UTF-8

### 2. Visual Structure

#### 2.1 Overall Layout
- Dark theme with dark gray/black background (#2b2b2b or similar)
- Monospace font for consistency with terminal output
- Fixed header showing model info and timestamp
- Responsive design that works on various screen sizes
- Golden/amber accent color for headers and borders (#d4a017 or similar)

#### 2.2 Fixed Header Section
- Model name and version (e.g., "claude-opus-4-20250514")
- Timestamp of log generation
- Total message count
- Styled with golden border and text

#### 2.3 Collapsible Entries
Each log entry must be collapsible with:
- **Collapsed State**: Shows only the entity type/role
- **Expanded State**: Shows full message content with sub-sections
- **Toggle Control**: [+] and [-] text indicators in square brackets

#### 2.4 Entry Structure
Each entry consists of:
- **Role Header**: Entity type in colored uppercase (USER, ASSISTANT, SYSTEM, etc.)
- **Collapsible Sections**: Each with [+]/[-] toggle:
  - System Prompt (if applicable)
  - Tools (with count)
  - System Reminder (if applicable)
  - Main content

#### 2.5 Color Scheme
- **USER**: Green text (#00ff00 or similar)
- **ASSISTANT**: Red/orange text (#ff6b6b or similar)
- **SYSTEM**: Gray text
- **Tool calls**: Green indicators for [+] toggles
- **Tool results**: Success (✓) in green, Error (✗) in red
- **Code blocks**: Dark background with syntax highlighting

#### 2.6 Tool Display
When expanded, tool calls should show:
- Tool name and parameters in a code block
- Tool result with success/error indicator
- Raw tool call details (collapsible sub-section)

Example structure:
```
ASSISTANT
[+] System Reminder

I'll read the langroid-llms.txt file to see what it contains.

  [+] Read(/Users/pchalasani/Git/claude-code-play/langroid-llms.txt)
  [+] Tool Result ✓
  [-] Raw Tool Call
      {
        "type": "tool_use",
        "id": "toolu_0184van1ug4T6kAj7a8SkaKp",
        "name": "Read",
        "input": {
          "file_path": "/Users/pchalasani/Git/claude-code-play/langroid-llms.txt"
        }
      }
```
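
To illustrate how such an entry might be rendered, the sketch below builds the HTML for one collapsible entry using the [+]/[-] toggles and color scheme described in 2.3–2.5. It is a rough sketch only; the function name, CSS classes, and the `toggle` JavaScript handler it assumes are illustrative and not part of any existing implementation.

```python
import html


def render_entry(role: str, sections: dict[str, str]) -> str:
    """Render one collapsible log entry (illustrative sketch only)."""
    colors = {"USER": "#00ff00", "ASSISTANT": "#ff6b6b", "SYSTEM": "#888888"}
    color = colors.get(role.upper(), "#d4a017")
    parts = [
        f'<div class="entry"><div class="role" style="color:{color}">'
        f"{html.escape(role.upper())}</div>"
    ]
    for title, body in sections.items():
        # Each section starts collapsed; a small JS handler (`toggle`) is
        # assumed to flip [+]/[-] and show/hide the body when clicked.
        parts.append(
            '<div class="section">'
            f'<span class="toggle" onclick="toggle(this)">[+] {html.escape(title)}</span>'
            f'<pre class="body" style="display:none">{html.escape(body)}</pre>'
            "</div>"
        )
    parts.append("</div>")
    return "\n".join(parts)
```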

### 3. Functionality

#### 3.1 User Controls
- **Expand/Collapse Individual**: Click on entry header or toggle button
- **Expand All**: Button to expand all entries
- **Collapse All**: Button to collapse all entries
- **Search**: Basic text search functionality (optional enhancement)

#### 3.2 State Persistence
- Collapse/expand state should be maintained during the session
- No requirement for persistence across page reloads

### 4. Data Representation

The HTML logger should capture all information currently logged by the plain
text logger and organize it hierarchically:

#### 4.1 Primary Level (Always Visible)
- Entity/Role name (USER, ASSISTANT, AGENT, etc.)
- Task name prefix if not "root"

#### 4.2 Collapsible Sections
Each entry may have multiple collapsible sub-sections:
- **System Messages**: System prompts, reminders, etc.
- **Tool Information**: 
  - Tool count in header (e.g., "Tools (17)")
  - Individual tool calls with name and parameters
  - Tool results with success/error indicators
  - Raw tool call JSON (nested collapsible)
- **Message Content**: The actual text content
- **Metadata** (when relevant):
  - Recipient information
  - Blocked entities
  - Mark indicator for final results

#### 4.3 Mapping from Current Log Fields
- `responder` → Entity type (USER, ASSISTANT, etc.)
- `task_name` → Prefix before entity if not "root"
- `sender` + `sender_name` → Combined in display
- `tool_type` + `tool` → Tool section with appropriate formatting
- `content` → Main message content
- `mark` → Special indicator for final results
- `recipient` → Shown in metadata when present
- `block` → Shown in metadata when present

### 5. Integration Requirements

#### 5.1 Implementation Location
- Add to the existing `init_loggers` method in task.py
- Follow the same pattern as TSV and plain text loggers
- Use the same log directory and naming conventions

#### 5.2 Configuration
- HTML logger should be optional
- Controlled via configuration flag or environment variable
- Should not interfere with existing loggers

#### 5.3 Compatibility
- Must work with the existing `log_message` method
- Support the same ChatDocLoggerFields structure
- Handle sub-tasks transparently (no special handling needed)
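
To make this integration pattern concrete, here is a minimal sketch of how an HTML logger could sit next to the TSV and plain-text loggers. Only `init_loggers`, `log_message`, and `ChatDocLoggerFields` come from the spec above; the class name `HTMLLogger`, its methods, and the `enable_html` flag are illustrative assumptions, not existing Langroid code.

```python
import html


class HTMLLogger:
    """Hypothetical sketch of an HTML logger; real names in task.py may differ."""

    def __init__(self, filename: str) -> None:
        self.filename = filename
        self.entries: list[str] = []

    def append(self, fields) -> None:
        # `fields` is expected to carry the same ChatDocLoggerFields data
        # that log_message already passes to the TSV and plain-text loggers.
        role = html.escape(str(fields.responder))
        content = html.escape(str(fields.content))
        self.entries.append(f"<div class='entry'><b>{role}</b><pre>{content}</pre></div>")

    def close(self) -> None:
        with open(self.filename, "w", encoding="utf-8") as f:
            f.write("<html><body>\n" + "\n".join(self.entries) + "\n</body></html>")


# Inside init_loggers, following the existing naming pattern (section 1):
# if enable_html:  # optional flag, per section 5.2
#     self.html_logger = HTMLLogger(f"{log_dir}/{task_name}.html")
```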

### 6. Performance Considerations

- Efficient for files with thousands of log entries
- Minimal JavaScript for toggle functionality
- CSS animations should be optional or lightweight
- File size should remain reasonable for large conversations

### 7. Accessibility

- Keyboard navigation support for expanding/collapsing entries
- Clear visual indicators for interactive elements
- Sufficient color contrast for readability
- Screen reader compatible structure

## Example Visual Mock-up

```
claude-opus-4-20250514
7/8/2025, 12:00:50 PM   8 messages
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

[+] System Prompt
[+] Tools (17)

USER
[+] System Reminder

Use the read tool to read the langroid-llms.txt and see what it is about

[+] System Reminder

ASSISTANT

I'll read the langroid-llms.txt file to see what it contains.

  [+] Read(/Users/pchalasani/Git/claude-code-play/langroid-llms.txt)
  [+] Tool Result ✗
  [-] Raw Tool Call
      {
        "type": "tool_use",
        "id": "toolu_0184van1ug4T6kAj7a8SkaKp",
        "name": "Read",
        "input": {
          "file_path": "/Users/pchalasani/Git/claude-code-play/langroid-llms.txt"
        }
      }

ASSISTANT

The file is quite large (3.3MB). Let me read it in smaller chunks to understand its content.

  [-] Read(/Users/pchalasani/Git/claude-code-play/langroid-llms.txt)
      {
        "file_path": "/Users/pchalasani/Git/claude-code-play/langroid-llms.txt",
        "limit": 100
      }
  [+] Tool Result ✓
  [+] Raw Tool Call
```

## Future Enhancements (Out of Scope)

These features are not required for the initial implementation but could be
added later:
- Filtering by entity type, task name, or tool
- Export to other formats
- Real-time log streaming
- Syntax highlighting for code in messages
- Timestamp display options
- Log entry grouping by conversation threads
</file>

<file path="issues/llm-client-caching-phase1-summary.md">
# Phase 1 Implementation Summary: Client Caching

## Changes Made

### 1. Created `langroid/language_models/client_cache.py`

A new module implementing singleton pattern for LLM clients with the following features:

- **Consistent with existing caching**: Uses SHA256 hashing for cache keys, matching the approach in `OpenAIGPT._cache_lookup`
- **Wrapper functions** for each client type:
  - `get_openai_client()` / `get_async_openai_client()`
  - `get_groq_client()` / `get_async_groq_client()`
  - `get_cerebras_client()` / `get_async_cerebras_client()`
- **Configuration-based caching**: Clients are cached based on their full configuration (API key, base URL, timeout, headers, etc.)
- **Lifecycle management**: Uses `atexit` hook for cleanup and weak references to track clients

### 2. Key Implementation Details

#### Cache Key Generation
```python
def _get_cache_key(client_type: str, **kwargs: Any) -> str:
    # Convert kwargs to sorted string representation
    sorted_kwargs_str = str(sorted(kwargs.items()))
    
    # Create raw key combining client type and sorted kwargs
    raw_key = f"{client_type}:{sorted_kwargs_str}"
    
    # Hash the key for consistent length and to handle complex objects
    hashed_key = hashlib.sha256(raw_key.encode()).hexdigest()
    
    return hashed_key
```

This approach:
- Ensures deterministic keys through sorting
- Handles complex objects via string representation
- Produces fixed-length keys (64 chars)
- Matches the existing Redis cache key generation pattern
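
Building on `_get_cache_key` above, each wrapper function can then return an existing client whenever the same configuration has been seen before. A minimal sketch of the pattern follows; the actual `client_cache.py` additionally uses weak references and an `atexit` hook, and its storage details may differ.

```python
from typing import Any, Dict

from openai import OpenAI

_openai_clients: Dict[str, OpenAI] = {}


def get_openai_client(**kwargs: Any) -> OpenAI:
    """Return a cached OpenAI client for this configuration, creating it at most once."""
    key = _get_cache_key("openai", **kwargs)
    if key not in _openai_clients:
        _openai_clients[key] = OpenAI(**kwargs)
    return _openai_clients[key]
```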

### 3. Created Comprehensive Tests

`tests/main/test_client_cache.py` includes tests for:
- Singleton behavior (same config returns same client)
- Different configurations return different clients
- Different client types are cached separately
- Proper handling of timeout objects and headers
- Type differences are preserved (e.g., `30` vs `30.0` are different)

### 4. All Quality Checks Pass
- ✅ All 9 tests pass
- ✅ Type checking passes (mypy)
- ✅ Linting passes (ruff, black)

## Design Decisions

1. **Used SHA256 hashing instead of tuple keys**: More robust for complex objects and consistent with existing caching approach
2. **Type strictness**: `30` and `30.0` create different cache entries - better to be overly strict than risk bugs
3. **Weak references**: Allow garbage collection of unused clients while maintaining cleanup capability
4. **Simple atexit cleanup**: accepted that async clients are left for the OS to clean up at process exit, since `atexit` handlers cannot await their `close()`

## Next Steps (Phase 2)

Update `OpenAIGPT.__init__` to use these wrapper functions instead of directly creating clients:
```python
# Current
self.client = OpenAI(api_key=self.api_key, ...)

# New  
from langroid.language_models.client_cache import get_openai_client
self.client = get_openai_client(api_key=self.api_key, ...)
```

This will require careful updating of all client creation locations in `openai_gpt.py`.
</file>

<file path="issues/llm-client-caching-phase2-summary.md">
# Phase 2 Implementation Summary: OpenAIGPT Integration

## Changes Made

### 1. Updated `langroid/language_models/openai_gpt.py`

- **Added imports** for client cache wrapper functions
- **Replaced direct client instantiation** with wrapper functions:
  - `Groq()` → `get_groq_client()`
  - `AsyncGroq()` → `get_async_groq_client()`
  - `Cerebras()` → `get_cerebras_client()`
  - `AsyncCerebras()` → `get_async_cerebras_client()`
  - `OpenAI()` → `get_openai_client()`
  - `AsyncOpenAI()` → `get_async_openai_client()`

### 2. Fixed Async Client Cleanup

Updated `_cleanup_clients()` to handle async clients correctly: it checks whether `close()` is a coroutine function and, if so, skips the call, since `atexit` handlers cannot await coroutines (async client cleanup is left to the OS at process exit).
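
Roughly, the check looks like the sketch below (illustrative only; the real `_cleanup_clients` in `client_cache.py` tracks clients via weak references and may differ in detail):

```python
import inspect
from typing import Any, List

_tracked_clients: List[Any] = []  # placeholder for however the cache tracks live clients


def _cleanup_clients() -> None:
    for client in list(_tracked_clients):
        close = getattr(client, "close", None)
        if close is None:
            continue
        if inspect.iscoroutinefunction(close):
            # atexit cannot await, so async clients are skipped here and
            # left to the OS to clean up at process exit
            continue
        close()
```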

### 3. Created Integration Tests

`tests/main/test_openai_gpt_client_cache.py` with tests verifying:
- Multiple OpenAIGPT instances with same config share clients
- Different configurations create different clients
- Works correctly for OpenAI, Groq, and Cerebras models
- Different base URLs and headers create different clients

## Results

### Before (Anti-pattern)
```python
# Creating 100 agents = 100 OpenAI clients
for row in data[:100]:
    agent = ChatAgent(config)  # Each creates new OpenAI client
    result = agent.run(row)
```

### After (With caching)
```python
# Creating 100 agents = 1 OpenAI client (reused)
for row in data[:100]:
    agent = ChatAgent(config)  # Reuses existing OpenAI client
    result = agent.run(row)
```

## Testing Results

- ✅ All 9 client cache unit tests pass
- ✅ All 6 OpenAIGPT integration tests pass
- ✅ Existing LLM tests continue to pass
- ✅ Type checking passes
- ✅ Linting passes

## Benefits

1. **Resource Efficiency**: Dramatically reduces file descriptor usage
2. **Performance**: Eliminates repeated client initialization overhead
3. **Transparent**: No API changes required - existing code benefits automatically
4. **Configurable**: Each unique configuration gets its own cached client
5. **Safe**: Thread-safe implementation with proper cleanup

## Implementation Notes

- Used SHA256 hashing for cache keys (consistent with existing Redis cache)
- Handles all configuration parameters (API key, base URL, timeout, headers, etc.)
- Async client cleanup deferred to OS (atexit can't await)
- Weak references allow garbage collection when clients no longer needed
</file>

<file path="issues/llm-client-caching-test-summary.md">
# Client Caching Test Summary

## Tests Created

### 1. Unit Tests (`test_client_cache.py`)
- **Purpose**: Test the basic caching functionality
- **Coverage**: 
  - Singleton behavior for same configuration
  - Different clients for different configurations
  - Proper handling of all client types (OpenAI, Groq, Cerebras)
  - Cache key generation with complex types
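
The core singleton assertion in these tests looks roughly like the following (illustrative; the real tests in `test_client_cache.py` may use different arguments):

```python
from langroid.language_models.client_cache import get_openai_client


def test_same_config_returns_same_client() -> None:
    a = get_openai_client(api_key="test-key")
    b = get_openai_client(api_key="test-key")
    c = get_openai_client(api_key="other-key")
    assert a is b       # identical config -> identical client object
    assert a is not c   # different config -> different client
```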

### 2. Integration Tests (`test_openai_gpt_client_cache.py`)
- **Purpose**: Test OpenAIGPT integration with caching
- **Coverage**:
  - Multiple OpenAIGPT instances share clients
  - Different configs create different clients
  - Works for all model types (OpenAI, Groq, Cerebras)

### 3. Stress Tests (`test_client_cache_stress.py`)
- **Purpose**: Demonstrate resource usage improvements
- **Tests**:
  - `test_many_agents_with_caching`: Shows 100 agents share 1 client
  - `test_many_agents_different_configs`: Shows proper separation by config
  - `test_memory_efficiency`: Demonstrates memory savings
  - `test_client_instance_comparison`: Direct comparison with/without caching

### 4. Demonstration Test (`test_client_cache_demo.py`)
- **Purpose**: Clear demonstration of the fix for the exact user scenario
- **Key Results**:

#### With Client Caching:
- 100 ChatAgent instances → 1 shared client pair
- File descriptors saved: ~297
- Memory saved: ~148.5 MB
- Creation time: 0.60 seconds

#### Without Client Caching (simulated):
- 100 ChatAgent instances → 100 client pairs
- File descriptors used: ~300
- Extra memory used: ~148.5 MB
- Risk of "Too many open files" errors

## Test Results Summary

All tests demonstrate that the client caching implementation:

1. **Prevents resource exhaustion**: 100 agents use 1 client instead of 100
2. **Maintains correctness**: Different configurations still get different clients
3. **Is transparent**: No API changes needed
4. **Provides significant savings**:
   - 50x reduction in client instances
   - ~297 file descriptors saved for 100 agents
   - ~148.5 MB memory saved for 100 agents

The stress tests confirm that the implementation successfully addresses the "too many open files" issue that was occurring when creating many agents in a loop.
</file>

<file path="issues/llm-client-caching.md">
# LLM Client Connection Pool Exhaustion Issue

## Problem Statement

When using Langroid in multi-agent systems where agents are created dynamically (e.g., one agent per data row), each agent creates its own LLM client instance (OpenAI, Groq, or Cerebras). This pattern leads to connection pool exhaustion, resulting in "too many open files" errors and degraded performance.

## Current Behavior

### Client Creation Flow
1. Each `ChatAgent` instantiates its own `OpenAIGPT` instance
2. Each `OpenAIGPT` instance creates new client objects:
   - For Groq models: Creates `Groq()` and `AsyncGroq()` clients
   - For Cerebras models: Creates `Cerebras()` and `AsyncCerebras()` clients  
   - For OpenAI/others: Creates `OpenAI()` and `AsyncOpenAI()` clients
3. These clients maintain their own connection pools via httpx

### Problem Scenario
```python
# Anti-pattern: Creating many agents
for row in data[:100]:  # 100 rows
    agent = ChatAgent(config)  # Creates new OpenAI client
    result = agent.run(row)    # Makes API calls
    # Agent goes out of scope but connections may linger
```

This creates 100 separate OpenAI clients, each with its own connection pool.

## Impact

1. **Resource Exhaustion**: Each client maintains open connections, leading to file descriptor limits
2. **Performance Degradation**: Connection establishment overhead for each new client
3. **Potential API Rate Limiting**: Multiple clients may trigger more aggressive rate limiting
4. **Memory Usage**: Each client instance consumes memory for connection pools

## Root Cause

The issue stems from:
1. Lack of client reuse across agent instances
2. Connection pools not being properly closed when agents are garbage collected
3. The anti-pattern of creating many short-lived agents instead of reusing agents

## Constraints

1. **API Compatibility**: Solution must not break existing Langroid API
2. **Configuration Flexibility**: Different agents may need different configurations (API keys, base URLs, timeouts)
3. **Thread Safety**: Clients must be safely shareable across multiple agents
4. **Async Support**: Must handle both sync and async client variants

## Critical Considerations

### 1. Configuration Variations
Different agents in the same system might require different client configurations:
- **Different API Keys**: Agent A might use one OpenAI key, Agent B another
- **Different Base URLs**: Some agents might use standard OpenAI, others might use Azure OpenAI
- **Different Timeouts**: Long-running tasks might need higher timeouts
- **Different Headers**: Custom headers for different use cases

**Implication**: We cannot have just one singleton per client type. We need to cache clients keyed by their full configuration, creating a new client only the first time a given configuration is encountered.

### 2. Thread Safety
Multiple agents might run concurrently and share the same client instance:
- The httpx library (used by OpenAI, Groq, Cerebras clients) is designed to be thread-safe
- Connection pools in httpx can handle concurrent requests
- No additional locking should be needed for client access

**Implication**: Shared clients can be used safely across multiple threads/agents without synchronization overhead.

### 3. Lifecycle Management
Proper cleanup of singleton clients is crucial:
- **When to close**: Clients hold network resources that should be released
- **Garbage collection**: Need to ensure clients can be GC'd when no longer needed
- **Application shutdown**: Should close all clients gracefully on exit

**Implications**: 
- Consider using weak references to allow garbage collection of unused clients
- Implement `atexit` hooks for graceful shutdown
- May need a manual cleanup mechanism for long-running applications
- Monitor for memory leaks from accumulating cached clients with unique configs
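
A rough sketch of how these lifecycle pieces could fit together, using only the standard library (`weakref`, `atexit`); this illustrates the idea rather than the eventual Langroid implementation:

```python
import atexit
import weakref
from typing import Any

# Weak references let unused clients be garbage-collected while still
# allowing a best-effort close of whatever is alive at shutdown.
_live_clients: "weakref.WeakSet[Any]" = weakref.WeakSet()


def _track(client: Any) -> Any:
    _live_clients.add(client)
    return client


@atexit.register
def _close_all() -> None:
    for client in list(_live_clients):
        close = getattr(client, "close", None)
        if callable(close):
            try:
                close()
            except Exception:
                pass  # best-effort cleanup at interpreter shutdown
```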

## Proposed Solution: Client Singleton Pattern

### Approach
Implement a caching layer that returns singleton clients based on configuration:

1. **Wrapper Functions**: Replace direct client instantiation with wrapper functions:
   - `get_openai_client(config) -> OpenAI`
   - `get_groq_client(config) -> Groq`
   - `get_cerebras_client(config) -> Cerebras`
   - Similar for async variants

2. **Configuration-Based Caching**: Cache clients keyed by their configuration parameters:
   - API key
   - Base URL
   - Timeout
   - Headers
   - Organization (for OpenAI)

3. **Implementation Location**: In `langroid/language_models/openai_gpt.py`, replace:
   ```python
   # Current
   self.client = OpenAI(api_key=self.api_key, ...)
   
   # Proposed
   self.client = get_openai_client(api_key=self.api_key, ...)
   ```

### Benefits
- Reduces client instances from N (number of agents) to M (unique configurations)
- No API changes required
- Follows OpenAI best practices for client reuse
- Transparent to existing code

### Alternative Solutions Considered

1. **Agent Pooling**: Reuse agents instead of creating new ones
   - Pros: Most efficient
   - Cons: Requires significant API changes

2. **Explicit Client Registry**: Pass shared clients to agents
   - Pros: Explicit control
   - Cons: Breaks existing API, requires user awareness

3. **Connection Limit Configuration**: Reduce connection pool sizes
   - Pros: Simple
   - Cons: Doesn't address root cause, may hurt performance

## Success Criteria

1. Creating 100+ agents should not cause "too many open files" errors
2. Memory usage should remain stable with many agents
3. No breaking changes to existing Langroid API
4. Performance improvement for multi-agent scenarios

## Implementation Notes

- httpx clients (used by OpenAI/Groq/Cerebras) are thread-safe
- Consider using weak references to allow garbage collection
- May need cleanup hooks (atexit) for proper shutdown
- Should add logging for cache hits/misses for debugging

## References

- OpenAI Cookbook: Best practices recommend reusing client instances
- httpx documentation: Connection pooling behavior
- Python file descriptor limits and ulimit settings
</file>

<file path="issues/pr-882-cached-tokens-improvements.md">
# PR #882: Cached Tokens Support - Improvements

## Summary
Enhanced PR #882, which adds cached-token tracking to LLMTokenUsage. The improvements include cleanup of unused code, bug fixes, new tests, and support for additional models.

## Changes

### 1. Code Cleanup
- Removed unused `chat_cost_per_1k_tokens` and `completion_cost_per_1k_tokens` fields from `LLMConfig` in `base.py`
- These fields were superseded by the ModelInfo approach but were still being updated unnecessarily

### 2. Bug Fixes
- Fixed type error in `openai_gpt.py` when extracting `prompt_tokens_details` from API responses
- Added proper type annotation and type checking to handle cases where the field might not be a dict

### 3. Added Tests
- `test_cached_tokens_tracking()`: Verifies cached tokens are properly tracked in API responses and cost calculations work correctly
- `test_cached_tokens_in_llm_response()`: Tests the LLMTokenUsage class directly including string representation and reset functionality

### 4. Added Gemini 2.5 Model Support
- Fixed `GEMINI_2_5_PRO` enum to map to `"gemini-2.5-pro"` instead of experimental version
- Added new enums: `GEMINI_2_5_FLASH` and `GEMINI_2_5_FLASH_LITE_PREVIEW`
- Added complete ModelInfo entries with proper costs and parameters:
  - **Gemini 2.5 Pro**: 1M context, $1.25/$0.31/$10.00 per million tokens
  - **Gemini 2.5 Flash**: 1M context, $0.30/$0.075/$2.50 per million tokens  
  - **Gemini 2.5 Flash Lite Preview**: 64K context, $0.10/$0.025/$0.40 per million tokens
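
For intuition, assuming the three prices listed above are input / cached-input / output per million tokens, the cost of a response with cached tokens would be computed along these lines (the function and parameter names here are illustrative, not necessarily those used in LLMTokenUsage or ModelInfo):

```python
def chat_cost(
    prompt_tokens: int,
    cached_tokens: int,
    completion_tokens: int,
    input_price: float,    # $ per million input tokens
    cached_price: float,   # $ per million cached input tokens
    output_price: float,   # $ per million output tokens
) -> float:
    # Cached prompt tokens are billed at the cheaper cached rate;
    # the remaining prompt tokens are billed at the normal input rate.
    uncached = prompt_tokens - cached_tokens
    return (
        uncached * input_price
        + cached_tokens * cached_price
        + completion_tokens * output_price
    ) / 1_000_000
```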

## Testing
- All existing tests pass
- New tests verify cached token functionality
- Code passes all linting and type checking
</file>

<file path="issues/pr-openai-client-caching.md">
# OpenAI Client Connection Management

## Problem
Creating many agents (e.g., 100 agents for 100 data rows) leads to "too many open files" errors: each agent creates its own HTTP client, and the accumulated connection pools exhaust the process's file descriptors.

## Solution
Implemented client caching/singleton pattern to reuse HTTP clients across multiple agent instances with the same configuration.

## Changes

### 1. Client Caching Module (`langroid/language_models/client_cache.py`)
- Singleton pattern for HTTP client reuse
- SHA256-based cache keys for configuration
- Wrapper functions for each client type (OpenAI, Groq, Cerebras)
- Lifecycle management with `atexit` hooks

### 2. OpenAIGPT Integration
- Added `use_cached_client: bool = True` config parameter
- Updated client creation to use wrapper functions when caching enabled
- Allows disabling for testing/special cases
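
For example, caching can be turned off per model configuration via the new flag (the other fields shown are purely illustrative):

```python
from langroid.language_models.openai_gpt import OpenAIGPTConfig

llm_config = OpenAIGPTConfig(
    chat_model="gpt-4o",        # illustrative model choice
    use_cached_client=False,    # opt out of the shared-client cache
)
```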

### 3. ChatAgent Cleanup
- Updated `__del__` method to avoid closing shared clients
- Clients now managed centrally via client_cache module

### 4. Comprehensive Tests
- Tests for singleton behavior across all client types
- Verification of concurrent async usage
- Tests for model prefix routing (groq/, cerebras/, etc.)
- Regression tests with `use_cached_client` flag

## Benefits
- Prevents resource exhaustion when creating many agents
- Improves performance by reusing pooled connections instead of re-establishing them for each agent
- Backward compatible with opt-out capability
- Thread-safe for concurrent usage
</file>

<file path="issues/pr-qdrant-lock-fix.md">
# Fix QdrantDB Lock File Issue

## Problem
When using QdrantDB with local storage, file lock conflicts occurred when:
1. A QdrantDB instance was created but not properly closed
2. Another part of the code tried to create a new QdrantDB instance with the same storage path
3. Qdrant would detect the `.lock` file and create a new storage directory (e.g., `./qdrant_data.new`)

## Solution
1. **Added `close()` method to QdrantDB** - Calls the underlying client's close method to release the file lock
2. **Added context manager support** - Implemented `__enter__` and `__exit__` for automatic cleanup
3. **Fixed DocChatAgent's `clear()` method** - Now closes the old vecdb before creating a new one

## Usage
```python
# Option 1: Explicit close
vecdb = QdrantDB(config)
vecdb.clear_all_collections(really=True)
vecdb.close()  # Release the lock

# Option 2: Context manager (automatic cleanup)
with QdrantDB(config) as vecdb:
    vecdb.clear_all_collections(really=True)
    # Automatically closed when exiting context
```

## Changes
- `langroid/vector_store/qdrantdb.py`: Added `close()`, `__enter__`, `__exit__` methods
- `langroid/agent/special/doc_chat_agent.py`: Fixed `clear()` to close old vecdb instance

This fix prevents the proliferation of `.new` directories when using QdrantDB with local storage.
</file>

<file path="issues/qdrant-lock-issue-spec-changes.md">
# QdrantDB Lock File Conflict Issue - Changes and Best Practices

## Summary of Changes

This document describes the changes made to resolve the QdrantDB lock file conflict issue described in `qdrant-lock-issue-spec.md`.

## Problem Recap

When using QdrantDB with local storage, a file lock conflict occurred when:
1. A QdrantDB instance was created (e.g., to clear collections)
2. The instance was not properly disposed/closed
3. Another part of the code tried to create a new QdrantDB instance
4. Qdrant detected the `.lock` file and created a new storage directory (e.g., `./qdrant_data.new`)

## Implemented Solution

### 1. Added `close()` Method

Added an explicit `close()` method to the QdrantDB class:

```python
def close(self) -> None:
    """
    Close the QdrantDB client and release any resources (e.g., file locks).
    This is especially important for local storage to release the .lock file.
    """
    if hasattr(self.client, "close"):
        # QdrantLocal has a close method that releases the lock
        self.client.close()
        logger.info(f"Closed QdrantDB connection for {self.config.storage_path}")
```

### 2. Added Context Manager Support

Implemented `__enter__` and `__exit__` methods to support Python's context manager protocol:

```python
def __enter__(self) -> "QdrantDB":
    """Context manager entry."""
    return self

def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Context manager exit - ensure cleanup even if an exception occurred."""
    self.close()
```

### 3. Added Type Import

Added `Any` to the type imports to support the context manager type hints:

```python
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, TypeVar
```

## Best Practices for Using QdrantDB

### Important Note

The underlying `qdrant_client` library does not implement the context manager protocol. However, **Langroid's QdrantDB wrapper now provides context manager support** to ensure proper cleanup of resources, especially the file lock used by QdrantLocal.

### Recommended: Use Context Manager (Most Pythonic)

The context manager approach is the **recommended best practice** for Langroid's QdrantDB as it guarantees cleanup even if exceptions occur:

```python
from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig

config = QdrantDBConfig(
    cloud=False,
    collection_name="my_collection",
    storage_path="./qdrant_data",
)

# Recommended approach
with QdrantDB(config) as vecdb:
    # Use the vector database
    vecdb.add_documents(documents)
    results = vecdb.similar_texts_with_scores("query text", k=5)
    vecdb.clear_empty_collections()
    # Automatically closed when exiting the context
```

### Alternative: Explicit `close()` Method

If you cannot use a context manager (e.g., when the QdrantDB instance needs to persist across multiple methods), use explicit `close()`:

```python
class MyDocProcessor:
    def __init__(self):
        config = QdrantDBConfig(
            cloud=False,
            collection_name="my_collection",
            storage_path="./qdrant_data",
        )
        self.vecdb = QdrantDB(config)
    
    def process_documents(self, docs):
        self.vecdb.add_documents(docs)
    
    def search(self, query):
        return self.vecdb.similar_texts_with_scores(query, k=5)
    
    def cleanup(self):
        # Important: Call this when done
        self.vecdb.close()
```

### When Using with DocChatAgent

When using QdrantDB with DocChatAgent, the agent manages the vector store lifecycle, so you don't need to worry about closing it manually:

```python
from langroid.agent.special.doc_chat_agent import DocChatAgent, DocChatAgentConfig

# DocChatAgent manages the QdrantDB lifecycle
agent = DocChatAgent(
    DocChatAgentConfig(
        vecdb=QdrantDBConfig(
            cloud=False,
            collection_name="doc_chat",
            storage_path="./qdrant_data",
        )
    )
)
# The agent will handle cleanup appropriately
```

### For Temporary Operations

For one-off operations like clearing collections, always use context manager:

```python
# Clear all collections
with QdrantDB(config) as vecdb:
    vecdb.clear_all_collections(really=True, prefix="temp_")

# Clear and recreate
with QdrantDB(config) as vecdb:
    vecdb.delete_collection("old_collection")
    vecdb.create_collection("new_collection", replace=True)
```

## Important Notes

1. **Cloud Storage**: This issue only affects local storage (`cloud=False`). When using Qdrant cloud service, file locking is not used.

2. **Backward Compatibility**: Existing code will continue to work without changes, but may show warnings about lock conflicts and create `.new` directories.

3. **Multiple Processes**: If you genuinely need multiple processes to access the same Qdrant storage simultaneously, use Qdrant server instead of local storage.

## Testing

Comprehensive tests were added to verify the fix:
- `tests/main/test_qdrant_lock_release.py` - Unit tests for close() and context manager
- `tests/main/test_qdrant_lock_scenario.py` - Reproduces the exact issue scenario
- `tests/main/test_qdrant_warning_capture.py` - Captures and verifies warning messages

All tests pass and confirm that:
- Without proper cleanup: `.new` directories are created (the bug)
- With `close()` or context manager: No `.new` directories (fixed)

## Migration Guide

If you have existing code that creates temporary QdrantDB instances:

**Before (problematic):**
```python
vecdb = QdrantDB(config)
vecdb.clear_all_collections(really=True)
# Lock file remains, causing issues
```

**After (fixed):**
```python
# Option 1: Context manager (preferred)
with QdrantDB(config) as vecdb:
    vecdb.clear_all_collections(really=True)

# Option 2: Explicit close
vecdb = QdrantDB(config)
vecdb.clear_all_collections(really=True)
vecdb.close()
```

## Why This Matters

While the `qdrant_client` library handles some cleanup via its `__del__` method, this is not reliable because:
1. Python's garbage collector doesn't guarantee when `__del__` will be called
2. In some cases (circular references, interpreter shutdown), `__del__` may not be called at all
3. The file lock remains until the process ends, preventing other instances from using the same storage

By adding explicit `close()` and context manager support to Langroid's QdrantDB wrapper, we ensure:
- Immediate release of the file lock when done
- No proliferation of `.new` directories
- Predictable resource cleanup
- Better development experience (no need to manually delete lock files)

## Conclusion

The QdrantDB lock file issue has been resolved by adding proper resource cleanup mechanisms to Langroid's QdrantDB wrapper. While the underlying `qdrant_client` doesn't provide context manager support, Langroid now offers both context manager and explicit `close()` methods. The context manager approach is the recommended best practice as it ensures cleanup even in error scenarios. For cases where context managers aren't suitable, the explicit `close()` method provides a reliable alternative.
</file>

<file path="langroid/agent/special/arangodb/arangodb_agent.py">
logger = logging.getLogger(__name__)
console = Console()
⋮----
ARANGO_ERROR_MSG = "There was an error in your AQL Query"
T = TypeVar("T")
⋮----
class ArangoSettings(BaseSettings)
⋮----
client: ArangoClient | None = None
db: StandardDatabase | None = None
url: str = ""
username: str = ""
password: str = ""
database: str = ""
⋮----
model_config = SettingsConfigDict(env_prefix="ARANGO_")
⋮----
class QueryResult(BaseModel)
⋮----
success: bool
data: Optional[
⋮----
model_config = ConfigDict(
⋮----
class ArangoChatAgentConfig(ChatAgentConfig)
⋮----
arango_settings: ArangoSettings = ArangoSettings()
system_message: str = DEFAULT_ARANGO_CHAT_SYSTEM_MESSAGE
kg_schema: str | Dict[str, List[Dict[str, Any]]] | None = None
database_created: bool = False
prepopulate_schema: bool = True
use_functions_api: bool = True
max_num_results: int = 10  # how many results to return from AQL query
max_schema_fields: int = 500  # max fields to show in schema
max_tries: int = 10  # how many attempts to answer user question
use_tools: bool = False
schema_sample_pct: float = 0
# whether the agent is used in a continuous chat with user,
# as opposed to returning a result from the task.run()
chat_mode: bool = False
addressing_prefix: str = ""
⋮----
class ArangoChatAgent(ChatAgent)
⋮----
def __init__(self, config: ArangoChatAgentConfig)
⋮----
def init_state(self) -> None
⋮----
self.num_tries = 0  # how many attempts to answer user question
⋮----
response = super().user_response(msg)
⋮----
response_str = response.content if response is not None else ""
⋮----
self.num_tries = 0  # reset number of tries if user responds
⋮----
response = super().llm_response(message)
⋮----
# response contains both a user-addressing and a tool, which
# is not allowed, so remove the user-addressing prefix
⋮----
def _validate_config(self) -> None
⋮----
def _import_arango(self) -> None
⋮----
def _has_any_data(self) -> bool
⋮----
for c in self.db.collections():  # type: ignore
⋮----
if self.db.collection(c["name"]).count() > 0:  # type: ignore
⋮----
def _initialize_db(self) -> None
⋮----
# Check if any non-system collection has data
⋮----
# If database has data, get schema
⋮----
# this updates self.config.kg_schema
⋮----
def close(self) -> None
⋮----
@staticmethod
    def cleanup_graph_db(db) -> None:  # type: ignore
⋮----
# First delete graphs to properly handle edge collections
⋮----
graph_name = graph["name"]
if not graph_name.startswith("_"):  # Skip system graphs
⋮----
# Clear existing collections
⋮----
if not collection["name"].startswith("_"):  # Skip system collections
⋮----
"""Execute a function with retries on connection error"""
⋮----
# Reconnect if needed
⋮----
return func()  # Final attempt after loop if not raised
⋮----
"""Execute a read query with connection retry."""
⋮----
def execute_read() -> QueryResult
⋮----
cursor = self.db.aql.execute(query, bind_vars=bind_vars)
records = [doc for doc in cursor]  # type: ignore
records = records[: self.config.max_num_results]
⋮----
error_message = self.retry_query(e, query)
⋮----
return self.with_retry(execute_read)  # type: ignore
⋮----
"""Execute a write query with connection retry."""
⋮----
def execute_write() -> QueryResult
⋮----
return self.with_retry(execute_write)  # type: ignore
⋮----
def aql_retrieval_tool(self, msg: AQLRetrievalTool) -> str
⋮----
"""Handle AQL query for data retrieval"""
⋮----
query = msg.aql_query
⋮----
response = self.read_query(query)
⋮----
def aql_creation_tool(self, msg: AQLCreationTool) -> str
⋮----
"""Handle AQL query for creating data"""
⋮----
response = self.write_query(query)
⋮----
"""Get database schema. If collections=None, include all collections.
        If properties=False, show only connection info,
        else show all properties and example-docs.
        """
⋮----
collections = msg.collections
properties = msg.properties
⋮----
collections = None
properties = True
⋮----
# we are trying to pre-populate full schema before the agent runs,
# so get it if it's already available
# (Note of course that this "full schema" may actually be incomplete)
⋮----
# increment tries only if the LLM is asking for the schema,
# in which case msg will not be None
⋮----
# Get graph schemas (keeping full graph info)
graph_schema = [
⋮----
for g in self.db.graphs()  # type: ignore
⋮----
# Get collection schemas
collection_schema = []
for collection in self.db.collections():  # type: ignore
⋮----
col_name = collection["name"]
⋮----
col_type = collection["type"]
col_size = self.db.collection(col_name).count()
⋮----
# Full property collection with sampling
lim = self.config.schema_sample_pct * col_size  # type: ignore
limit_amount = ceil(lim / 100.0) or 1
sample_query = f"""
⋮----
properties_list = []
example_doc = None
⋮----
def simplify_doc(doc: Any) -> Any
⋮----
for doc in self.db.aql.execute(sample_query):  # type: ignore
⋮----
example_doc = simplify_doc(doc)
⋮----
prop = {"name": key, "type": type(value).__name__}
⋮----
# Basic info + from/to for edges only
collection_info = {
⋮----
# Get a sample edge to extract from/to fields
sample_edge = next(
⋮----
self.db.aql.execute(  # type: ignore
⋮----
schema = {
schema_str = json.dumps(schema, indent=2)
⋮----
schema = trim_schema(schema)
n_fields = count_fields(schema)
⋮----
schema_str = (
⋮----
def _init_tools_sys_message(self) -> None
⋮----
"""Initialize system msg and enable tools"""
⋮----
message = self._format_message()
⋮----
# Note we are enabling GraphSchemaTool regardless of whether
# self.config.prepopulate_schema is True or False, because
# even when schema provided, the agent may later want to get the schema,
# e.g. if the db evolves, or schema was trimmed due to size, or
# if it needs to bring in the schema into recent context.
⋮----
def _format_message(self) -> str
⋮----
"""When LLM sends a no-tool msg, assume user is the intended recipient,
        and if in interactive mode, forward the msg to the user.
        """
done_tool_name = DoneTool.default_value("request")
forward_tool_name = ForwardTool.default_value("request")
aql_retrieval_tool_instructions = AQLRetrievalTool.instructions()
# TODO the aql_retrieval_tool_instructions may be empty/minimal
# when using self.config.use_functions_api = True.
tools_instruction = f"""
⋮----
def retry_query(self, e: Exception, query: str) -> str
⋮----
"""Generate error message for failed AQL query"""
⋮----
error_message = f"""\
</file>

<file path="langroid/agent/special/arangodb/system_messages.py">
done_tool_name = DoneTool.default_value("request")
⋮----
arango_schema_tool_description = f"""
⋮----
aql_retrieval_tool_description = f"""
⋮----
aql_creation_tool_description = f"""
⋮----
aql_retrieval_query_example = """
⋮----
aql_query_instructions = """
⋮----
tool_result_instruction = """
# sys msg to use when schema already provided initially,
# so agent should not use schema tool
SCHEMA_PROVIDED_SYS_MSG = f"""You are a data scientist and expert in Graph Databases,
⋮----
# sys msg to use when schema is not initially provided,
# and we want agent to use schema tool to get schema
SCHEMA_TOOLS_SYS_MSG = f"""You are a data scientist and expert in
⋮----
DEFAULT_ARANGO_CHAT_SYSTEM_MESSAGE = f"""
⋮----
ADDRESSING_INSTRUCTION = """
⋮----
DONE_INSTRUCTION = f"""
</file>

<file path="langroid/agent/special/arangodb/tools.py">
class AQLRetrievalTool(ToolMessage)
⋮----
request: str = "aql_retrieval_tool"
purpose: str = """
aql_query: str
⋮----
_max_result_tokens: int = 500
_max_retained_tokens: int = 200
⋮----
@classmethod
    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]
⋮----
"""Few-shot examples to include in tool instructions."""
⋮----
@classmethod
    def instructions(cls) -> str
⋮----
aql_retrieval_tool_name = AQLRetrievalTool.default_value("request")
⋮----
class AQLCreationTool(ToolMessage)
⋮----
request: str = "aql_creation_tool"
⋮----
aql_creation_tool_name = AQLCreationTool.default_value("request")
⋮----
class ArangoSchemaTool(ToolMessage)
⋮----
request: str = "arango_schema_tool"
⋮----
properties: bool = True
collections: List[str] | None = None
⋮----
arango_schema_tool_name = ArangoSchemaTool.default_value("request")
</file>

<file path="langroid/agent/special/arangodb/utils.py">
def count_fields(schema: Dict[str, List[Dict[str, Any]]]) -> int
⋮----
total = 0
⋮----
# Count all keys in each collection's dict
⋮----
# Also count properties if they exist
props = coll.get(f"{coll['collection_type']}_properties", [])
⋮----
"""Keep only edge connection info, remove properties and examples"""
trimmed: Dict[str, List[Dict[str, Any]]] = {
⋮----
col_info: Dict[str, Any] = {
⋮----
# preserve from/to info if present
⋮----
example = coll[f"example_{coll['collection_type']}"]
</file>

<file path="langroid/agent/special/lance_rag/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/special/lance_rag/critic_agent.py">
"""
QueryPlanCritic is a ChatAgent that is created with a specific document schema.

Its role is to provide feedback on a Query Plan, which consists of:
- filter condition if needed (or empty string if no filter is needed)
- query - a possibly rephrased query that can be used to match the `content` field
- dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
- original_query - the original query for reference
- result - the answer received from an assistant that used this QUERY PLAN.

This agent has access to two tools:
- QueryPlanTool: The handler method for this tool re-writes the query plan
  in plain text (non-JSON) so the LLM can provide its feedback using the
  QueryPlanFeedbackTool.
- QueryPlanFeedbackTool: LLM uses this tool to provide feedback on the Query Plan
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
class QueryPlanCriticConfig(LanceQueryPlanAgentConfig)
⋮----
name: str = "QueryPlanCritic"
system_message: str = f"""
⋮----
def plain_text_query_plan(msg: QueryPlanAnswerTool) -> str
⋮----
plan = f"""
⋮----
class QueryPlanCritic(ChatAgent)
⋮----
"""
    Critic for LanceQueryPlanAgent, provides feedback on
    query plan + answer.
    """
⋮----
def __init__(self, cfg: LanceQueryPlanAgentConfig)
⋮----
def init_state(self) -> None
⋮----
def query_plan_answer(self, msg: QueryPlanAnswerTool) -> str
⋮----
"""Present query plan + answer in plain text (not JSON)
        so LLM can give feedback"""
⋮----
def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> AgentDoneTool
⋮----
"""Format Valid so return to Query Planner"""
⋮----
# indicate this task is Done, and return the tool as result
⋮----
"""Remind the LLM to use QueryPlanFeedbackTool since it forgot"""
</file>

<file path="langroid/agent/special/lance_rag/lance_rag_task.py">
"""
The LanceRAGTaskCreator.new() method creates a 3-Agent system that uses this agent.
It takes a LanceDocChatAgent instance as argument, and adds two more agents:
- LanceQueryPlanAgent, which is given the LanceDB schema in LanceDocChatAgent,
and based on this schema, for a given user query, creates a Query Plan
using the QueryPlanTool, which contains a filter, a rephrased query,
and a dataframe_calc.
- QueryPlanCritic, which is given the LanceDB schema in LanceDocChatAgent,
 and gives feedback on the Query Plan and Result using the QueryPlanFeedbackTool.

The LanceRAGTaskCreator.new() method sets up the given LanceDocChatAgent and
QueryPlanCritic as sub-tasks of the LanceQueryPlanAgent's task.

Langroid's built-in task orchestration ensures that:
- the LanceQueryPlanAgent reformulates the plan based
    on the QueryPlanCritics's feedback,
- LLM deviations are corrected via tools and overrides of ChatAgent methods.
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
class LanceRAGTaskCreator
⋮----
"""
        Add a LanceFilterAgent to the LanceDocChatAgent,
        set up the corresponding Tasks, connect them,
        and return the top-level query_plan_task.
        """
doc_agent_name = "LanceRAG"
critic_name = "QueryPlanCritic"
query_plan_agent_config = LanceQueryPlanAgentConfig(
⋮----
critic_config = QueryPlanCriticConfig(
⋮----
query_planner = LanceQueryPlanAgent(query_plan_agent_config)
query_plan_task = Task(
critic_agent = QueryPlanCritic(critic_config)
critic_task = Task(
rag_task = Task(
⋮----
done_if_response=[Entity.LLM],  # done when non-null response from LLM
done_if_no_response=[Entity.LLM],  # done when null response from LLM
</file>

<file path="langroid/agent/special/lance_rag/query_planner_agent.py">
"""
LanceQueryPlanAgent is a ChatAgent created with a specific document schema.
Given a QUERY, the LLM constructs a Query Plan consisting of:
- filter condition if needed (or empty string if no filter is needed)
- query - a possibly rephrased query that can be used to match the `content` field
- dataframe_calc - a Pandas-dataframe calculation/aggregation string, possibly empty
- original_query - the original query for reference

This agent has access to two tools:
- QueryPlanTool, which is used to generate the Query Plan, and the handler of
    this tool simply passes it on to the RAG agent named in config.doc_agent_name.
- QueryPlanFeedbackTool, which is used to handle feedback on the Query Plan and
  Result from the RAG agent. The QueryPlanFeedbackTool is used by
  the QueryPlanCritic, who inserts feedback into the `feedback` field
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
class LanceQueryPlanAgentConfig(ChatAgentConfig)
⋮----
name: str = "LancePlanner"
critic_name: str = "QueryPlanCritic"
doc_agent_name: str = "LanceRAG"
doc_schema: str = ""
use_tools: bool = False
max_retries: int = 5  # max number of retries for query plan
use_functions_api: bool = True
⋮----
system_message: str = """
⋮----
def set_system_message(self) -> None
⋮----
class LanceQueryPlanAgent(ChatAgent)
⋮----
def __init__(self, config: LanceQueryPlanAgentConfig)
⋮----
# This agent should generate the QueryPlanTool
# as well as handle it for validation
⋮----
# neither use nor handle! Added to "known" tools so that the Planner agent
# can avoid processing it
⋮----
# LLM will not use this, so set use=False (Agent generates it)
⋮----
def init_state(self) -> None
⋮----
# how many times re-trying query plan in response to feedback:
⋮----
self.result: str = ""  # answer received from LanceRAG
⋮----
def query_plan(self, msg: QueryPlanTool) -> ForwardTool | str
⋮----
"""Valid, tool msg, forward chat_doc to RAG Agent.
        Note this chat_doc will already have the
        QueryPlanTool in its tool_messages list.
        We just update the recipient to the doc_agent_name.
        """
# save, to be used to assemble QueryPlanResultTool
⋮----
# To forward the QueryPlanTool to doc_agent, we could either:
⋮----
# (a) insert `recipient` in the QueryPlanTool:
# QPWithRecipient = QueryPlanTool.require_recipient()
# qp = QPWithRecipient(**msg.model_dump(), recipient=self.config.doc_agent_name)
# return qp
#
# OR
⋮----
# (b) create an agent response with recipient and tool_messages.
# response = self.create_agent_response(
#     recipient=self.config.doc_agent_name, tool_messages=[msg]
# )
# return response
⋮----
# (c) use the ForwardTool:
⋮----
def query_plan_feedback(self, msg: QueryPlanFeedbackTool) -> str | AgentDoneTool
⋮----
"""Process Critic feedback on QueryPlan + Answer from RAG Agent"""
# We should have saved answer in self.result by this time,
# since this Agent seeks feedback only after receiving RAG answer.
⋮----
# This means the result is good AND Query Plan is fine,
# as judged by Critic
# (Note sometimes critic may have empty suggested_fix even when
# the result is NO_ANSWER)
self.n_retries = 0  # good answer, so reset this
⋮----
# bail out to avoid infinite loop
⋮----
# there is a suggested_fix, OR the result is empty or NO_ANSWER
⋮----
# if result is empty or NO_ANSWER, we should retry the query plan
feedback = """
suggested = "Retry the `query_plan` to try to get a non-null answer"
⋮----
feedback = msg.feedback
suggested = msg.suggested_fix
⋮----
def answer_tool(self, msg: AnswerTool) -> QueryPlanAnswerTool
⋮----
"""Handle AnswerTool received from LanceRagAgent:
        Construct a QueryPlanAnswerTool with the answer"""
self.result = msg.answer  # save answer to interpret feedback later
⋮----
query_plan_answer_tool = QueryPlanAnswerTool(
self.curr_query_plan = None  # reset
⋮----
"""
        Remind to use QueryPlanTool if we are expecting it.
        """
⋮----
self.n_query_plan_reminders = 0  # reset
</file>

<file path="langroid/agent/special/neo4j/csv_kg_chat.py">
app = typer.Typer()
⋮----
BUILD_KG_INSTRUCTIONS = """
⋮----
DEFAULT_CSV_KG_CHAT_SYSTEM_MESSAGE = """
⋮----
"""
    Preprocess a DataFrame for Neo4j import by fixing mismatched quotes in string
        columns and handling null or missing values.

    Args:
        df (DataFrame): The DataFrame to be preprocessed.
        default_value (str, optional): The default value to replace null values.
        This is ignored if remove_null_rows is True. Defaults to None.
        remove_null_rows (bool, optional): If True, rows with any null values will
            be removed.
        If False, null values will be filled with default_value. Defaults to False.

    Returns:
        DataFrame: The preprocessed DataFrame ready for Neo4j import.
    """
⋮----
# Fix mismatched quotes in string columns
⋮----
# Handle null or missing values
⋮----
df = df.dropna()
⋮----
df = df.fillna(default_value)
⋮----
class CSVGraphAgentConfig(Neo4jChatAgentConfig)
⋮----
system_message: str = DEFAULT_CSV_KG_CHAT_SYSTEM_MESSAGE
data: str | pd.DataFrame | None  # data file, URL, or DataFrame
separator: None | str = None  # separator for data file
vecdb: None | VectorStoreConfig = None
llm: OpenAIGPTConfig = OpenAIGPTConfig(
⋮----
class PandasToKGTool(ToolMessage)
⋮----
request: str = "pandas_to_kg"
purpose: str = """Use this tool to create ONLY nodes and their relationships based
cypherQuery: str
args: list[str]
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
class CSVGraphAgent(Neo4jChatAgent)
⋮----
def __init__(self, config: CSVGraphAgentConfig)
⋮----
formatted_build_instr = ""
⋮----
df = config.data
⋮----
df = read_tabular_data(config.data, config.separator)
df_cleaned = _preprocess_dataframe_for_neo4j(df)
⋮----
formatted_build_instr = BUILD_KG_INSTRUCTIONS.format(
⋮----
def pandas_to_kg(self, msg: PandasToKGTool) -> str
⋮----
"""
        Creates nodes and relationships in the graph database based on the data in
        a CSV file.

        Args:
            msg (PandasToKGTool): An instance of the PandasToKGTool class containing
                the necessary information for generating nodes.

        Returns:
            str: A string indicating the success or failure of the operation.
        """
⋮----
row_dict = row.to_dict()
response = self.write_query(
# there is a possibility the generated cypher query is not correct
# so we need to check the response before continuing to the
# iteration
</file>

<file path="langroid/agent/special/neo4j/neo4j_chat_agent.py">
logger = logging.getLogger(__name__)
⋮----
console = Console()
⋮----
NEO4J_ERROR_MSG = "There was an error in your Cypher Query"
⋮----
# TOOLS to be used by the agent
⋮----
class Neo4jSettings(BaseSettings)
⋮----
uri: str = ""
username: str = ""
password: str = ""
database: str = ""
⋮----
model_config = SettingsConfigDict(env_prefix="NEO4J_")
⋮----
class QueryResult(BaseModel)
⋮----
success: bool
data: List[Dict[Any, Any]] | str | None = None
⋮----
class Neo4jChatAgentConfig(ChatAgentConfig)
⋮----
neo4j_settings: Neo4jSettings = Neo4jSettings()
system_message: str = DEFAULT_NEO4J_CHAT_SYSTEM_MESSAGE
kg_schema: Optional[List[Dict[str, Any]]] = None
database_created: bool = False
# whether agent MUST use schema_tools to get schema, i.e.
# schema is NOT initially provided
use_schema_tools: bool = True
use_functions_api: bool = True
use_tools: bool = False
# whether the agent is used in a continuous chat with user,
# as opposed to returning a result from the task.run()
chat_mode: bool = False
addressing_prefix: str = ""
⋮----
class Neo4jChatAgent(ChatAgent)
⋮----
def __init__(self, config: Neo4jChatAgentConfig)
⋮----
"""Initialize the Neo4jChatAgent.

        Raises:
            ValueError: If database information is not provided in the config.
        """
⋮----
def init_state(self) -> None
⋮----
"""
        When LLM sends a no-tool msg, assume user is the intended recipient,
        and if in interactive mode, forward the msg to the user.
        """
⋮----
done_tool_name = DoneTool.default_value("request")
forward_tool_name = ForwardTool.default_value("request")
⋮----
def _validate_config(self) -> None
⋮----
"""Validate the configuration to ensure all necessary fields are present."""
⋮----
def _import_neo4j(self) -> None
⋮----
"""Dynamically imports the Neo4j module and sets it as a global variable."""
⋮----
def _initialize_db(self) -> None
⋮----
"""
        Initializes a connection to the Neo4j database using the configuration settings.
        """
⋮----
result = session.run("MATCH (n) RETURN count(n) as count")
count = result.single()["count"]  # type: ignore
⋮----
# If database has data, get schema
⋮----
# this updates self.config.kg_schema
⋮----
def close(self) -> None
⋮----
"""close the connection"""
⋮----
def retry_query(self, e: Exception, query: str) -> str
⋮----
"""
        Generate an error message for a failed Cypher query and return it.

        Args:
            e (Exception): The exception raised during the Cypher query execution.
            query (str): The Cypher query that failed.

        Returns:
            str: The error message.
        """
⋮----
# Construct the error message
error_message_template = f"""\
⋮----
"""
        Executes a given Cypher query with parameters on the Neo4j database.

        Args:
            query (str): The Cypher query string to be executed.
            parameters (Optional[Dict[Any, Any]]): A dictionary of parameters for
                                                    the query.

        Returns:
            QueryResult: An object representing the outcome of the query execution.
        """
⋮----
result = session.run(query, parameters)
⋮----
records = [record.data() for record in result]
⋮----
error_message = self.retry_query(e, query)
⋮----
"""
        Executes a write transaction using a given Cypher query on the Neo4j database.
        This method should be used for queries that modify the database.

        Args:
            query (str): The Cypher query string to be executed.
            parameters (dict, optional): A dict of parameters for the Cypher query.

        Returns:
            QueryResult: An object representing the outcome of the query execution.
                         It contains a success flag and an optional error message.
        """
# Check if query contains database/collection creation patterns
query_upper = query.upper()
is_creation_query = any(
⋮----
# TODO: test under enterprise edition because community edition doesn't allow
# database creation/deletion
def remove_database(self) -> None
⋮----
"""Deletes all nodes and relationships from the current Neo4j database."""
delete_query = """
response = self.write_query(delete_query)
⋮----
def cypher_retrieval_tool(self, msg: CypherRetrievalTool) -> str
⋮----
""" "
        Handle a CypherRetrievalTool message by executing a Cypher query and
        returning the result.
        Args:
            msg (CypherRetrievalTool): The tool-message to handle.

        Returns:
            str: The result of executing the cypher_query.
        """
⋮----
query = msg.cypher_query
⋮----
response = self.read_query(query)
⋮----
def cypher_creation_tool(self, msg: CypherCreationTool) -> str
⋮----
""" "
        Handle a CypherCreationTool message by executing a Cypher query and
        returning the result.
        Args:
            msg (CypherCreationTool): The tool-message to handle.

        Returns:
            str: The result of executing the cypher_query.
        """
⋮----
response = self.write_query(query)
⋮----
# TODO: There are various ways to get the schema. The current one uses the func
# `read_query`, which requires post processing to identify whether the response upon
# the schema query is valid. Another way is to isolate this func from `read_query`.
# The current query works well. But we could use the queries here:
# https://github.com/neo4j/NaLLM/blob/1af09cd117ba0777d81075c597a5081583568f9f/api/
# src/driver/neo4j.py#L30
⋮----
"""
        Retrieves the schema of a Neo4j graph database.

        Args:
            msg (GraphSchemaTool): An instance of GraphDatabaseSchema, typically
            containing information or parameters needed for the database query.

        Returns:
            str: The visual representation of the database schema as a string, or a
            message stating that the database schema is empty or not valid.

        Raises:
            This function does not explicitly raise exceptions but depends on the
            behavior of 'self.read_query' method, which might raise exceptions related
             to database connectivity or query execution.
        """
⋮----
schema_result = self.read_query("CALL db.schema.visualization()")
⋮----
# there is a possibility that the schema is empty, which is a valid response
# the schema.data will be: [{"nodes": [], "relationships": []}]
self.config.kg_schema = schema_result.data  # type: ignore
⋮----
def _init_tools_sys_message(self) -> None
⋮----
"""Initialize message tools used for chatting."""
⋮----
message = self._format_message()
⋮----
# Note we are enabling GraphSchemaTool regardless of whether
# self.config.use_schema_tools is True or False, because
# even when schema provided, the agent may later want to get the schema,
# e.g. if the db evolves, or if it needs to bring in the schema
⋮----
def _format_message(self) -> str
</file>

<file path="langroid/agent/special/neo4j/system_messages.py">
done_tool_name = DoneTool.default_value("request")
⋮----
graph_schema_tool_description = f"""
⋮----
cypher_retrieval_tool_description = f"""
⋮----
cypher_creation_tool_description = f"""
⋮----
cypher_query_instructions = """
⋮----
# sys msg to use when schema already provided initially,
# so agent does not need to use schema tool, at least initially,
# but may do so later if the db evolves, or if needs to bring in the schema
# to more recent context.
SCHEMA_PROVIDED_SYS_MSG = f"""You are a data scientist and expert in Knowledge Graphs,
⋮----
# sys msg to use when schema is not initially provided,
# and we want agent to use schema tool to get schema
SCHEMA_TOOLS_SYS_MSG = f"""You are a data scientist and expert in Knowledge Graphs,
⋮----
DEFAULT_NEO4J_CHAT_SYSTEM_MESSAGE = f"""
⋮----
ADDRESSING_INSTRUCTION = """
⋮----
DONE_INSTRUCTION = f"""
</file>

<file path="langroid/agent/special/neo4j/tools.py">
class CypherRetrievalTool(ToolMessage)
⋮----
request: str = "cypher_retrieval_tool"
purpose: str = """To send the <cypher_query> to retrieve
cypher_query: str
⋮----
cypher_retrieval_tool_name = CypherRetrievalTool.default_value("request")
⋮----
class CypherCreationTool(ToolMessage)
⋮----
request: str = "cypher_creation_tool"
purpose: str = """
⋮----
cypher_creation_tool_name = CypherCreationTool.default_value("request")
⋮----
class GraphSchemaTool(ToolMessage)
⋮----
request: str = "graph_schema_tool"
purpose: str = """To get the schema of the graph database."""
⋮----
graph_schema_tool_name = GraphSchemaTool.default_value("request")
</file>

<file path="langroid/agent/special/sql/utils/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/special/sql/utils/description_extractors.py">
"""
    Extracts descriptions for tables and columns from a PostgreSQL database.

    This method retrieves the descriptions of tables and their columns
    from a PostgreSQL database using the provided SQLAlchemy engine.

    Args:
        engine (Engine): SQLAlchemy engine connected to a PostgreSQL database.
        multi_schema (bool): Generate descriptions for all schemas in the database.

    Returns:
        Dict[str, Dict[str, Any]]: A dictionary mapping table names to a
        dictionary containing the table description and a dictionary of
        column descriptions.
    """
inspector = inspect(engine)
result: Dict[str, Dict[str, Any]] = {}
⋮----
def gen_schema_descriptions(schema: Optional[str] = None) -> None
⋮----
table_names: List[str] = inspector.get_table_names(schema=schema)
⋮----
table_name = table
⋮----
table_name = f"{schema}.{table}"
⋮----
table_comment = (
⋮----
columns = {}
col_data = inspector.get_columns(table, schema=schema)
⋮----
col_comment = (
⋮----
"""Extracts descriptions for tables and columns from a MySQL database.

    This method retrieves the descriptions of tables and their columns
    from a MySQL database using the provided SQLAlchemy engine.

    Args:
        engine (Engine): SQLAlchemy engine connected to a MySQL database.
        multi_schema (bool): Generate descriptions for all schemas in the database.

    Returns:
        Dict[str, Dict[str, Any]]: A dictionary mapping table names to a
        dictionary containing the table description and a dictionary of
        column descriptions.
    """
⋮----
query = text(
table_result = conn.execute(
table_comment = table_result.scalar() or ""
⋮----
"""Extracts default descriptions for tables and columns from a database.

    This method retrieves the table and column names from the given database
    and associates empty descriptions with them.

    Args:
        engine (Engine): SQLAlchemy engine connected to a database.
        multi_schema (bool): Generate descriptions for all schemas in the database.

    Returns:
        Dict[str, Dict[str, Any]]: A dictionary mapping table names to a
        dictionary containing an empty table description and a dictionary of
        empty column descriptions.
    """
⋮----
"""
    Extracts the schema descriptions from the database connected to by the engine.

    Args:
        engine (Engine): SQLAlchemy engine instance.
        multi_schema (bool): Generate descriptions for all schemas in the database.

    Returns:
        Dict[str, Dict[str, Any]]: A dictionary representation of table and column
        descriptions.
    """
⋮----
extractors = {
</file>

<file path="langroid/agent/special/sql/utils/populate_metadata.py">
"""
    Extracts information from an SQLAlchemy database's metadata and combines it
    with another dictionary with context descriptions.

    Args:
        metadata (MetaData): SQLAlchemy metadata object of the database.
        info (Dict[str, Dict[str, Any]]): A dictionary with table and column
                                             descriptions.

    Returns:
        Dict[str, Dict[str, Any]]: A dictionary with table and context information.
    """
db_info: Dict[str, Dict[str, Union[str, Dict[str, str]]]] = {}
⋮----
def populate_metadata(md: MetaData) -> None
⋮----
# Create empty metadata dictionary with column datatypes
⋮----
# Populate tables with empty descriptions
⋮----
# Populate columns with datatype
db_info[table_name]["columns"][str(column.name)] = (  # type: ignore
⋮----
"""
    Populate metadata based on the provided database metadata and additional info.

    Args:
        metadata (MetaData): Metadata object from SQLAlchemy.
        info (Dict): Additional information for database tables and columns.

    Returns:
        Dict: A dictionary containing populated metadata information.
    """
# Fetch basic metadata info using available tools
db_info: Dict[str, Dict[str, Union[str, Dict[str, str]]]] = (
⋮----
# Iterate over tables to update column metadata
⋮----
# Update only if additional info for the table exists
⋮----
# Merge and update column description if available
⋮----
db_info[table_name]["columns"][column_name] = (  # type: ignore
db_info[table_name]["columns"][column_name]  # type: ignore
⋮----
+ info[table_name]["columns"][column_name]  # type: ignore
</file>

<file path="langroid/agent/special/sql/utils/system_message.py">
DEFAULT_SYS_MSG = """You are a savvy data scientist/database administrator,
⋮----
SCHEMA_TOOLS_SYS_MSG = """You are a savvy data scientist/database administrator,
</file>

<file path="langroid/agent/special/sql/utils/tools.py">
class RunQueryTool(ToolMessage)
⋮----
request: str = "run_query"
purpose: str = """
query: str
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
class GetTableNamesTool(ToolMessage)
⋮----
request: str = "get_table_names"
⋮----
class GetTableSchemaTool(ToolMessage)
⋮----
request: str = "get_table_schema"
⋮----
tables: List[str]
⋮----
@classmethod
    def example(cls) -> "GetTableSchemaTool"
⋮----
class GetColumnDescriptionsTool(ToolMessage)
⋮----
request: str = "get_column_descriptions"
⋮----
table: str
columns: str
⋮----
@classmethod
    def example(cls) -> "GetColumnDescriptionsTool"
</file>

<file path="langroid/agent/special/sql/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/special/sql/sql_chat_agent.py">
"""
Agent that allows interaction with an SQL database using the SQLAlchemy library.
The agent can execute SQL queries in the database and return the result. 

Functionality includes:
- adding table and column context
- asking a question about a SQL schema
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
console = Console()
⋮----
DEFAULT_SQL_CHAT_SYSTEM_MESSAGE = """
⋮----
ADDRESSING_INSTRUCTION = """
⋮----
DONE_INSTRUCTION = f"""
⋮----
SQL_ERROR_MSG = "There was an error in your SQL Query"
⋮----
class SQLChatAgentConfig(ChatAgentConfig)
⋮----
system_message: str = DEFAULT_SQL_CHAT_SYSTEM_MESSAGE
user_message: None | str = None
cache: bool = True  # cache results
debug: bool = False
use_helper: bool = True
is_helper: bool = False
stream: bool = True  # allow streaming where needed
database_uri: str = ""  # Database URI
database_session: None | Session = None  # Database session
vecdb: None | VectorStoreConfig = None
context_descriptions: Dict[str, Dict[str, Union[str, Dict[str, str]]]] = {}
use_schema_tools: bool = False
multi_schema: bool = False
# whether the agent is used in a continuous chat with user,
# as opposed to returning a result from the task.run()
chat_mode: bool = False
addressing_prefix: str = ""
max_result_rows: int | None = None  # limit query results to this
max_retained_tokens: int | None = None  # limit history of query results to this
⋮----
"""
    Optional, but strongly recommended, context descriptions for tables, columns, 
    and relationships. It should be a dictionary where each key is a table name 
    and its value is another dictionary. 

    In this inner dictionary:
    - The 'description' key corresponds to a string description of the table.
    - The 'columns' key corresponds to another dictionary where each key is a 
    column name and its value is a string description of that column.
    - The 'relationships' key corresponds to another dictionary where each key 
    is another table name and the value is a description of the relationship to 
    that table.

    If multi_schema support is enabled, the table names in the description
    should be of the form 'schema_name.table_name'.

    For example:
    {
        'table1': {
            'description': 'description of table1',
            'columns': {
                'column1': 'description of column1 in table1',
                'column2': 'description of column2 in table1'
            }
        },
        'table2': {
            'description': 'description of table2',
            'columns': {
                'column3': 'description of column3 in table2',
                'column4': 'description of column4 in table2'
            }
        }
    }
    """
⋮----
class SQLChatAgent(ChatAgent)
⋮----
"""
    Agent for chatting with a SQL database
    """
⋮----
used_run_query: bool = False
llm_responded: bool = False
⋮----
def __init__(self, config: "SQLChatAgentConfig") -> None
⋮----
"""Initialize the SQLChatAgent.

        Raises:
            ValueError: If database information is not provided in the config.
        """
⋮----
# Caution - this updates the self.config.system_message!
⋮----
# helper_config.system_message is now the fully-populated sys msg of
# the main SQLAgent.
⋮----
def _validate_config(self, config: "SQLChatAgentConfig") -> None
⋮----
"""Validate the configuration to ensure all necessary fields are present."""
⋮----
def _init_database(self) -> None
⋮----
"""Initialize the database engine and session."""
⋮----
def _init_metadata(self) -> None
⋮----
"""Initialize the database metadata."""
⋮----
inspector = inspect(self.engine)
⋮----
metadata = MetaData(schema=schema)
⋮----
def _init_table_metadata(self) -> None
⋮----
"""Initialize metadata for the tables present in the database."""
⋮----
def _init_system_message(self) -> None
⋮----
"""Initialize the system message."""
message = self._format_message()
⋮----
def _init_tools(self) -> None
⋮----
"""Initialize sys msg and tools."""
# Create a custom RunQueryTool class with the desired max_retained_tokens
⋮----
class CustomRunQueryTool(RunQueryTool)
⋮----
_max_retained_tokens = self.config.max_retained_tokens
⋮----
def _format_message(self) -> str
⋮----
"""Format the system message based on the engine and table metadata."""
⋮----
def _enable_schema_tools(self) -> None
⋮----
"""Enable tools for schema-related functionalities."""
⋮----
def _clarify_answer_instruction(self) -> str
⋮----
"""
        Prompt to use when asking LLM to clarify intent of
        an already-generated response
        """
⋮----
def _clarifying_message(self) -> str
⋮----
tools_instruction = f"""
⋮----
"""
        We'd end up here if the current msg has no tool.
        If this is from the LLM, we may need to handle the scenario where
        it may have "forgotten" to generate a tool.
        """
⋮----
# send any Non-tool msg to the user
⋮----
# Agent intent not clear => use the helper agent to
# do what this agent should have done, e.g. generate tool, etc.
# This is likelier to succeed since this agent has no "baggage" of
# prior conversation, other than the system msg, and special
# "Intent-interpretation" instructions.
⋮----
AnyTool = self._get_any_tool_message(optional=False)
⋮----
recovery_message = self._strict_recovery_instructions(
result = self.llm_response(recovery_message)
# remove the recovery_message (it has User role) from the chat history,
# else it may cause the LLM to directly use the AnyTool.
self.delete_last_message(role=Role.USER)  # delete last User-role msg
⋮----
response = self.helper_agent.llm_response(message)
tools = self.try_get_tool_messages(response)
⋮----
# fall back on the clarification message
⋮----
def retry_query(self, e: Exception, query: str) -> str
⋮----
"""
        Generate an error message for a failed SQL query and return it.

        Parameters:
        e (Exception): The exception raised during the SQL query execution.
        query (str): The SQL query that failed.

        Returns:
        str: The error message.
        """
⋮----
# Optional part to be included based on `use_schema_tools`
optional_schema_description = ""
⋮----
optional_schema_description = f"""\
⋮----
# Construct the error message
error_message_template = f"""\
⋮----
def _available_tool_names(self) -> str
⋮----
def _tool_result_llm_answer_prompt(self) -> str
⋮----
"""
        Prompt to use at end of tool result,
        to guide LLM, for the case where it wants to answer the user's query
        """
⋮----
def run_query(self, msg: RunQueryTool) -> str
⋮----
"""
        Handle a RunQueryTool message by executing a SQL query and returning the result.

        Args:
            msg (RunQueryTool): The tool-message to handle.

        Returns:
            str: The result of executing the SQL query.
        """
query = msg.query
session = self.Session
⋮----
query_result = session.execute(text(query))
⋮----
# attempt to fetch results: should work for normal SELECT queries
rows = query_result.fetchall()
n_rows = len(rows)
⋮----
rows = rows[: self.config.max_result_rows]
⋮----
response_message = self._format_rows(rows)
⋮----
# If we get here, it's a non-SELECT query (UPDATE, INSERT, DELETE)
affected_rows = query_result.rowcount  # type: ignore
response_message = f"""
⋮----
response_message = self.retry_query(e, query)
⋮----
final_message = f"""
⋮----
def _format_rows(self, rows: Sequence[Row[Any]]) -> str
⋮----
"""
        Format the rows fetched from the query result into a string.

        Args:
            rows (list): List of rows fetched from the query result.

        Returns:
            str: Formatted string representation of rows.
        """
# TODO: UPDATE FORMATTING
⋮----
def get_table_names(self, msg: GetTableNamesTool) -> str
⋮----
"""
        Handle a GetTableNamesTool message by returning the names of all tables in the
        database.

        Returns:
            str: The names of all tables in the database.
        """
⋮----
table_names = [", ".join(md.tables.keys()) for md in self.metadata]
⋮----
def get_table_schema(self, msg: GetTableSchemaTool) -> str
⋮----
"""
        Handle a GetTableSchemaTool message by returning the schema of all provided
        tables in the database.

        Returns:
            str: The schema of all provided tables in the database.
        """
tables = msg.tables
result = ""
⋮----
table = self.table_metadata.get(table_name)
⋮----
def get_column_descriptions(self, msg: GetColumnDescriptionsTool) -> str
⋮----
"""
        Handle a GetColumnDescriptionsTool message by returning the descriptions of all
        provided columns from the database.

        Returns:
            str: The descriptions of all provided columns from the database.
        """
table = msg.table
columns = msg.columns.split(", ")
result = f"\nTABLE: {table}"
descriptions = self.config.context_descriptions.get(table)
⋮----
result += f"\n{col} => {descriptions['columns'][col]}"  # type: ignore
⋮----
class SQLHelperAgent(SQLChatAgent)
⋮----
"""Set up helper sys msg"""
⋮----
# Note that self.config.system_message is already set to the
# parent SQLAgent's system_message
⋮----
# note that the initial msg in chat history will contain:
# - system message
# - tool instructions
# so the final_instructions will be at the end of this initial msg
⋮----
message_str = message if isinstance(message, str) else message.content
instruc_msg = f"""
# user response_forget to avoid accumulating the chat history
</file>
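
To tie the above together, here is a minimal setup sketch using only config fields shown in `SQLChatAgentConfig`; the SQLite URI and descriptions are illustrative, not from the source.

```python
# Hedged sketch: SQLChatAgent over a local SQLite database (any SQLAlchemy
# URI works). The table/column descriptions are illustrative.
import langroid as lr
from langroid.agent.special.sql.sql_chat_agent import SQLChatAgent, SQLChatAgentConfig

config = SQLChatAgentConfig(
    database_uri="sqlite:///demo.db",
    context_descriptions={
        "users": {
            "description": "registered users",
            "columns": {"id": "primary key", "name": "full name"},
        }
    },
)
agent = SQLChatAgent(config)
task = lr.Task(agent, interactive=False)
answer = task.run("How many users are registered?")
```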

<file path="langroid/agent/special/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/special/lance_doc_chat_agent.py">
"""
LanceDocChatAgent is a subclass of DocChatAgent that uses LanceDB as a vector store:
- Uses the DocChatAgentConfig.filter variable
    (a SQL string) in the `where` clause to do filtered vector search.
- Overrides the get_similar_chunks_bm25() to use LanceDB FTS (Full Text Search).

For usage see:
 - `tests/main/test_lance_doc_chat_agent.py`.
 - example script `examples/docqa/lance_rag.py`.

"""
⋮----
logger = logging.getLogger(__name__)
⋮----
class LanceDocChatAgent(DocChatAgent)
⋮----
vecdb: LanceDB
⋮----
def __init__(self, cfg: DocChatAgentConfig)
⋮----
def _get_clean_vecdb_schema(self) -> str
⋮----
"""Get a cleaned schema of the vector-db, to pass to the LLM
        as part of instructions on how to generate a SQL filter."""
⋮----
tbl_pandas = (
⋮----
filterable_fields = tbl_pandas.columns.tolist()
# drop id, vector, metadata.id, metadata.window_ids, metadata.is_chunk
filterable_fields = list(
⋮----
filter_fields_set = set(self.config.filter_fields)
⋮----
# remove 'content' from filter_fields_set (a no-op if it's not present)
⋮----
# possible values of filterable fields
filter_field_values = self.get_field_values(list(filter_fields_set))
⋮----
schema_dict: Dict[str, Dict[str, Any]] = dict(
# add field values to schema_dict as another field `values` for each field
⋮----
dtype = tbl_pandas[field].dtype.name
⋮----
# if self.config.filter_fields is set, restrict to these:
⋮----
schema_dict = {
schema = json.dumps(schema_dict, indent=4)
⋮----
def query_plan(self, msg: QueryPlanTool) -> AgentDoneTool | str
⋮----
"""
        Handle the LLM's use of the FilterTool.
        Temporarily set the config filter and either return the final answer
        in case there's a dataframe_calc, or return the rephrased query
        so the LLM can handle it.
        """
# create document-subset based on this filter
plan = msg.plan
⋮----
# say DONE with err msg so it goes back to LanceFilterAgent
⋮----
# update the filter so it is used in the DocChatAgent
⋮----
# we just get relevant docs then do the calculation
# TODO if calc causes err, it is captured in result,
# and LLM can correct the calc based on the err,
# and this will cause retrieval all over again,
# which may be wasteful if only the calc part is wrong.
# The calc step can later be done with a separate Agent/Tool.
⋮----
# no query to match, so just get all docs matching filter
docs = self.vecdb.get_all_documents(plan.filter)
⋮----
answer = self.vecdb.compute_from_docs(docs, plan.dataframe_calc)
⋮----
# pass on the query so LLM can handle it
response = self.llm_response(plan.query)
answer = NO_ANSWER if response is None else response.content
⋮----
n = super().ingest_docs(docs, split, metadata)
tbl = self.vecdb.client.open_table(self.vecdb.config.collection_name)
# We assume "content" is available as top-level field
⋮----
"""Ingest from a dataframe. Assume we are doing this once, not incrementally"""
⋮----
n = df.shape[0]
⋮----
# If any additional fields need to be added to content,
# add them as key=value pairs, into the `content` field for all rows.
# This helps retrieval for table-like data.
# Note we need to do this at this stage so that
# are computed on the full content with these additional fields.
fields = [f for f in self.config.add_fields_to_content if f in df.columns]
⋮----
# We still need to do the below so that
# other types of searches in DocChatAgent
# can work, as they require Document objects
docs = dataframe_to_documents(df, content="content", metadata=metadata)
⋮----
# mark each doc as already-chunked so we don't try to split them further
# TODO later we may want to split large text-columns
⋮----
return n  # type: ignore
⋮----
"""
        Override the DocChatAgent.get_similar_chunks_bm25()
        to use LanceDB FTS (Full Text Search).
        """
# Clean up query: replace all newlines with spaces in query,
# force special search keywords to lower case, remove quotes,
# so it's not interpreted as search syntax
query_clean = (
⋮----
result = (
docs = self.vecdb._lance_result_to_docs(result)
scores = [r["score"] for r in result.to_list()]
</file>
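
A minimal usage sketch along the lines of the docstring's pointers (`examples/docqa/lance_rag.py`); the `DocChatAgentConfig`/`LanceDBConfig` import paths and field names are assumptions.

```python
# Hedged sketch: LanceDocChatAgent over a LanceDB collection; import paths
# and field names (vecdb, filter) are assumptions.
from langroid.agent.special.doc_chat_agent import DocChatAgentConfig
from langroid.agent.special.lance_doc_chat_agent import LanceDocChatAgent
from langroid.vector_store.lancedb import LanceDBConfig

cfg = DocChatAgentConfig(
    vecdb=LanceDBConfig(collection_name="docs"),
    filter="metadata.year > 2020",  # SQL string used in the `where` clause
)
agent = LanceDocChatAgent(cfg)
```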

<file path="langroid/agent/special/lance_tools.py">
logger = logging.getLogger(__name__)
⋮----
class QueryPlan(BaseModel)
⋮----
original_query: str = Field(..., description="The original query for reference")
query: str = Field(..., description="A possibly NON-EMPTY rephrased query")
filter: str = Field(
dataframe_calc: str = Field(
⋮----
class QueryPlanTool(ToolMessage)
⋮----
request: str = "query_plan"  # the agent method name that handles this tool
purpose: str = """
plan: QueryPlan
⋮----
class AnswerTool(ToolMessage)
⋮----
"""Wrapper for answer from LanceDocChatAgent"""
⋮----
purpose: str = "To package the answer from LanceDocChatAgent"
request: str = "answer_tool"
answer: str
⋮----
class QueryPlanAnswerTool(ToolMessage)
⋮----
request: str = "query_plan_answer"  # the agent method name that handles this tool
⋮----
answer: str = Field(..., description="The answer received from the assistant")
⋮----
class QueryPlanFeedbackTool(ToolMessage)
⋮----
request: str = "query_plan_feedback"
⋮----
feedback: str
suggested_fix: str
</file>
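
For illustration, a `QueryPlan`/`QueryPlanTool` pair as a planner LLM might produce it for a tabular-docs question; the field values are invented, but the field names match the classes above.

```python
# Hedged sketch: a query plan for "average rating of movies after 2010".
from langroid.agent.special.lance_tools import QueryPlan, QueryPlanTool

plan = QueryPlan(
    original_query="What is the average rating of movies after 2010?",
    query="average rating of movies",
    filter="year > 2010",                  # SQL-style filter on metadata fields
    dataframe_calc="df['rating'].mean()",  # pandas calc over the filtered docs
)
tool = QueryPlanTool(plan=plan)  # handled by LanceDocChatAgent.query_plan
```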

<file path="langroid/agent/special/retriever_agent.py">
"""
DEPRECATED: use DocChatAgent instead, with DocChatAgentConfig.retrieve_only=True,
and if you want to retrieve FULL relevant doc-contents rather than just extracts,
then set DocChatAgentConfig.extraction_granularity=-1

This is an agent to retrieve relevant extracts from a vector store,
where the LLM is used to filter for "true" relevance after retrieval from the
vector store.
This is essentially the same as DocChatAgent, except that instead of
generating final summary answer based on relevant extracts, it just returns
those extracts.
See test_retriever_agent.py for example usage.
"""
⋮----
console = Console()
logger = logging.getLogger(__name__)
⋮----
# for backwards compatibility:
RecordMetadata = DocMetaData
RecordDoc = Document
RetrieverAgentConfig = DocChatAgentConfig
⋮----
class RetrieverAgent(DocChatAgent)
⋮----
"""
    Agent for just retrieving chunks/docs/extracts matching a query
    """
⋮----
def __init__(self, config: DocChatAgentConfig)
⋮----
def get_records(self) -> Sequence[Document]
⋮----
# subclasses should override
⋮----
def ingest(self) -> None
⋮----
records = self.get_records()
</file>

<file path="langroid/agent/special/table_chat_agent.py">
"""
Agent that supports asking queries about a tabular dataset, internally
represented as a Pandas dataframe. The `TableChatAgent` is configured with a
dataset, which can be a Pandas df, file or URL. The delimiter/separator
is auto-detected. In response to a user query, the Agent's LLM generates a Pandas
expression (involving a dataframe `df`) to answer the query.
The expression is passed via the `pandas_eval` tool/function-call,
which is handled by the Agent's `pandas_eval` method. This method evaluates
the expression and returns the result as a string.

WARNING: This Agent should be used only with trusted input, as it can execute system
commands. 

The `full_eval` flag is False by default, which means the input is sanitized
against the most common code-injection attack vectors. `full_eval` may be set to
True to disable sanitization entirely. Either setting should be used with caution.
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
console = Console()
⋮----
DEFAULT_TABLE_CHAT_SYSTEM_MESSAGE = f"""
⋮----
@no_type_check
def dataframe_summary(df: pd.DataFrame) -> str
⋮----
"""
    Generate a structured summary for a pandas DataFrame containing numerical
    and categorical values.

    Args:
        df (pd.DataFrame): The input DataFrame to summarize.

    Returns:
        str: A nicely structured and formatted summary string.
    """
⋮----
# Column names display
col_names_str = (
⋮----
# Numerical data summary
num_summary = df.describe().map(lambda x: "{:.2f}".format(x))
num_str = "Numerical Column Summary:\n" + num_summary.to_string() + "\n\n"
⋮----
# Categorical data summary
cat_columns = df.select_dtypes(include=[np.object_]).columns
cat_summary_list = []
⋮----
unique_values = df[col].unique()
⋮----
cat_str = "Categorical Column Summary:\n" + "\n".join(cat_summary_list) + "\n\n"
⋮----
# Missing values summary
nan_summary = df.isnull().sum().rename("missing_values").to_frame()
nan_str = "Missing Values Column Summary:\n" + nan_summary.to_string() + "\n"
⋮----
# Combine the summaries into one structured string
summary_str = col_names_str + num_str + cat_str + nan_str
⋮----
class TableChatAgentConfig(ChatAgentConfig)
⋮----
system_message: str = DEFAULT_TABLE_CHAT_SYSTEM_MESSAGE
user_message: None | str = None
cache: bool = True  # cache results
debug: bool = False
stream: bool = True  # allow streaming where needed
full_eval: bool = (
⋮----
False  # if True, runs eval without sanitization. Use only on trusted input!
⋮----
data: str | pd.DataFrame  # data file, URL, or DataFrame
separator: None | str = None  # separator for data file
vecdb: None | VectorStoreConfig = None
llm: OpenAIGPTConfig = OpenAIGPTConfig(
prompts: PromptsConfig = PromptsConfig(
⋮----
class PandasEvalTool(ToolMessage)
⋮----
"""Tool/function to evaluate a pandas expression involving a dataframe `df`"""
⋮----
request: str = "pandas_eval"
purpose: str = """
expression: str
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
@classmethod
    def instructions(cls) -> str
⋮----
class TableChatAgent(ChatAgent)
⋮----
"""
    Agent for chatting with a collection of documents.
    """
⋮----
sent_expression: bool = False
⋮----
def __init__(self, config: TableChatAgentConfig)
⋮----
df = config.data
⋮----
df = read_tabular_data(config.data, config.separator)
⋮----
summary = dataframe_summary(df)
⋮----
# enable the agent to use and handle the PandasEvalTool
⋮----
response = super().user_response(msg)
⋮----
def pandas_eval(self, msg: PandasEvalTool) -> str
⋮----
"""
        Handle a PandasEvalTool message by evaluating the `expression` field
            and returning the result.
        Args:
            msg (PandasEvalTool): The tool-message to handle.

        Returns:
            str: The result of running the code along with any print output.
        """
⋮----
exprn = msg.expression
vars = {"df": self.df}
# Create a string-based I/O stream
code_out = io.StringIO()
⋮----
# Temporarily redirect standard output to our string-based I/O stream
⋮----
# Evaluate the last line and get the result;
# SECURITY MITIGATION: Eval input is sanitized by default to prevent most
# common code injection attack vectors.
⋮----
exprn = sanitize_command(exprn)
code = compile(exprn, "<calc>", "eval")
eval_result = eval(code, vars, {})
⋮----
eval_result = f"ERROR: {type(e)}: {e}"
⋮----
eval_result = ""
⋮----
# Always restore the original standard output
⋮----
# If df has been modified in-place, save the changes back to self.df
⋮----
# Get the resulting string from the I/O stream
print_result = code_out.getvalue() or ""
sep = "\n" if print_result else ""
# Combine the print and eval results
result = f"{print_result}{sep}{eval_result}"
⋮----
result = "No result"
# Return the result
⋮----
"""Handle various LLM deviations"""
⋮----
# LLM sent an expression (i.e. used the `pandas_eval` tool)
# but upon receiving the results, simply said DONE without
# narrating the result as instructed.
⋮----
# LLM forgot to say DONE
⋮----
# LLM forgot to use the `pandas_eval` tool
</file>
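
A minimal sketch of asking questions about a DataFrame, using only config fields shown in `TableChatAgentConfig`; the data is synthetic.

```python
# Hedged sketch: TableChatAgent over a small synthetic DataFrame;
# full_eval stays False, so expressions are sanitized before eval.
import pandas as pd

import langroid as lr
from langroid.agent.special.table_chat_agent import TableChatAgent, TableChatAgentConfig

df = pd.DataFrame({"city": ["Oslo", "Lima"], "pop_millions": [0.7, 9.7]})
agent = TableChatAgent(TableChatAgentConfig(data=df))
task = lr.Task(agent, interactive=False)
answer = task.run("Which city has the larger population?")
```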

<file path="langroid/agent/tools/mcp/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/tools/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/tools/duckduckgo_search_tool.py">
"""
A tool to trigger a DuckDuckGo search for a given query, and return the top results with
their titles, links, summaries. Since the tool is stateless (i.e. does not need
access to agent state), it can be enabled for any agent, without having to define a
special method inside the agent: `agent.enable_message(DuckduckgoSearchTool)`
"""
⋮----
class DuckduckgoSearchTool(ToolMessage)
⋮----
request: str = "duckduckgo_search"
purpose: str = """
query: str
num_results: int
⋮----
def handle(self) -> str
⋮----
"""
        Conducts a search using DuckDuckGo based on the provided query
        and number of results by triggering a duckduckgo_search.

        Returns:
            str: A formatted string containing the titles, links, and
                summaries of each search result, separated by two newlines.
        """
search_results = duckduckgo_search(self.query, self.num_results)
# return Title, Link, Summary of each result, separated by two newlines
results_str = "\n\n".join(str(result) for result in search_results)
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>
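
Since the tool is stateless, enabling it on any agent suffices, exactly as the docstring suggests; the agent name and prompt below are illustrative.

```python
# Minimal sketch: enable the stateless search tool on a plain ChatAgent.
import langroid as lr
from langroid.agent.tools.duckduckgo_search_tool import DuckduckgoSearchTool

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Searcher"))
agent.enable_message(DuckduckgoSearchTool)
task = lr.Task(agent, interactive=False)
result = task.run("Use the duckduckgo_search tool to find recent news about LanceDB.")
```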

<file path="langroid/agent/tools/exa_search_tool.py">
"""
A tool to trigger a Exa search for a given query,
(https://docs.exa.ai/reference/getting-started)
and return the top results with their titles, links, summaries.
Since the tool is stateless (i.e. does not need
access to agent state), it can be enabled for any agent, without having to define a
special method inside the agent: `agent.enable_message(ExaSearchTool)`

NOTE: To use this tool, you need to:

* set the EXA_API_KEY environment variable in
your `.env` file, e.g. `EXA_API_KEY=your_api_key_here`
(Note: as of 28 Jan 2023, Metaphor was renamed to Exa.)

* install langroid with the `exa-py` extra, e.g.
`pip install langroid[exa]` or `uv pip install langroid[exa]`
or `poetry add langroid[exa]`  or `uv add langroid[exa]`
(it installs the `exa_py` package from pypi).

For more information, please refer to the official docs:
https://exa.ai/
"""
⋮----
class ExaSearchTool(ToolMessage)
⋮----
request: str = "exa_search"
purpose: str = """
query: str
num_results: int
⋮----
def handle(self) -> str
⋮----
"""
        Conducts a search using the exa API based on the provided query
        and number of results by triggering an exa_search.

        Returns:
            str: A formatted string containing the titles, links, and
                summaries of each search result, separated by two newlines.
        """
⋮----
search_results = exa_search(self.query, self.num_results)
# return Title, Link, Summary of each result, separated by two newlines
results_str = "\n\n".join(str(result) for result in search_results)
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>

<file path="langroid/agent/tools/file_tools.py">
class ReadFileTool(ToolMessage)
⋮----
request: str = "read_file_tool"
purpose: str = "Read the contents of a <file_path>"
file_path: str
⋮----
_line_nums: bool = True  # whether to add line numbers to the content
_curr_dir: Callable[[], str] | None = None
⋮----
"""
        Create a subclass of ReadFileTool for a specific directory

        Args:
            get_curr_dir (callable): A function that returns the current directory.

        Returns:
            Type[ReadFileTool]: A subclass of the ReadFileTool class, specifically
                for the current directory.
        """
⋮----
class CustomReadFileTool(cls):  # type: ignore
⋮----
_curr_dir: Callable[[], str] | None = (
⋮----
@classmethod
    def examples(cls) -> List[ToolMessage | tuple[str, ToolMessage]]
⋮----
def handle(self) -> str
⋮----
# return contents as str for LLM to read
# ASSUME: file_path should be relative to the curr_dir
⋮----
dir = (self._curr_dir and self._curr_dir()) or Path.cwd()
⋮----
# if file doesn't exist, return an error message
content = read_file(self.file_path, self._line_nums)
line_num_str = ""
⋮----
line_num_str = "(Line numbers added for reference only!)"
⋮----
class WriteFileTool(XMLToolMessage)
⋮----
request: str = "write_file_tool"
purpose: str = """
⋮----
file_path: str = Field(..., description="The path to the file to write the content")
⋮----
language: str = Field(
content: str = Field(
⋮----
},  # preserve the content as is; uses CDATA section in XML
⋮----
_git_repo: Callable[[], git.Repo] | None = None
_commit_message: str = "Agent write file tool"
⋮----
"""
        Create a subclass of WriteFileTool with the current directory and git repo.

        Args:
            get_curr_dir (callable): A function that returns the current directory.
            get_git_repo (callable): A function that returns the git repo.

        Returns:
            Type[WriteFileTool]: A subclass of the WriteFileTool class, specifically
                for the current directory and git repo.
        """
⋮----
class CustomWriteFileTool(cls):  # type: ignore
⋮----
_git_repo: Callable[[], str] | None = (
⋮----
@classmethod
    def examples(cls) -> List[ToolMessage | Tuple[str, ToolMessage]]
⋮----
curr_dir = (self._curr_dir and self._curr_dir()) or Path.cwd()
⋮----
msg = f"Content written to {self.file_path}"
# possibly commit the file
⋮----
class ListDirTool(ToolMessage)
⋮----
request: str = "list_dir_tool"
purpose: str = "List the contents of a <dir_path>"
dir_path: str
⋮----
"""
        Create a subclass of ListDirTool for a specific directory

        Args:
            get_curr_dir (callable): A function that returns the current directory.

        Returns:
            Type[ReadFileTool]: A subclass of the ReadFileTool class, specifically
                for the current directory.
        """
⋮----
class CustomListDirTool(cls):  # type: ignore
⋮----
# ASSUME: dir_path should be relative to the curr_dir_path
⋮----
contents = list_dir(self.dir_path)
⋮----
contents_str = "\n".join(contents)
</file>

<file path="langroid/agent/tools/google_search_tool.py">
"""
A tool to trigger a Google search for a given query, and return the top results with
their titles, links, summaries. Since the tool is stateless (i.e. does not need
access to agent state), it can be enabled for any agent, without having to define a
special method inside the agent: `agent.enable_message(GoogleSearchTool)`

NOTE: Using this tool requires setting the GOOGLE_API_KEY and GOOGLE_CSE_ID
environment variables in your `.env` file, as explained in the
[README](https://github.com/langroid/langroid#gear-installation-and-setup).
"""
⋮----
class GoogleSearchTool(ToolMessage)
⋮----
request: str = "web_search"
purpose: str = """
query: str
num_results: int
⋮----
def handle(self) -> str
⋮----
search_results = google_search(self.query, self.num_results)
# return Title, Link, Summary of each result, separated by two newlines
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>

<file path="langroid/agent/tools/metaphor_search_tool.py">
"""
A tool to trigger a Metaphor search for a given query,
(https://docs.exa.ai/reference/getting-started)
and return the top results with their titles, links, summaries.
Since the tool is stateless (i.e. does not need
access to agent state), it can be enabled for any agent, without having to define a
special method inside the agent: `agent.enable_message(MetaphorSearchTool)`

NOTE: To use this tool, you need to:

* set the METAPHOR_API_KEY environment variable in
your `.env` file, e.g. `METAPHOR_API_KEY=your_api_key_here`
(Note: as of 28 Jan 2023, Metaphor was renamed to Exa, so you can also use
`EXA_API_KEY=your_api_key_here`)

* install langroid with the `metaphor` extra, e.g.
`pip install langroid[metaphor]` or `uv pip install langroid[metaphor]` 
or `poetry add langroid[metaphor]`  or `uv add langroid[metaphor]`
(it installs the `metaphor-python` package from pypi).

For more information, please refer to the official docs:
https://metaphor.systems/
"""
⋮----
class MetaphorSearchTool(ToolMessage)
⋮----
request: str = "metaphor_search"
purpose: str = """
query: str
num_results: int
⋮----
def handle(self) -> str
⋮----
"""
        Conducts a search using the metaphor API based on the provided query
        and number of results by triggering a metaphor_search.

        Returns:
            str: A formatted string containing the titles, links, and
                summaries of each search result, separated by two newlines.
        """
⋮----
search_results = metaphor_search(self.query, self.num_results)
# return Title, Link, Summary of each result, separated by two newlines
results_str = "\n\n".join(str(result) for result in search_results)
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>

<file path="langroid/agent/tools/orchestration.py">
"""
Various tools for agents to control the flow of a Task, e.g.
termination, routing to another agent, etc.
"""
⋮----
class AgentDoneTool(ToolMessage)
⋮----
"""Tool for AGENT entity (i.e. agent_response or downstream tool handling fns) to
    signal the current task is done."""
⋮----
purpose: str = """
request: str = "agent_done_tool"
content: Any = None
tools: List[ToolMessage] = []
# only meant for agent_response or tool-handlers, not for LLM generation:
_allow_llm_use: bool = False
⋮----
def response(self, agent: ChatAgent) -> ChatDocument
⋮----
content_str = "" if self.content is None else to_string(self.content)
⋮----
class DoneTool(ToolMessage)
⋮----
"""Tool for Agent Entity (i.e. agent_response) or LLM entity (i.e. llm_response) to
    signal the current task is done, with some content as the result."""
⋮----
request: str = "done_tool"
content: str = ""
⋮----
@field_validator("content", mode="before")
@classmethod
    def convert_content_to_string(cls, v: Any) -> str
⋮----
"""Convert content to string if it's not already."""
⋮----
@classmethod
    def instructions(cls) -> str
⋮----
tool_name = cls.default_value("request")
⋮----
class ResultTool(ToolMessage)
⋮----
"""Class to use as a wrapper for sending arbitrary results from an Agent's
    agent_response or tool handlers, to:
    (a) trigger completion of the current task (similar to (Agent)DoneTool), and
    (b) be returned as the result of the current task, i.e. this tool would appear
         in the resulting ChatDocument's `tool_messages` list.
    See test_tool_handlers_and_results in test_tool_messages.py, and
    examples/basic/tool-extract-short-example.py.

    Note:
        - when defining a tool handler or agent_response, you can directly return
            ResultTool(field1 = val1, ...),
            where the values can be arbitrary data structures, including nested
            Pydantic objs, or you can define a subclass of ResultTool with the
            fields you want to return.
        - This is a special ToolMessage that is NOT meant to be used or handled
            by an agent.
        - AgentDoneTool is more restrictive in that you can only send a `content`
            or `tools` in the result.
    """
⋮----
request: str = "result_tool"
purpose: str = "Ignored; Wrapper for a structured message"
id: str = ""  # placeholder for OpenAI-API tool_call_id
⋮----
model_config = ConfigDict(
⋮----
def handle(self) -> AgentDoneTool
⋮----
class FinalResultTool(ToolMessage)
⋮----
"""Class to use as a wrapper for sending arbitrary results from an Agent's
    agent_response or tool handlers, to:
    (a) trigger completion of the current task as well as all parent tasks, and
    (b) be returned as the final result of the root task, i.e. this tool would appear
         in the final ChatDocument's `tool_messages` list.
    See test_tool_handlers_and_results in test_tool_messages.py, and
    examples/basic/chat-tool-function.py.

    Note:
        - when defining a tool handler or agent_response, you can directly return
            FinalResultTool(field1 = val1, ...),
            where the values can be arbitrary data structures, including nested
            Pydantic objs, or you can define a subclass of FinalResultTool with the
            fields you want to return.
        - This is a special ToolMessage that is NOT meant to be used by an agent's
            llm_response, but only by agent_response or tool handlers.
        - A subclass of this tool can be defined, with specific fields, and
          with _allow_llm_use = True, to allow the LLM to generate this tool,
          and have the effect of terminating the current and all parent tasks,
          with the tool appearing in the final ChatDocument's `tool_messages` list.
          See examples/basic/multi-agent-return-result.py.
    """
⋮----
request: str = ""
⋮----
class PassTool(ToolMessage)
⋮----
"""Tool for "passing" on the received msg (ChatDocument),
    so that an as-yet-unspecified agent can handle it.
    Similar to ForwardTool, but without specifying the recipient agent.
    """
⋮----
request: str = "pass_tool"
⋮----
def response(self, agent: ChatAgent, chat_doc: ChatDocument) -> ChatDocument
⋮----
"""When this tool is enabled for an Agent, this will result in a method
        added to the Agent with signature:
        `pass_tool(self, tool: PassTool, chat_doc: ChatDocument) -> ChatDocument:`
        """
# if PassTool is in chat_doc, pass its parent, else pass chat_doc itself
doc = chat_doc
⋮----
tools = agent.get_tool_messages(doc)
⋮----
doc = doc.parent
⋮----
new_doc = ChatDocument.deepcopy(doc)
⋮----
class DonePassTool(PassTool)
⋮----
"""Tool to signal DONE, AND Pass incoming/current msg as result.
    Similar to PassTool, except we append a DoneTool to the result tool_messages.
    """
⋮----
request: str = "done_pass_tool"
⋮----
# use PassTool to get the right ChatDocument to pass...
new_doc = PassTool.response(self, agent, chat_doc)
tools = agent.get_tool_messages(new_doc)
# ...then return an AgentDoneTool with content, tools from this ChatDocument
return AgentDoneTool(content=new_doc.content, tools=tools)  # type: ignore
⋮----
class ForwardTool(PassTool)
⋮----
"""Tool for forwarding the received msg (ChatDocument) to another agent or entity.
    Similar to PassTool, but with a specified recipient agent.
    """
⋮----
request: str = "forward_tool"
agent: str
⋮----
"""When this tool is enabled for an Agent, this will result in a method
        added to the Agent with signature:
        `forward_tool(self, tool: ForwardTool, chat_doc: ChatDocument) -> ChatDocument:`
        """
# if chat_doc contains ForwardTool, then we forward its parent ChatDocument;
# else forward chat_doc itself
⋮----
class SendTool(ToolMessage)
⋮----
"""Tool for agent or LLM to send content to a specified agent.
    Similar to RecipientTool.
    """
⋮----
request: str = "send_tool"
to: str
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
class AgentSendTool(ToolMessage)
⋮----
"""Tool for Agent (i.e. agent_response) to send content or tool_messages
    to a specified agent. Similar to SendTool except that AgentSendTool is only
    usable by agent_response (or handler of another tool), to send content or
    tools to another agent. SendTool does not allow sending tools.
    """
⋮----
request: str = "agent_send_tool"
</file>

<file path="langroid/agent/tools/recipient_tool.py">
"""
The `recipient_tool` is used to send a message to a specific recipient.
Various methods from the RecipientTool and AddRecipientTool class
are inserted into the Agent as methods (see `langroid/agent/base.py`,
the method `_get_tool_list()`).

See usage examples in `tests/main/test_multi_agent_complex.py` and
`tests/main/test_recipient_tool.py`.

A simpler alternative to this tool is `SendTool`, see here:
https://github.com/langroid/langroid/blob/main/langroid/agent/tools/orchestration.py

You can also define your own XML-based variant of this tool:
https://github.com/langroid/langroid/blob/main/examples/basic/xml-tool.py
which uses XML rather than JSON, and can be more reliable than JSON,
especially with weaker LLMs.

"""
⋮----
class AddRecipientTool(ToolMessage)
⋮----
"""
    Used by LLM to add a recipient to the previous message, when it has
    forgotten to specify a recipient. This avoids having to re-generate the
    previous message (and thus saves token-cost and time).
    """
⋮----
request: str = "add_recipient"
purpose: str = (
intended_recipient: str
_saved_content: str = ""
⋮----
def response(self, agent: ChatAgent) -> ChatDocument
⋮----
"""
        Returns:
            (ChatDocument): with content set to self.content and
                metadata.recipient set to self.recipient.
        """
⋮----
recipient_request_name = RecipientTool.default_value("request")
content = f"""
⋮----
content = self.__class__._saved_content  # use class-level attrib value
# erase content since we just used it.
⋮----
# we are constructing this so it looks as if msg is from LLM
⋮----
class RecipientTool(ToolMessage)
⋮----
"""
    Used by LLM to send a message to a specific recipient.

    Useful in cases where an LLM is talking to 2 or more
    agents (or an Agent and human user), and needs to specify which agent (task)
    its message is intended for. The recipient name should be the name of a task
    (which is normally the name of the agent that the task wraps, although the task
    can have its own name).

    To use this tool/function-call, LLM must generate a JSON structure
    with these fields:
    {
        "request": "recipient_message", # also the function name when using fn-calling
        "intended_recipient": <name_of_recipient_task_or_entity>,
        "content": <content>
    }
    The effect of this is that `content` will be sent to the `intended_recipient` task.
    """
⋮----
request: str = "recipient_message"
purpose: str = "To send message <content> to a specific <intended_recipient>."
⋮----
content: str
⋮----
@classmethod
    def create(cls, recipients: List[str], default: str = "") -> Type["RecipientTool"]
⋮----
"""Create a restricted version of RecipientTool that
        only allows certain recipients, and possibly sets a default recipient."""
⋮----
class RecipientToolRestricted(cls):  # type: ignore
⋮----
allowed_recipients: ClassVar[List[str]] = recipients
default_recipient: ClassVar[str] = default
⋮----
@classmethod
    def instructions(cls) -> str
⋮----
"""
        Generate instructions for using this tool/function.
        These are intended to be appended to the system message of the LLM.
        """
recipients = []
⋮----
recipients = cls.default_value("allowed_recipients")
⋮----
recipients_str = ", ".join(recipients)
⋮----
def response(self, agent: ChatAgent) -> str | ChatDocument
⋮----
"""
        When LLM has correctly used this tool,
        construct a ChatDocument with an explicit recipient,
        and make it look like it is from the LLM.

        Returns:
            (ChatDocument): with content set to self.content and
                metadata.recipient set to self.intended_recipient.
        """
default_recipient = self.__class__.default_value("default_recipient")
⋮----
# save the content as a class-variable, so that
# we can construct the ChatDocument once the LLM specifies a recipient.
# This avoids having to re-generate the entire message, saving time + cost.
⋮----
# we are constructing this so it looks as if msg is from LLM
⋮----
"""
        Response of agent if this tool is not used, e.g.
        the LLM simply sends a message without using this tool.
        This method has two purposes:
        (a) Alert the LLM that it has forgotten to specify a recipient, and prod it
            to use the `add_recipient` tool to specify just the recipient
            (and not re-generate the entire message).
        (b) Save the content of the message in the agent's `content` field,
            so the agent can construct a ChatDocument with this content once LLM
            later specifies a recipient using the `add_recipient` tool.

        This method is used to set the agent's handle_message_fallback() method.

        Returns:
            (str): reminder to LLM to use the `add_recipient` tool.
        """
# Note: once the LLM specifies a missing recipient, the task loop
# mechanism will not allow any of the "native" responders to respond,
# since the recipient will differ from the task name.
# So if this method is called, we can be sure that the recipient has not
# been specified.
⋮----
or msg.metadata.recipient != ""  # there IS an explicit recipient
⋮----
content = msg if isinstance(msg, str) else msg.content
# save the content as a class-variable, so that
# we can construct the ChatDocument once the LLM specifies a recipient.
# This avoids having to re-generate the entire message, saving time + cost.
</file>

<file path="langroid/agent/tools/retrieval_tool.py">
class RetrievalTool(ToolMessage)
⋮----
"""
    Retrieval tool, only to be used by a DocChatAgent.
    The handler method is defined in DocChatAgent.retrieval_tool
    """
⋮----
request: str = "retrieval_tool"
purpose: str = """
query: str
num_results: int
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>

<file path="langroid/agent/tools/rewind_tool.py">
"""
The `rewind_tool` is used to rewind to the `n`th previous Assistant message
and replace it with a new `content`. This is useful in several scenarios and
- saves token-cost + inference time,
- reduces distracting clutter in chat history, which helps improve response quality.

This is intended to mimic how a human user might use a chat interface, where they
go down a conversation path, and want to go back in history to "edit and re-submit"
a previous message, to get a better response.

See usage examples in `tests/main/test_rewind_tool.py`.
"""
⋮----
def prune_messages(agent: ChatAgent, idx: int) -> ChatDocument | None
⋮----
"""
    Clear the message history of agent, starting at index `idx`,
    taking care to first clear all dependent messages (possibly from other agents'
    message histories) that are linked to the message at `idx`, via the `child_id` field
    of the `metadata` field of the ChatDocument linked from the message at `idx`.

    Args:
        agent (ChatAgent): The agent whose message history is to be pruned.
        idx (int): The index from which to start clearing the message history.

    Returns:
        The parent ChatDocument of the ChatDocument linked from the message at `idx`,
        if it exists, else None.

    """
⋮----
chat_doc_id = agent.message_history[idx].chat_document_id
chat_doc = ChatDocument.from_id(chat_doc_id)
⋮----
parent = ChatDocument.from_id(chat_doc.metadata.parent_id)  # may be None
# We're invalidating the msg at idx,
# so starting with chat_doc, go down the child links
# and clear history of each agent, to the msg_idx
curr_doc = chat_doc
⋮----
child_agent = ChatAgent.from_id(child_doc.metadata.agent_id)
⋮----
curr_doc = child_doc
⋮----
# Clear out ObjectRegistry entries for this ChatDocument
# and all descendants (in case they weren't already cleared above)
⋮----
# Finally, clear this agent's history back to idx,
# and replace the msg at idx with the new content
⋮----
class RewindTool(ToolMessage)
⋮----
"""
    Used by LLM to rewind (i.e. backtrack) to the `n`th Assistant message
    and replace with a new msg.
    """
⋮----
request: str = "rewind_tool"
purpose: str = """
n: int
content: str
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
def response(self, agent: ChatAgent) -> str | ChatDocument
⋮----
"""
        Define the tool-handler method for this tool here itself,
        since it is a generic tool whose functionality should be the
        same for any agent.

        When LLM has correctly used this tool, rewind this agent's
        `message_history` to the `n`th assistant msg, and replace it with `content`.
        We need to mock it as if the LLM is sending this message.

        Within a multi-agent scenario, this also means that any other messages dependent
        on this message will need to be invalidated --
        so go down the chain of child messages and clear each agent's history
        back to the `msg_idx` corresponding to the child message.

        Returns:
            (ChatDocument): with content set to self.content.
        """
idx = agent.nth_message_idx_with_role(lm.Role.ASSISTANT, self.n)
⋮----
# set up a corrective message from AGENT
msg = f"""
⋮----
parent = prune_messages(agent, idx)
⋮----
# create ChatDocument with new content, to be returned as result of this tool
result_doc = agent.create_llm_response(self.content)
⋮----
# replace the message at idx with this new message
⋮----
# set the replaced doc's parent's child to this result_doc
⋮----
# first remove this parent's child from the registry
</file>
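
For illustration, the tool message an LLM would emit to backtrack to its 2nd-most-recent assistant message and replace it; the replacement content is invented.

```python
# Sketch: a RewindTool message; handled by RewindTool.response above.
from langroid.agent.tools.rewind_tool import RewindTool

rewind = RewindTool(n=2, content="Let me rephrase my earlier question: ...")
# Enabled like any other tool: agent.enable_message(RewindTool)
```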

<file path="langroid/agent/tools/segment_extract_tool.py">
"""
A tool to extract segment numbers from the last user message,
containing numbered segments.

The idea is that when an LLM wants to (or is asked to) simply extract
portions of a message verbatim, it should use this tool/function to
SPECIFY what should be extracted, rather than actually extracting it.
The output will be in the form of a list of segment numbers or ranges.
This will usually be much cheaper and faster than actually writing out the extracted
text. The handler of this tool/function will then extract the text and send it back.
"""
⋮----
class SegmentExtractTool(ToolMessage)
⋮----
request: str = "extract_segments"
purpose: str = """
segment_list: str
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
@classmethod
    def instructions(cls) -> str
</file>

<file path="langroid/agent/tools/task_tool.py">
"""
TaskTool: A tool that allows agents to delegate a task to a sub-agent with
    specific tools enabled.
"""
⋮----
class TaskTool(ToolMessage)
⋮----
"""
    Tool that spawns a sub-agent with specified tools to handle a task.

    The sub-agent can be given a custom name for identification in logs.
    If no name is provided, a random unique name starting with 'agent'
    will be generated.
    """
⋮----
# TODO: setting up termination conditions of sub-task needs to be improved
request: str = "task_tool"
purpose: str = """
⋮----
# Parameters for the agent tool
⋮----
system_message: Optional[str] = Field(
⋮----
prompt: str = Field(
⋮----
tools: List[str] = Field(
# TODO: ensure valid model name
model: Optional[str] = Field(
max_iterations: Optional[int] = Field(
agent_name: Optional[str] = Field(
⋮----
def _set_up_task(self, agent: ChatAgent) -> Task
⋮----
"""
        Helper method to set up a task for the sub-agent.

        Args:
            agent: The parent ChatAgent that is handling this tool
        """
# Generate a random name if not provided
agent_name = self.agent_name or f"agent-{str(uuid.uuid4())[:8]}"
⋮----
# Create chat agent config with system message if provided
# TODO: Maybe we just copy the parent agent's config and override chat_model?
#   -- but what if parent agent has a MockLMConfig?
llm_config = lm.OpenAIGPTConfig(
config = ChatAgentConfig(
⋮----
# Create the sub-agent
sub_agent = ChatAgent(config)
⋮----
# Enable the specified tools for the sub-agent
# Convert tool names to actual tool classes using parent agent's tools_map
⋮----
# Enable all tools from the parent agent:
# This is the list of all tools KNOWN (whether usable or handle-able or not)
tool_classes = []
⋮----
tool_class = agent.llm_tools_map[t]
allow_llm_use = tool_class._allow_llm_use
⋮----
allow_llm_use = allow_llm_use.default
⋮----
# No tools enabled
⋮----
# Enable only specified tools
⋮----
tool_class = agent.llm_tools_map[tool_name]
⋮----
# always enable the DoneTool to signal task completion
⋮----
# Create a non-interactive task
task = Task(sub_agent, interactive=False)
⋮----
"""

        Handle the TaskTool by creating a sub-agent with specified tools
        and running the task non-interactively.

        Args:
            agent: The parent ChatAgent that is handling this tool
            chat_doc: The ChatDocument containing this tool message
        """
⋮----
task = self._set_up_task(agent)
⋮----
# Create a ChatDocument for the prompt with parent pointer
prompt_doc = None
⋮----
prompt_doc = ChatDocument(
# Set bidirectional parent-child relationship
⋮----
# Run the task with the ChatDocument or string prompt
result = task.run(prompt_doc or self.prompt, turns=self.max_iterations or 10)
⋮----
"""
        Async method to handle the TaskTool by creating a sub-agent with specified tools
        and running the task non-interactively.

        Args:
            agent: The parent ChatAgent that is handling this tool
            chat_doc: The ChatDocument containing this tool message
        """
⋮----
# TODO eventually allow the various task setup configs,
#  including termination conditions
result = await task.run_async(
</file>

<file path="langroid/agent/tools/tavily_search_tool.py">
"""
A tool to trigger a Tavily search for a given query, and return the top results with
their titles, links, summaries. Since the tool is stateless (i.e. does not need
access to agent state), it can be enabled for any agent, without having to define a
special method inside the agent: `agent.enable_message(TavilySearchTool)`
"""
⋮----
class TavilySearchTool(ToolMessage)
⋮----
request: str = "tavily_search"
purpose: str = """
query: str
num_results: int
⋮----
def handle(self) -> str
⋮----
"""
        Conducts a search using Tavily based on the provided query
        and number of results by triggering a tavily_search.

        Returns:
            str: A formatted string containing the titles, links, and
                summaries of each search result, separated by two newlines.
        """
search_results = tavily_search(self.query, self.num_results)
# return Title, Link, Summary of each result, separated by two newlines
results_str = "\n\n".join(str(result) for result in search_results)
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>
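
Editor's note: a small usage sketch based on the module docstring above. It assumes the Tavily API key is available in the environment (typically `TAVILY_API_KEY`) for the underlying `tavily_search` call.

```python
import langroid as lr
from langroid.agent.tools.tavily_search_tool import TavilySearchTool

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Searcher"))
# The tool is stateless, so it can be enabled on any agent:
agent.enable_message(TavilySearchTool)

# Direct invocation of the handler (bypassing the LLM), for illustration:
results = TavilySearchTool(query="Langroid multi-agent framework", num_results=3).handle()
print(results)  # titles, links, summaries, separated by blank lines
```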

<file path="langroid/agent/__init__.py">
__all__ = [
</file>

<file path="langroid/agent/batch.py">
T = TypeVar("T")
U = TypeVar("U")
⋮----
class ExceptionHandling(str, Enum)
⋮----
"""Enum for exception handling options."""
⋮----
RAISE = "raise"
RETURN_NONE = "return_none"
RETURN_EXCEPTION = "return_exception"
⋮----
"""Convert legacy boolean handle_exceptions to ExceptionHandling enum."""
⋮----
"""
    Unified batch processing logic for both agent methods and tasks.

    Args:
        inputs: Iterable of inputs to process
        do_task: Task execution function that takes (input, index) and returns result
        start_idx: Starting index for the batch
        stop_on_first_result: Whether to stop after first valid result
        sequential: Whether to process sequentially
        handle_exceptions: How to handle exceptions:
            - RAISE or False: Let exceptions propagate
            - RETURN_NONE or True: Convert exceptions to None in results
            - RETURN_EXCEPTION: Include exception objects in results
            Boolean values are deprecated and will be removed in a future version.
        output_map: Function to map results to final output format
    """
exception_handling = _convert_exception_handling(handle_exceptions)
⋮----
def handle_error(e: BaseException) -> Any
⋮----
"""Handle exceptions based on exception_handling."""
⋮----
results: List[Optional[ChatDocument] | BaseException] = []
pending: set[asyncio.Task[Any]] = set()
# Create task-to-index mapping
task_indices: dict[asyncio.Task[Any], int] = {}
⋮----
tasks = [
task_indices = {task: i for i, task in enumerate(tasks)}
results = [None] * len(tasks)
⋮----
# Process completed tasks
⋮----
index = task_indices[task]
⋮----
result = await task
⋮----
results = []
⋮----
result = await do_task(input, i + start_idx)
⋮----
# Parallel execution
⋮----
return_exceptions = exception_handling != ExceptionHandling.RAISE
⋮----
results_with_exceptions = cast(
⋮----
results = [
else:  # ExceptionHandling.RETURN_EXCEPTION
results = results_with_exceptions
⋮----
results = [handle_error(e) for _ in inputs]
⋮----
"""
    Common batch processing logic for both agent methods and tasks.

    Args:
        inputs: List of inputs to process
        do_task: Task execution function
        batch_size: Size of batches, if None process all at once
        stop_on_first_result: Whether to stop after first valid result
        sequential: Whether to process sequentially
        handle_exceptions: How to handle exceptions:
            - RAISE or False: Let exceptions propagate
            - RETURN_NONE or True: Convert exceptions to None in results
            - RETURN_EXCEPTION: Include exception objects in results
            Boolean values are deprecated and will be removed in a future version.
        output_map: Function to map results
        message_template: Template for status message
        message: Optional override for status message
    """
⋮----
"""Extra wrap to run asyncio.run one single time and not once per loop

        Args:
            inputs (List[str  |  ChatDocument]): inputs to process
            batch_size (int | None): batch size

        Returns:
            List[Any]: results
        """
results: List[Any] = []
⋮----
msg = message or message_template.format(total=len(inputs))
⋮----
results = await _process_batch_async(
⋮----
batches = batched(inputs, batch_size)
⋮----
start_idx = len(results)
complete_str = f", {start_idx} complete" if start_idx > 0 else ""
msg = (
⋮----
output_map: Callable[[ChatDocument | None], U] = lambda x: x,  # type: ignore
⋮----
"""
    Generate and run copies of a task async/concurrently one per item in `items` list.
    For each item, apply `input_map` to get the initial message to process.
    For each result, apply `output_map` to get the final result.
    Args:
        gen_task (Callable[[int], Task]): generates the tasks to run
        items (list[T]): list of items to process
        input_map (Callable[[T], str|ChatDocument]): function to map item to
            initial message to process
        output_map (Callable[[ChatDocument|str], U]): function to map result
            to final result. If stop_on_first_result is enabled, then
            map any invalid output to None. We continue until some non-None
            result is obtained.
        stop_on_first_result (bool): whether to stop after the first valid
            (not-None) result. In this case all other tasks are
            cancelled, and their corresponding result is None in the
            returned list.
        sequential (bool): whether to run sequentially
            (e.g. some APIs such as ooba don't support concurrent requests)
        batch_size (Optional[int]): The number of tasks to run at a time,
            if None, unbatched
        turns (int): number of turns to run, -1 for infinite
        message (Optional[str]): optionally overrides the console status messages
        handle_exceptions: How to handle exceptions:
            - RAISE or False: Let exceptions propagate
            - RETURN_NONE or True: Convert exceptions to None in results
            - RETURN_EXCEPTION: Include exception objects in results
            Boolean values are deprecated and will be removed in a future version.
        max_cost: float: maximum cost to run the task (default 0.0 for unlimited)
        max_tokens: int: maximum token usage (in and out) (default 0 for unlimited)


    Returns:
        list[Optional[U]]: list of final results. Always list[U] if
        `stop_on_first_result` is disabled
    """
inputs = [input_map(item) for item in items]
⋮----
task_i = gen_task(i)
⋮----
result = await task_i.run_async(
⋮----
# exception will be handled by the caller
⋮----
# ----------------------------------------
# Propagate any exception stored on the task that may have been
# swallowed inside `Task.run_async`, so that the upper-level
# exception-handling logic works as expected.
⋮----
exc = getattr(task_i, attr, None)
⋮----
# Fallback: treat a KILL-status result as an error
⋮----
"""
    Run copies of `task` async/concurrently one per item in `items` list.
    For each item, apply `input_map` to get the initial message to process.
    For each result, apply `output_map` to get the final result.
    Args:
        task (Task): task to run
        items (list[T]): list of items to process
        input_map (Callable[[T], str|ChatDocument]): function to map item to
            initial message to process
        output_map (Callable[[ChatDocument|str], U]): function to map result
            to final result
        sequential (bool): whether to run sequentially
            (e.g. some APIs such as ooba don't support concurrent requests)
        batch_size (Optional[int]): The number of tasks to run at a time,
            if None, unbatched
        turns (int): number of turns to run, -1 for infinite
        max_cost: float: maximum cost to run the task (default 0.0 for unlimited)
        max_tokens: int: maximum token usage (in and out) (default 0 for unlimited)

    Returns:
        list[Optional[U]]: list of final results. Always list[U] if
        `stop_on_first_result` is disabled
    """
message = f"[bold green]Running {len(items)} copies of {task.name}..."
⋮----
"""
    Run the `method` on copies of `agent`, async/concurrently one per
    item in `items` list.
    ASSUMPTION: The `method` is an async method and has signature:
        method(self, input: str|ChatDocument|None) -> ChatDocument|None
    So this would typically be used for the agent's "responder" methods,
    e.g. `llm_response_async` or `agent_response_async`.

    For each item, apply `input_map` to get the initial message to process.
    For each result, apply `output_map` to get the final result.

    Args:
        agent (Agent): agent whose method to run
        method (str): Async method to run on copies of `agent`.
            The method is assumed to have signature:
            `method(self, input: str|ChatDocument|None) -> ChatDocument|None`
        input_map (Callable[[Any], str|ChatDocument]): function to map item to
            initial message to process
        output_map (Callable[[ChatDocument|str], Any]): function to map result
            to final result
        sequential (bool): whether to run sequentially
            (e.g. some APIs such as ooba don't support concurrent requests)
        stop_on_first_result (bool): whether to stop after the first valid
        handle_exceptions: How to handle exceptions:
            - RAISE or False: Let exceptions propagate
            - RETURN_NONE or True: Convert exceptions to None in results
            - RETURN_EXCEPTION: Include exception objects in results
            Boolean values are deprecated and will be removed in a future version.
        batch_size (Optional[int]): The number of items to process in each batch.
            If None, process all items at once.
    Returns:
        List[Any]: list of final results
    """
# Check if the method is async
method_name = method.__name__
⋮----
agent_cfg = copy.deepcopy(agent.config)
⋮----
agent_cls = type(agent)
agent_name = agent_cfg.name
⋮----
async def _do_task(input: str | ChatDocument, i: int) -> Any
⋮----
agent_i = agent_cls(agent_cfg)
method_i = getattr(agent_i, method_name, None)
⋮----
result = await method_i(input)
⋮----
async def _do_task(item: T) -> U
⋮----
async def _do_all(items: Iterable[T]) -> List[U]
⋮----
result = await _do_task(item)
⋮----
results: List[U] = []
⋮----
results = asyncio.run(_do_all(items))
⋮----
batches = batched(items, batch_size)
</file>
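
Editor's note: a sketch of the typical batch-run pattern described by the docstrings above, assuming `run_batch_tasks` is importable from `langroid.agent.batch` with the parameters documented there.

```python
import langroid as lr
from langroid.agent.batch import run_batch_tasks

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Summarizer"))
task = lr.Task(agent, interactive=False, single_round=True)

items = ["doc one ...", "doc two ...", "doc three ..."]
results = run_batch_tasks(
    task,
    items,
    input_map=lambda item: f"Summarize: {item}",
    output_map=lambda doc: "" if doc is None else doc.content,
    sequential=False,  # run copies concurrently
    batch_size=2,      # 2 at a time; None => all at once
)
```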

<file path="langroid/agent/done_sequence_parser.py">
"""Parser for done sequence DSL (Domain Specific Language).

Converts string patterns into DoneSequence objects for convenient task completion
configuration.

Examples:
    "T, A" -> Tool followed by Agent response
    "T[calculator], A" -> Specific tool 'calculator' followed by Agent response
    "L, T, A, L" -> LLM, Tool, Agent, LLM sequence
    "C[quit|exit]" -> Content matching regex pattern
"""
⋮----
"""Parse a string pattern or return existing DoneSequence unchanged.

    Args:
        sequence: Either a DoneSequence object or a string pattern to parse
        tools_map: Optional dict mapping tool names to tool classes
            (e.g., agent.llm_tools_map)

    Returns:
        DoneSequence object

    Raises:
        ValueError: If the string pattern is invalid
    """
⋮----
events = _parse_string_pattern(sequence, tools_map)
⋮----
"""Parse a string pattern into a list of AgentEvent objects.

    Pattern format:
        - Single letter codes: T, A, L, U, N, C
        - Specific tools: T[tool_name] or T[ToolClass]
        - Content match: C[regex_pattern]
        - Separated by commas, spaces allowed

    Args:
        pattern: String pattern to parse
        tools_map: Optional dict mapping tool names to tool classes
            (e.g., agent.llm_tools_map)

    Returns:
        List of AgentEvent objects

    Raises:
        ValueError: If pattern is invalid
    """
events = []
⋮----
# Split by comma and strip whitespace
parts = [p.strip() for p in pattern.split(",")]
⋮----
event = _parse_event_token(part, tools_map)
⋮----
"""Parse a single event token into an AgentEvent.

    Args:
        token: Single event token (e.g., "T", "T[calc]", "C[quit|exit]")
        tools_map: Optional dict mapping tool names to tool classes
            (e.g., agent.llm_tools_map)

    Returns:
        AgentEvent object

    Raises:
        ValueError: If token is invalid
    """
# Check for bracket notation
bracket_match = re.match(r"^([A-Z])\[([^\]]+)\]$", token)
⋮----
event_code = bracket_match.group(1)
param = bracket_match.group(2)
⋮----
# Specific tool: T[tool_name] or T[ToolClass]
tool_class = None
tool_name = param
⋮----
# First try direct lookup in tools_map by the param (tool name)
⋮----
tool_class = tools_map[param]
⋮----
# If not found, loop through tools_map to find a tool class
# whose __name__ matches param
⋮----
tool_class = cls
tool_name = name
⋮----
# Content match: C[regex_pattern]
⋮----
# Simple single-letter codes
event_map = {
⋮----
"C": EventType.CONTENT_MATCH,  # C without brackets matches any content
⋮----
# If not a single letter, could be a full event type name
token_upper = token.upper()
⋮----
"""Parse a list of mixed string patterns and DoneSequence objects.

    Args:
        sequences: List containing strings and/or DoneSequence objects
        tools_map: Optional dict mapping tool names to tool classes
            (e.g., agent.llm_tools_map)

    Returns:
        List of DoneSequence objects
    """
</file>
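
Editor's note: a short sketch of how the done-sequence DSL strings above are typically consumed, assuming they can be passed via `TaskConfig.done_sequences` (the parser's usual caller); verify the exact config field against `langroid/agent/task.py`.

```python
import langroid as lr
from langroid.agent.task import Task, TaskConfig

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Calculator"))

# Done when a Tool is generated and then handled by the agent ("T, A"),
# or when message content matches the regex "quit|exit".
config = TaskConfig(done_sequences=["T, A", "C[quit|exit]"])
task = Task(agent, config=config, interactive=False)
```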

<file path="langroid/agent/tool_message.py">
"""
Structured messages to an agent, typically from an LLM, to be handled by
an agent. The messages could represent, for example:
- information or data given to the agent
- request for information or data from the agent
- request to run a method of the agent
"""
⋮----
K = TypeVar("K")
⋮----
def remove_if_exists(k: K, d: dict[K, Any]) -> None
⋮----
"""Removes key `k` from `d` if present."""
⋮----
def format_schema_for_strict(schema: Any) -> None
⋮----
"""
    Recursively set additionalProperties to False and replace
    oneOf and allOf with anyOf, required for OpenAI structured outputs.
    Additionally, remove all defaults and set all fields to required.
    This may not be equivalent to the original schema.
    """
⋮----
# Handle $ref nodes - they can't have any other properties
⋮----
# Keep only the $ref, remove all other properties like description
ref_value = schema["$ref"]
⋮----
properties = schema["properties"]
all_properties = list(properties.keys())
⋮----
anyOf = (
⋮----
class ToolMessage(ABC, BaseModel)
⋮----
"""
    Abstract Class for a class that defines the structure of a "Tool" message from an
    LLM. Depending on context, "tools" are also referred to as "plugins",
    or "function calls" (in the context of OpenAI LLMs).
    Essentially, they are a way for the LLM to express its intent to run a special
    function or method. Currently these "tools" are handled by methods of the
    agent.

    Attributes:
        request (str): name of agent method to map to.
        purpose (str): purpose of agent method, expressed in general terms.
            (This is used when auto-generating the tool instruction to the LLM)
    """
⋮----
request: str
purpose: str
id: str = ""  # placeholder for OpenAI-API tool_call_id
⋮----
# If enabled, forces strict adherence to schema.
# Currently only supported by OpenAI LLMs. When unset, enables if supported.
_strict: Optional[bool] = None
_allow_llm_use: bool = True  # allow an LLM to use (i.e. generate) this tool?
⋮----
# Optional param to limit number of result tokens to retain in msg history.
# Some tools can have large results that we may not want to fully retain,
# e.g. result of a db query, which the LLM later reduces to a summary, so
# in subsequent dialog we may only want to retain the summary,
# and replace this raw result with a version truncated to _max_retained_tokens.
# Important to note: unlike _max_result_tokens, this param is
# NOT used to immediately truncate the result;
# it is only used to truncate what is retained in msg history AFTER the
# response to this result.
_max_retained_tokens: int | None = None
⋮----
# Optional param to limit number of tokens in the result of the tool.
_max_result_tokens: int | None = None
⋮----
model_config = ConfigDict(
⋮----
# do not include these fields in the generated schema
# since we don't require the LLM to specify them
⋮----
# Define excluded fields as a class method to avoid Pydantic treating it as
# a model field
⋮----
@classmethod
    def _get_excluded_fields(cls) -> set[str]
⋮----
@classmethod
    def name(cls) -> str
⋮----
return str(cls.default_value("request"))  # redundant str() to appease mypy
⋮----
@classmethod
    def instructions(cls) -> str
⋮----
"""
        Instructions on tool usage.
        """
⋮----
@classmethod
    def langroid_tools_instructions(cls) -> str
⋮----
"""
        Instructions on tool usage when `use_tools == True`, i.e.
        when using langroid built-in tools
        (as opposed to OpenAI-like function calls/tools).
        """
⋮----
@classmethod
    def require_recipient(cls) -> Type["ToolMessage"]
⋮----
class ToolMessageWithRecipient(cls):  # type: ignore
⋮----
recipient: str  # no default, so it is required
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
⋮----
"""
        Examples to use in few-shot demos with formatting instructions.
        Each example can be either:
        - just a ToolMessage instance, e.g. MyTool(param1=1, param2="hello"), or
        - a tuple (description, ToolMessage instance), where the description is
            a natural language "thought" that leads to the tool usage,
            e.g. ("I want to find the square of 5",  SquareTool(num=5))
            In some scenarios, including such a description can significantly
            enhance reliability of tool use.
        Returns:
            List of examples, each either a ToolMessage instance or a
            (description, ToolMessage) tuple.
        """
⋮----
@classmethod
    def usage_examples(cls, random: bool = False) -> str
⋮----
"""
        Instruction to the LLM showing examples of how to use the tool-message.

        Args:
            random (bool): whether to pick a random example from the list of examples.
                Set to `true` when using this to illustrate a dialog between LLM and
                user.
                (if false, use ALL examples)
        Returns:
            str: examples of how to use the tool/function-call
        """
# pick a random example of the fields
⋮----
examples = [choice(cls.examples())]
⋮----
examples = cls.examples()
formatted_examples = [
⋮----
def to_json(self) -> str
⋮----
def format_example(self) -> str
⋮----
def dict_example(self) -> Dict[str, Any]
⋮----
def get_value_of_type(self, target_type: Type[Any]) -> Any
⋮----
"""Try to find a value of a desired type in the fields of the ToolMessage."""
ignore_fields = self._get_excluded_fields().union({"request"})
⋮----
value = getattr(self, field_name)
⋮----
@classmethod
    def default_value(cls, f: str) -> Any
⋮----
"""
        Returns the default value of the given field, for the message-class
        Args:
            f (str): field name

        Returns:
            Any: default value of the field, or None if not set or if the
                field does not exist.
        """
schema = cls.model_json_schema()
⋮----
@classmethod
    def format_instructions(cls, tool: bool = False) -> str
⋮----
"""
        Default Instructions to the LLM showing how to use the tool/function-call.
        Works for GPT4 but override this for weaker LLMs if needed.

        Args:
            tool: instructions for Langroid-native tool use? (e.g. for non-OpenAI LLM)
                (or else it would be for OpenAI Function calls).
                Ignored in the default implementation, but can be used in subclasses.
        Returns:
            str: instructions on how to use the message
        """
# TODO: when we attempt to use a "simpler schema"
# (i.e. all nested fields explicit without definitions),
# we seem to get worse results, so we turn it off for now
param_dict = (
⋮----
# cls.simple_schema() if tool else
⋮----
examples_str = ""
⋮----
examples_str = "EXAMPLES:\n" + cls.usage_examples()
⋮----
@staticmethod
    def group_format_instructions() -> str
⋮----
"""Template for instructions for a group of tools.
        Works with GPT4 but override this for weaker LLMs if needed.
        """
⋮----
"""
        Clean up the schema of the Pydantic class (which can recursively contain
        other Pydantic classes), to create a version compatible with OpenAI
        Function-call API.

        Adapted from this excellent library:
        https://github.com/jxnl/instructor/blob/main/instructor/function_calls.py

        Args:
            request: whether to include the "request" field in the schema.
                (we set this to True when using Langroid-native TOOLs as opposed to
                OpenAI Function calls)
            defaults: whether to include fields with default values in the schema,
                    in the "properties" section.

        Returns:
            LLMFunctionSpec: the schema as an LLMFunctionSpec

        """
schema = copy.deepcopy(cls.model_json_schema())
docstring = parse(cls.__doc__ or "")
parameters = {
⋮----
excludes = cls._get_excluded_fields().copy()
⋮----
excludes = excludes.union({"request"})
# exclude 'excludes' from parameters["properties"]:
⋮----
# If request is present it must match the default value
# Similar to defining request as a literal type
⋮----
# Handle nested ToolMessage fields
⋮----
@classmethod
    def simple_schema(cls) -> Dict[str, Any]
⋮----
"""
        Return a simplified schema for the message, with only the request and
        required fields.
        Returns:
            Dict[str, Any]: simplified schema
        """
schema = generate_simple_schema(
</file>
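
Editor's note: a short sketch of defining and enabling a custom tool, following the conventions described in the docstrings above; the specific tool here is illustrative, not part of the library.

```python
import langroid as lr
from langroid.agent.tool_message import ToolMessage

class SquareTool(ToolMessage):
    request: str = "square"  # name used by the LLM to invoke the tool
    purpose: str = "To compute the square of a <number>"
    number: int

    def handle(self) -> str:
        # stateless handler; stateful tools are instead handled by an
        # agent method whose name matches `request`
        return str(self.number ** 2)

    @classmethod
    def examples(cls):
        return [
            cls(number=5),
            ("I want to find the square of 7", cls(number=7)),
        ]

agent = lr.ChatAgent(lr.ChatAgentConfig(name="MathAgent"))
agent.enable_message(SquareTool)  # allow LLM to generate it, and agent to handle it
```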

<file path="langroid/agent/xml_tool_message.py">
# For Union type handling - check if we have Python 3.10+ UnionType
HAS_UNION_TYPE = False
⋮----
from types import UnionType  # noqa: F401 # Used conditionally
⋮----
HAS_UNION_TYPE = True
⋮----
class XMLToolMessage(ToolMessage)
⋮----
"""
    Abstract class for tools formatted using XML instead of JSON.

    When a subclass defines a field with the attribute `verbatim=True`,
    instructions are sent to the LLM to ensure the field's content is:
        - preserved as is, including whitespace, indents, quotes, newlines, etc
            with no escaping, and
        - enclosed in a CDATA section in the XML output.
    This is useful for LLMs sending code as part of a tool;
    results can be far superior compared to sending code in JSON-formatted tools,
    where code needs to conform to JSON's strict rules and escaping requirements.
    (see test_xml_tool_message.py for an example).

    """
⋮----
request: str
purpose: str
⋮----
_allow_llm_use: bool = True
⋮----
model_config = ConfigDict(
⋮----
# Inherit settings from ToolMessage
⋮----
# XMLToolMessage-specific settings as class methods to avoid Pydantic
# treating them as model fields
⋮----
@classmethod
    def _get_excluded_fields(cls) -> set[str]
⋮----
# Root element for XML formatting
⋮----
@classmethod
    def _get_root_element(cls) -> str
⋮----
@classmethod
    def extract_field_values(cls, formatted_string: str) -> Optional[Dict[str, Any]]
⋮----
"""
        Extracts field values from an XML-formatted string.

        Args:
            formatted_string (str): The XML-formatted string to parse.

        Returns:
            Optional[Dict[str, Any]]: A dictionary containing the extracted field
                values, where keys are the XML element names and values are their
                corresponding contents.
            Returns None if parsing fails or the root element is not a dictionary.

        Raises:
            etree.XMLSyntaxError: If the input string is not valid XML.
        """
# SECURITY: Initialize XMLParser with flags to prevent
# XML External Entity (XXE), billion laughs, and external DTD attacks by
# disabling entity resolution, DTD loading, and network access;
# `strip_cdata=False` is needed to preserve
# content within CDATA sections (e.g., for code).
parser = etree.XMLParser(
root = etree.fromstring(formatted_string.encode("utf-8"), parser=parser)
⋮----
def parse_element(element: etree._Element) -> Any
⋮----
# Skip elements starting with underscore
⋮----
field_info = cls.model_fields.get(element.tag)
is_verbatim = (
⋮----
# For code elements, preserve the content as is, including whitespace
content = element.text if element.text else ""
# Strip leading and trailing triple backticks if present,
# accounting for whitespace
⋮----
# For non-code leaf elements, strip whitespace
⋮----
# For branch elements, handle potential lists or nested structures
children = [parse_element(child) for child in element]
⋮----
# If all children have the same tag, treat as a list
⋮----
# Otherwise, treat as a dictionary
result = {child.tag: parse_element(child) for child in element}
# Check if this corresponds to a nested Pydantic model
⋮----
result = parse_element(root)
⋮----
# Filter out empty dictionaries from skipped underscore fields
⋮----
@classmethod
    def parse(cls, formatted_string: str) -> Optional["XMLToolMessage"]
⋮----
"""
        Parses the XML-formatted string and returns an instance of the class.

        Args:
            formatted_string (str): The XML-formatted string to parse.

        Returns:
            Optional["XMLToolMessage"]: An instance of the class if parsing succeeds,
                None otherwise.
        """
⋮----
parsed_data = cls.extract_field_values(formatted_string)
⋮----
# Use Pydantic's parse_obj to create and validate the instance
⋮----
verbatim_fields = []
⋮----
full_name = f"{prefix}.{field_name}" if prefix else field_name
⋮----
@classmethod
    def format_instructions(cls, tool: bool = False) -> str
⋮----
fields = [
⋮----
instructions = """
⋮----
preamble = "Placeholders:\n"
xml_format = f"Formatting example:\n\n<{cls._get_root_element()}>\n"
⋮----
current_path = f"{path}.{field_name}" if path else field_name
⋮----
origin = get_origin(field_type)
args = get_args(field_type)
⋮----
# Handle Union types (including Optional types like List[Person] | None)
# Support both typing.Union and types.UnionType (Python 3.10+ | syntax)
is_union = origin is Union
⋮----
is_union = is_union or origin is _UnionType
⋮----
# Filter out None type for Optional types
non_none_args = [arg for arg in args if arg is not type(None)]
⋮----
# This is an Optional type, process the non-None type
field_type = non_none_args[0]
⋮----
# If there are multiple non-None types, fall through to default handling
⋮----
item_type = args[0] if args else Any
⋮----
verbatim_fields = cls.find_verbatim_fields()
⋮----
field_info = cls.model_fields[field]
field_type = field_info.annotation
# Ensure we have a valid type
⋮----
verbatim_alert = ""
⋮----
verbatim_alert = f"""
⋮----
examples_str = ""
⋮----
examples_str = "EXAMPLES:\n" + cls.usage_examples()
⋮----
def format_example(self) -> str
⋮----
"""
        Format the current instance as an XML example.

        Returns:
            str: A string representation of the current instance in XML format.

        Raises:
            ValueError: If the result from etree.tostring is not a string.
        """
⋮----
elem = etree.SubElement(parent, name)
current_path = f"{path}.{name}" if path else name
⋮----
# Handle nested Pydantic models
⋮----
root = etree.Element(self._get_root_element())
exclude_fields: set[str] = self._get_excluded_fields()
⋮----
result = etree.tostring(root, encoding="unicode", pretty_print=True)
⋮----
@classmethod
    def find_candidates(cls, text: str) -> List[str]
⋮----
"""
        Finds XML-like tool message candidates in text, with relaxed opening tag rules.

        Args:
            text: Input text to search for XML structures.

        Returns:
            List of XML strings. For fragments missing the root opening tag but having
            valid XML structure and root closing tag, prepends the root opening tag.

        Example:
            With root_tag="tool", given:
            "Hello <field1>data</field1> </tool>"
            Returns: ["<tool><field1>data</field1></tool>"]
        """
⋮----
root_tag = cls._get_root_element()
opening_tag = f"<{root_tag}>"
closing_tag = f"</{root_tag}>"
⋮----
candidates = []
pos = 0
⋮----
# Look for either proper opening tag or closing tag
start_normal = text.find(opening_tag, pos)
end = text.find(closing_tag, pos)
⋮----
# Handle normal case (has opening tag)
end = text.find(closing_tag, start_normal)
⋮----
pos = max(end + len(closing_tag), start_normal + 1)
⋮----
# last fragment - ok to miss closing tag
⋮----
pos = start_normal + 1
⋮----
# Look backwards for first XML tag
text_before = text[pos:end]
first_tag_match = re.search(r"<\w+>", text_before)
⋮----
start = pos + first_tag_match.start()
⋮----
pos = end + len(closing_tag)
</file>
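
Editor's note: a hedged sketch of an XML-formatted tool with a verbatim code field. The `verbatim=True` field attribute follows the wording of the class docstring above; the exact way to attach it (e.g. via `Field(...)` extras) should be verified against the tests referenced there.

```python
from pydantic import Field

from langroid.agent.xml_tool_message import XMLToolMessage

class WriteFileTool(XMLToolMessage):
    request: str = "write_file"
    purpose: str = "To write <code> verbatim to the file at <filepath>"
    filepath: str = Field(..., description="path of the file to write")
    # verbatim=True => the content is wrapped in a CDATA section, preserving
    # whitespace, quotes and newlines exactly (per the class docstring above)
    code: str = Field(..., verbatim=True, description="contents to write")
```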

<file path="langroid/cachedb/__init__.py">
__all__ = [
</file>

<file path="langroid/cachedb/base.py">
class CacheDBConfig(BaseSettings)
⋮----
"""Configuration model for CacheDB."""
⋮----
class CacheDB(ABC)
⋮----
"""Abstract base class for a cache database."""
⋮----
@abstractmethod
    def store(self, key: str, value: Any) -> None
⋮----
"""
        Abstract method to store a value associated with a key.

        Args:
            key (str): The key under which to store the value.
            value (Any): The value to store.
        """
⋮----
@abstractmethod
    def retrieve(self, key: str) -> Dict[str, Any] | str | None
⋮----
"""
        Abstract method to retrieve the value associated with a key.

        Args:
            key (str): The key to retrieve the value for.

        Returns:
            dict: The value associated with the key.
        """
⋮----
@abstractmethod
    def delete_keys(self, keys: List[str]) -> None
⋮----
"""
        Delete the keys from the cache.

        Args:
            keys (List[str]): The keys to delete.
        """
⋮----
@abstractmethod
    def delete_keys_pattern(self, pattern: str) -> None
⋮----
"""
        Delete all keys with the given pattern

        Args:
            pattern (str): The pattern to match.
        """
</file>
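
Editor's note: an illustrative in-memory implementation of the abstract interface above (not a class provided by the library), to show what a concrete `CacheDB` subclass must supply.

```python
import fnmatch
from typing import Any, Dict, List

from langroid.cachedb.base import CacheDB, CacheDBConfig

class DictCache(CacheDB):
    """Toy in-memory cache implementing the CacheDB interface."""

    def __init__(self, config: CacheDBConfig = CacheDBConfig()):
        self.config = config
        self._data: Dict[str, Any] = {}

    def store(self, key: str, value: Any) -> None:
        self._data[key] = value

    def retrieve(self, key: str) -> Dict[str, Any] | str | None:
        return self._data.get(key)

    def delete_keys(self, keys: List[str]) -> None:
        for k in keys:
            self._data.pop(k, None)

    def delete_keys_pattern(self, pattern: str) -> None:
        for k in [k for k in self._data if fnmatch.fnmatch(k, pattern)]:
            del self._data[k]
```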

<file path="langroid/embedding_models/protoc/embeddings_pb2_grpc.py">
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
⋮----
class EmbeddingStub(object)
⋮----
"""Missing associated documentation comment in .proto file."""
⋮----
def __init__(self, channel)
⋮----
"""Constructor.

        Args:
            channel: A grpc.Channel.
        """
⋮----
class EmbeddingServicer(object)
⋮----
def Embed(self, request, context)
⋮----
"""Missing associated documentation comment in .proto file."""
⋮----
def add_EmbeddingServicer_to_server(servicer, server)
⋮----
rpc_method_handlers = {
generic_handler = grpc.method_handlers_generic_handler(
⋮----
# This class is part of an EXPERIMENTAL API.
class Embedding(object)
</file>

<file path="langroid/embedding_models/protoc/embeddings_pb2.py">
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler.  DO NOT EDIT!
# source: embeddings.proto
# Protobuf Python Version: 4.25.1
"""Generated protocol buffer code."""
⋮----
# @@protoc_insertion_point(imports)
⋮----
_sym_db = _symbol_database.Default()
⋮----
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
⋮----
_globals = globals()
⋮----
# @@protoc_insertion_point(module_scope)
</file>

<file path="langroid/embedding_models/protoc/embeddings_pb2.pyi">
from typing import (
    ClassVar as _ClassVar,
)
from typing import (
    Iterable as _Iterable,
)
from typing import (
    Mapping as _Mapping,
)
from typing import (
    Optional as _Optional,
)
from typing import (
    Union as _Union,
)

from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf.internal import containers as _containers

DESCRIPTOR: _descriptor.FileDescriptor

class EmbeddingRequest(_message.Message):
    __slots__ = ("model_name", "batch_size", "strings")
    MODEL_NAME_FIELD_NUMBER: _ClassVar[int]
    BATCH_SIZE_FIELD_NUMBER: _ClassVar[int]
    STRINGS_FIELD_NUMBER: _ClassVar[int]
    model_name: str
    batch_size: int
    strings: _containers.RepeatedScalarFieldContainer[str]
    def __init__(
        self,
        model_name: _Optional[str] = ...,
        batch_size: _Optional[int] = ...,
        strings: _Optional[_Iterable[str]] = ...,
    ) -> None: ...

class BatchEmbeds(_message.Message):
    __slots__ = ("embeds",)
    EMBEDS_FIELD_NUMBER: _ClassVar[int]
    embeds: _containers.RepeatedCompositeFieldContainer[Embed]
    def __init__(
        self, embeds: _Optional[_Iterable[_Union[Embed, _Mapping]]] = ...
    ) -> None: ...

class Embed(_message.Message):
    __slots__ = ("embed",)
    EMBED_FIELD_NUMBER: _ClassVar[int]
    embed: _containers.RepeatedScalarFieldContainer[float]
    def __init__(self, embed: _Optional[_Iterable[float]] = ...) -> None: ...
</file>

<file path="langroid/embedding_models/protoc/embeddings.proto">
syntax = "proto3";

service Embedding {
    rpc Embed (EmbeddingRequest) returns (BatchEmbeds) {};
}

message EmbeddingRequest {
    string model_name = 1;
    int32 batch_size = 2;
    repeated string strings = 3;
}

message BatchEmbeds {
    repeated Embed embeds = 1;
}

message Embed {
    repeated float embed = 1;
}
</file>

<file path="langroid/embedding_models/__init__.py">
__all__ = [
</file>

<file path="langroid/embedding_models/remote_embeds.py">
"""
If run as a script, starts an RPC server which handles remote
embedding requests:

For example:
python3 -m langroid.embedding_models.remote_embeds --port `port`

where `port` is the port at which the service is exposed.  Currently,
supports insecure connections only, and this should NOT be exposed to
the internet.
"""
⋮----
class RemoteEmbeddingRPCs(embeddings_grpc.EmbeddingServicer)
⋮----
embeds = self.embedding_fn(list(request.strings))
⋮----
embeds_pb = [embeddings_pb.Embed(embed=e) for e in embeds]
⋮----
class RemoteEmbeddingsConfig(em.SentenceTransformerEmbeddingsConfig)
⋮----
api_base: str = "localhost"
port: int = 50052
# The below are used only when waiting for server creation
poll_delay: float = 0.01
max_retries: int = 1000
⋮----
class RemoteEmbeddings(em.SentenceTransformerEmbeddings)
⋮----
def __init__(self, config: RemoteEmbeddingsConfig = RemoteEmbeddingsConfig())
⋮----
def embedding_fn(self) -> Callable[[list[str]], Embeddings]
⋮----
def fn(texts: list[str]) -> Embeddings
⋮----
url = f"{self.config.api_base}:{self.config.port}"
⋮----
stub = embeddings_grpc.EmbeddingStub(channel)  # type: ignore
response = stub.Embed(
⋮----
def with_handling(texts: list[str]) -> Embeddings
⋮----
# In local mode, start the server if it has not already
# been started
⋮----
# Occurs when the server hasn't been started
⋮----
# Start the server
proc = subprocess.Popen(
⋮----
# The remote is not local or we have exhausted retries
# We should now raise an error if the server is not accessible
⋮----
"""Starts the RPC server."""
server = grpc.aio.server()
⋮----
)  # type: ignore
url = f"{bind_address_base}:{port}"
</file>
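
Editor's note: a usage sketch for the remote-embeddings client above, assuming a server was started as described in the module docstring (or that the client is allowed to start one locally, per the fallback logic shown).

```python
from langroid.embedding_models.remote_embeds import (
    RemoteEmbeddings,
    RemoteEmbeddingsConfig,
)

# Server side (separate process):
#   python3 -m langroid.embedding_models.remote_embeds --port 50052
config = RemoteEmbeddingsConfig(api_base="localhost", port=50052)
embedder = RemoteEmbeddings(config)

vectors = embedder.embedding_fn()(["hello world", "goodbye world"])
print(len(vectors), len(vectors[0]))  # number of texts, embedding dimension
```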

<file path="langroid/language_models/prompt_formatter/__init__.py">
__all__ = [
</file>

<file path="langroid/language_models/prompt_formatter/base.py">
logger = logging.getLogger(__name__)
⋮----
class PromptFormatter(ABC)
⋮----
"""
    Abstract base class for a prompt formatter
    """
⋮----
def __init__(self, config: PromptFormatterConfig)
⋮----
@staticmethod
    def create(formatter: str) -> "PromptFormatter"
⋮----
@abstractmethod
    def format(self, messages: List[LLMMessage]) -> str
⋮----
"""
        Convert sequence of messages (system, user, assistant, user, assistant...user)
            to a single prompt formatted according to the specific format type,
            to be used in a /completions endpoint.

        Args:
            messages (List[LLMMessage]): chat history as a sequence of messages

        Returns:
            (str): formatted version of chat history

        """
</file>

<file path="langroid/language_models/prompt_formatter/hf_formatter.py">
"""
Prompt formatter based on HuggingFace `AutoTokenizer.apply_chat_template` method
from their Transformers library. It searches the hub for a model matching the
specified name and uses the first match, on the assumption that all
matching models share the same tokenizer.
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
def try_import_hf_modules() -> Tuple[Type[Any], Type[Any]]
⋮----
"""
    Attempts to import the AutoTokenizer class from the transformers package.
    Returns:
        The AutoTokenizer class if successful.
    Raises:
        ImportError: If the transformers package is not installed.
    """
⋮----
def find_hf_formatter(model_name: str) -> str
⋮----
hf_api = HfApi()
# try to find a matching model, with progressively shorter prefixes of model_name
model_name = model_name.lower().split("/")[-1]
parts = re.split("[:\\-_]", model_name)
parts = [p.lower() for p in parts if p != ""]
⋮----
prefix = "-".join(parts[:i])
models = hf_api.list_models(
⋮----
mdl = next(models)
tokenizer = AutoTokenizer.from_pretrained(mdl.id)
⋮----
class HFFormatter(PromptFormatter)
⋮----
models: Set[str] = set()  # which models have been used for formatting
⋮----
def __init__(self, config: HFPromptFormatterConfig)
⋮----
# only warn if this is the first time we've used this mdl.id
⋮----
def format(self, messages: List[LLMMessage]) -> str
⋮----
# build msg dicts expected by AutoTokenizer.apply_chat_template
sys_msg_dict = dict(role=Role.SYSTEM.value, content=sys_msg)
chat_dicts = []
⋮----
all_dicts = [sys_msg_dict] + chat_dicts
⋮----
# apply chat template
result = self.tokenizer.apply_chat_template(all_dicts, tokenize=False)
⋮----
# this likely means the model doesn't support a system msg,
# so combine it with the first user msg
first_user_msg = chat_msgs[0][0] if len(chat_msgs) > 0 else user_msg
first_user_msg = sys_msg + "\n\n" + first_user_msg
⋮----
result = self.tokenizer.apply_chat_template(chat_dicts, tokenize=False)
</file>
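
Editor's note: a sketch of formatting a chat history with the HuggingFace-based formatter above. It requires the `transformers` package and hub access; the model name is illustrative, and the actual model found depends on the hub search in `find_hf_formatter`.

```python
from langroid.language_models.base import LLMMessage, Role
from langroid.language_models.config import HFPromptFormatterConfig
from langroid.language_models.prompt_formatter.hf_formatter import HFFormatter

formatter = HFFormatter(HFPromptFormatterConfig(model_name="mistral-7b-instruct"))
prompt = formatter.format(
    [
        LLMMessage(role=Role.SYSTEM, content="You are a helpful assistant."),
        LLMMessage(role=Role.USER, content="What is a neutron star?"),
    ]
)
print(prompt)  # single string in the model's chat-template format
```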

<file path="langroid/language_models/prompt_formatter/llama2_formatter.py">
logger = logging.getLogger(__name__)
⋮----
BOS: str = "<s>"
EOS: str = "</s>"
B_INST: str = "[INST]"
E_INST: str = "[/INST]"
B_SYS: str = "<<SYS>>\n"
E_SYS: str = "\n<</SYS>>\n\n"
SPECIAL_TAGS: List[str] = [B_INST, E_INST, BOS, EOS, "<<SYS>>", "<</SYS>>"]
⋮----
class Llama2Formatter(PromptFormatter)
⋮----
def __init__(self, config: Llama2FormatterConfig) -> None
⋮----
def format(self, messages: List[LLMMessage]) -> str
⋮----
"""
        For llama2 models, convert chat history into a single
        prompt for Llama2 models, for use in the /completions endpoint
        (as opposed to the /chat/completions endpoint).
        See:
        https://www.reddit.com/r/LocalLLaMA/comments/155po2p/get_llama_2_prompt_format_right/
        https://github.com/facebookresearch/llama/blob/main/llama/generation.py#L44

        Args:
            system_prompt (str): system prompt, typically specifying role/task.
            chat_history (List[Tuple[str,str]]): List of (user, assistant) pairs
            user_message (str): user message, at the end of the chat, i.e. the message
                for which we want to generate a response.

        Returns:
            str: Prompt for Llama2 models

        Typical structure of the formatted prompt:
        Note: it is important that the first [INST] ... [/INST] pair surrounds the
        system prompt together with the first user message; many libraries miss this detail.

        <s>[INST] <<SYS>>
        You are a helpful... bla bla.. assistant
        <</SYS>>

        Hi there! [/INST] Hello! How can I help you today? </s><s>[INST]
        What is a neutron star? [/INST] A neutron star is a ... </s><s>
        [INST] Okay cool, thank you! [/INST] You're welcome! </s><s>
        [INST] Ah, I have one more question.. [/INST]
        """
bos = BOS if self.config.use_bos_eos else ""
eos = EOS if self.config.use_bos_eos else ""
text = f"{bos}{B_INST} {B_SYS}{system_prompt}{E_SYS}"
</file>

<file path="langroid/language_models/__init__.py">
__all__ = [
</file>

<file path="langroid/language_models/azure_openai.py">
azureStructuredOutputList = [
⋮----
azureStructuredOutputAPIMin = "2024-08-01-preview"
⋮----
logger = logging.getLogger(__name__)
⋮----
class AzureConfig(OpenAIGPTConfig)
⋮----
"""
    Configuration for Azure OpenAI GPT.

    Attributes:
        type (str): should be ``azure.``
        api_version (str): can be set in the ``.env`` file as
            ``AZURE_OPENAI_API_VERSION.``
        deployment_name (str|None): can be optionally set in the ``.env`` file as
            ``AZURE_OPENAI_DEPLOYMENT_NAME`` and should be based on the custom name you
            chose for your deployment when you deployed a model.
        model_name (str): [DEPRECATED] can be set in the ``.env``
            file as ``AZURE_OPENAI_MODEL_NAME``
            and should be based on the model name chosen during setup.
        chat_model (str): the chat model name to use. Can be set via
            the env variable ``AZURE_OPENAI_CHAT_MODEL``.
            Recommended to use this instead of ``model_name``.

    """
⋮----
api_key: str = ""  # CAUTION: set this ONLY via env var AZURE_OPENAI_API_KEY
type: str = "azure"
api_version: str = "2023-05-15"
deployment_name: str | None = None
model_name: str = ""
api_base: str = ""
⋮----
# Alternatively, bring your own clients:
azure_openai_client_provider: Callable[[], AzureOpenAI] | None = None
azure_openai_async_client_provider: Callable[[], AsyncAzureOpenAI] | None = None
⋮----
# all of the vars above can be set via env vars,
# by upper-casing the name and prefixing with `env_prefix`, e.g.
# AZURE_OPENAI_API_VERSION=2023-05-15
# This is either done in the .env file, or via an explicit
# `export AZURE_OPENAI_API_VERSION=...`
model_config = SettingsConfigDict(env_prefix="AZURE_OPENAI_")
⋮----
def __init__(self, **kwargs) -> None:  # type: ignore
⋮----
class AzureGPT(OpenAIGPT)
⋮----
"""
    Class to access OpenAI LLMs via Azure. These env variables can be obtained from the
    file `.azure_env`. Azure OpenAI doesn't support ``completion``
    """
⋮----
def __init__(self, config: AzureConfig)
⋮----
# This will auto-populate config values from .env file
</file>
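
Editor's note: a sketch of configuring the Azure OpenAI wrapper above via environment variables. The env-var names come from the `AZURE_OPENAI_` prefix documented in the config class; the `chat()` call signature is assumed to match the base `LanguageModel` interface.

```python
from langroid.language_models.azure_openai import AzureConfig, AzureGPT

# Normally set in .env or exported, e.g.
#   AZURE_OPENAI_API_KEY=..., AZURE_OPENAI_API_VERSION=2023-05-15,
#   AZURE_OPENAI_DEPLOYMENT_NAME=my-gpt4-deployment, AZURE_OPENAI_CHAT_MODEL=gpt-4o
config = AzureConfig()  # auto-populates from env vars, per __init__ above
llm = AzureGPT(config)

response = llm.chat("What is the capital of France?", max_tokens=50)
print(response.message)
```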

<file path="langroid/language_models/config.py">
class PromptFormatterConfig(BaseSettings)
⋮----
type: str = "llama2"
⋮----
model_config = SettingsConfigDict(env_prefix="FORMAT_", case_sensitive=False)
⋮----
class Llama2FormatterConfig(PromptFormatterConfig)
⋮----
use_bos_eos: bool = False
⋮----
class HFPromptFormatterConfig(PromptFormatterConfig)
⋮----
type: str = "hf"
model_name: str
</file>

<file path="langroid/language_models/mock_lm.py">
"""Mock Language Model for testing"""
⋮----
def none_fn(x: str) -> None | str
⋮----
class MockLMConfig(LLMConfig)
⋮----
"""
    Mock Language Model Configuration.

    Attributes:
        response_dict (Dict[str, str]): A "response rule-book", in the form of a
            dictionary; if the last msg in the dialog is x, then respond with response_dict[x]
    """
⋮----
chat_context_length: int = 1_000_000_000  # infinite
response_dict: Dict[str, str] = {}
response_fn: Callable[[str], None | str] = none_fn
response_fn_async: Optional[Callable[[str], Awaitable[Optional[str]]]] = None
default_response: str = "Mock response"
⋮----
type: str = "mock"
⋮----
class MockLM(LanguageModel)
⋮----
def __init__(self, config: MockLMConfig = MockLMConfig())
⋮----
def _response(self, msg: str) -> LLMResponse
⋮----
# response is based on this fallback order:
# - response_dict
# - response_fn
# - default_response
mapped_response = self.config.response_dict.get(
⋮----
async def _response_async(self, msg: str) -> LLMResponse
⋮----
# - response_fn_async
⋮----
response = await self.config.response_fn_async(msg)
⋮----
response = self.config.response_fn(msg)
⋮----
"""
        Mock chat function for testing
        """
last_msg = messages[-1].content if isinstance(messages, list) else messages
⋮----
def generate(self, prompt: str, max_tokens: int = 200) -> lm.LLMResponse
⋮----
"""
        Mock generate function for testing
        """
⋮----
async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse
⋮----
def get_stream(self) -> bool
⋮----
def set_stream(self, stream: bool) -> bool
</file>
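
Editor's note: a sketch of the usual testing pattern for the mock LLM above: attach a `MockLMConfig` to an agent and drive responses from the rule-book dictionary.

```python
import langroid as lr
from langroid.language_models.mock_lm import MockLMConfig

# Rule-book responses: if the last message is a key, reply with its value;
# anything else falls back to `default_response`.
mock_llm = MockLMConfig(
    response_dict={"ping": "pong"},
    default_response="Mock response",
)
agent = lr.ChatAgent(lr.ChatAgentConfig(name="Tester", llm=mock_llm))

print(agent.llm_response("ping").content)      # -> "pong"
print(agent.llm_response("anything").content)  # -> "Mock response"
```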

<file path="langroid/language_models/provider_params.py">
"""
Provider-specific parameter configurations for various LLM providers.
"""
⋮----
# Constants
LANGDB_BASE_URL = "https://api.us-east-1.langdb.ai"
PORTKEY_BASE_URL = "https://api.portkey.ai"
DUMMY_API_KEY = "xxx"
⋮----
class LangDBParams(BaseSettings)
⋮----
"""
    Parameters specific to LangDB integration.
    """
⋮----
api_key: str = DUMMY_API_KEY
project_id: str = ""
label: Optional[str] = None
run_id: Optional[str] = None
thread_id: Optional[str] = None
base_url: str = LANGDB_BASE_URL
⋮----
model_config = SettingsConfigDict(env_prefix="LANGDB_")
⋮----
class PortkeyParams(BaseSettings)
⋮----
"""
    Parameters specific to Portkey integration.

    Portkey is an AI gateway that provides a unified API for multiple LLM providers,
    with features like automatic retries, fallbacks, load balancing, and observability.

    Example usage:
        # Use Portkey with Anthropic
        config = OpenAIGPTConfig(
            chat_model="portkey/anthropic/claude-3-sonnet-20240229",
            portkey_params=PortkeyParams(
                api_key="your-portkey-api-key",
                provider="anthropic"
            )
        )
    """
⋮----
api_key: str = DUMMY_API_KEY  # Portkey API key
provider: str = ""  # Required: e.g., "openai", "anthropic", "cohere", etc.
virtual_key: Optional[str] = None  # Optional: virtual key for the provider
trace_id: Optional[str] = None  # Optional: trace ID for request tracking
metadata: Optional[Dict[str, Any]] = None  # Optional: metadata for logging
retry: Optional[Dict[str, Any]] = None  # Optional: retry configuration
cache: Optional[Dict[str, Any]] = None  # Optional: cache configuration
cache_force_refresh: Optional[bool] = None  # Optional: force cache refresh
user: Optional[str] = None  # Optional: user identifier
organization: Optional[str] = None  # Optional: organization identifier
custom_headers: Optional[Dict[str, str]] = None  # Optional: additional headers
base_url: str = PORTKEY_BASE_URL
⋮----
model_config = SettingsConfigDict(env_prefix="PORTKEY_")
⋮----
def get_headers(self) -> Dict[str, str]
⋮----
"""Generate Portkey-specific headers from parameters."""
⋮----
headers = {}
⋮----
portkey_key = os.getenv("PORTKEY_API_KEY", "")
⋮----
def parse_model_string(self, model_string: str) -> tuple[str, str]
⋮----
"""
        Parse a model string like "portkey/anthropic/claude-3-sonnet"
        and extract provider and model name.

        Returns:
            tuple: (provider, model_name)
        """
parts = model_string.split("/", 2)
⋮----
model = model_string.replace("portkey/", "")
⋮----
"""
        Get the API key for the provider from environment variables.

        Args:
            provider: The provider name (e.g., "anthropic", "openai")
            default_key: Default key to return if not found

        Returns:
            The API key for the provider
        """
⋮----
env_patterns = [
⋮----
key = os.getenv(pattern, "")
</file>
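
Editor's note: a small sketch of the `parse_model_string` helper documented above, showing how a `portkey/<provider>/<model>` string is split.

```python
from langroid.language_models.provider_params import PortkeyParams

params = PortkeyParams(api_key="your-portkey-api-key")
provider, model = params.parse_model_string(
    "portkey/anthropic/claude-3-sonnet-20240229"
)
print(provider, model)  # expected: "anthropic" "claude-3-sonnet-20240229"
```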

<file path="langroid/language_models/utils.py">
# from openai-cookbook
⋮----
logger = logging.getLogger(__name__)
# setlevel to warning
⋮----
# define a retry decorator
⋮----
errors: tuple = (  # type: ignore
⋮----
"""Retry a function with exponential backoff."""
⋮----
def wrapper(*args: List[Any], **kwargs: Dict[Any, Any]) -> Any
⋮----
# Initialize variables
num_retries = 0
delay = initial_delay
⋮----
# Loop until a successful response or max_retries is hit or exception is raised
⋮----
# do not retry when the request itself is invalid,
# e.g. when context is too long
⋮----
# do not retry when there's an auth error
⋮----
# Retry on specified errors
⋮----
# For certain types of errors that slip through here
# (e.g. when using proxies like LiteLLM, do not retry)
⋮----
# Increment retries
⋮----
# Check if max retries has been reached
⋮----
# Increment the delay
⋮----
# Sleep for the delay
⋮----
# Raise exceptions for any errors not specified
⋮----
async def wrapper(*args: List[Any], **kwargs: Dict[Any, Any]) -> Any
⋮----
result = await func(*args, **kwargs)
</file>
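
Editor's note: the decorator bodies above are elided by compression. The following is a generic, self-contained sketch of the exponential-backoff retry pattern the comments describe; the function name, defaults, and error types here are illustrative, not the library's actual API.

```python
import random
import time
from functools import wraps
from typing import Any, Callable

def retry_with_backoff(
    errors: tuple = (ConnectionError, TimeoutError),
    initial_delay: float = 1.0,
    exponential_base: float = 2.0,
    jitter: bool = True,
    max_retries: int = 5,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            delay = initial_delay
            for _ in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except errors:
                    # back off exponentially, with optional jitter, then retry
                    delay *= exponential_base * (1 + jitter * random.random())
                    time.sleep(delay)
            # final attempt: let any remaining exception propagate
            return func(*args, **kwargs)
        return wrapper
    return decorator
```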

<file path="langroid/parsing/__init__.py">
__all__ = [
</file>

<file path="langroid/parsing/code_parser.py">
"""
    Chunk code into smaller pieces, so that we don't exceed the maximum
    number of tokens allowed by the embedding model.
    Args:
        code: string of code
        language: str as a file extension, e.g. "py", "yml"
        max_tokens: max tokens per chunk
        len_fn: function to get the length of a string in token units
    Returns:

    """
lexer = get_lexer_by_name(language)
tokens = list(lex(code, lexer))
⋮----
chunks = []
current_chunk = ""
⋮----
token_tokens = len_fn(token_value)
⋮----
current_chunk = token_value
⋮----
class CodeParsingConfig(BaseSettings)
⋮----
extensions: List[str] = [
⋮----
"cfg",  # e.g. setup.cfg
⋮----
chunk_size: int = 500  # tokens
token_encoding_model: str = "text-embedding-3-small"
n_similar_docs: int = 4
⋮----
class CodeParser
⋮----
def __init__(self, config: CodeParsingConfig)
⋮----
def num_tokens(self, text: str) -> int
⋮----
"""
        How many tokens are in the text, according to the tokenizer.
        This needs to be accurate, otherwise we may exceed the maximum
        number of tokens allowed by the model.
        Args:
            text: string to tokenize
        Returns:
            number of tokens in the text
        """
tokens = self.tokenizer.encode(text)
⋮----
def split(self, docs: List[Document]) -> List[Document]
⋮----
"""
        Split the documents into chunks, according to the config.splitter.
        Only the documents with a language in the config.extensions are split.
        !!! note
            We assume the metadata in each document has at least a `language` field,
            which is used to determine how to chunk the code.
        Args:
            docs: list of documents to split
        Returns:
            list of documents, where each document is a chunk; the metadata of the
            original document is duplicated for each chunk, so that when we retrieve a
            chunk, we immediately know info about the original document.
        """
chunked_docs = [
⋮----
d.metadata.language,  # type: ignore
⋮----
if d.metadata.language in self.config.extensions  # type: ignore
⋮----
# collapse the list of lists into a single list
</file>
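
Editor's note: a sketch of splitting a code document with the parser above. It assumes `DocMetaData` accepts an extra `language` field (the `# type: ignore` on `d.metadata.language` in `split()` suggests it is a dynamic attribute).

```python
from langroid.mytypes import DocMetaData, Document
from langroid.parsing.code_parser import CodeParser, CodeParsingConfig

config = CodeParsingConfig(extensions=["py"], chunk_size=200)
parser = CodeParser(config)

# Each Document's metadata must carry a `language` field matching one of
# the configured extensions; otherwise the document is left unsplit.
doc = Document(
    content="def add(a, b):\n    return a + b\n",
    metadata=DocMetaData(source="example.py", language="py"),
)
chunks = parser.split([doc])
```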

<file path="langroid/parsing/document_parser.py">
import docling  # noqa
⋮----
import pymupdf4llm  # noqa
⋮----
logger = logging.getLogger(__name__)
⋮----
class DocumentType(str, Enum)
⋮----
# TODO add `md` (Markdown) and `html`
PDF = "pdf"
DOCX = "docx"
DOC = "doc"
TXT = "txt"
XLSX = "xlsx"
XLS = "xls"
PPTX = "pptx"
⋮----
def find_last_full_char(possible_unicode: bytes) -> int
⋮----
"""
    Find the index of the last full character in a byte string.
    Args:
        possible_unicode (bytes): The bytes to check.
    Returns:
        int: The index of the last full unicode character.
    """
⋮----
def is_plain_text(path_or_bytes: str | bytes) -> bool
⋮----
"""
    Check if a file is plain text by attempting to decode it as UTF-8.
    Args:
        path_or_bytes (str|bytes): The file path or bytes object.
    Returns:
        bool: True if the file is plain text, False otherwise.
    """
⋮----
response = requests.get(path_or_bytes)
⋮----
content = response.content[:1024]
⋮----
content = f.read(1024)
⋮----
content = path_or_bytes[:1024]
⋮----
# Use magic to detect the MIME type
⋮----
mime_type = magic.from_buffer(content, mime=True)
⋮----
# Check if the MIME type is not a text type
⋮----
# Attempt to decode the content as UTF-8
content = content[: find_last_full_char(content)]
⋮----
_ = content.decode("utf-8")
# Additional checks can go here, e.g., to verify that the content
# doesn't contain too many unusual characters for it to be considered text
⋮----
# If decoding fails, it's likely not plain text (or not encoded in UTF-8)
⋮----
class DocumentParser(Parser)
⋮----
"""
    Abstract base class for extracting text from special types of docs
    such as PDFs or Docx.

    Attributes:
        source (str): The source, either a URL or a file path.
        doc_bytes (BytesIO): BytesIO object containing the doc data.
    """
⋮----
"""
        Create a DocumentParser instance based on source type
            and config.<source_type>.library specified.

        Args:
            source (str|bytes): The source, could be a URL, file path,
                or bytes object.
            config (ParserConfig): The parser configuration.
            doc_type (str|None): The type of document, if known

        Returns:
            DocumentParser: An instance of a DocumentParser subclass.
        """
inferred_doc_type = DocumentParser._document_type(source, doc_type)
⋮----
source_name = source if isinstance(source, str) else "bytes"
⋮----
def __init__(self, source: str | bytes, config: ParsingConfig)
⋮----
"""
        Args:
            source (str|bytes): The source, which could be
            a path, a URL or a bytes object.
        """
⋮----
"""
        Determine the type of document based on the source.

        Args:
            source (str|bytes): The source, which could be a URL,
                a file path, or a bytes object.
            doc_type (str|DocumentType|None): The type of document, if known.

        Returns:
            str: The document type.
        """
⋮----
# detect file type from path extension
⋮----
# must be bytes: attempt to detect type from content
# using magic mime type detection
⋮----
mime_type = magic.from_buffer(source, mime=True)
⋮----
def _load_doc_as_bytesio(self) -> BytesIO
⋮----
"""
        Load the docs into a BytesIO object.

        Returns:
            BytesIO: A BytesIO object containing the doc data.
        """
⋮----
response = requests.get(self.source)
⋮----
"""
        Get document chunks from a file path or bytes object.
        Args:
            source (str|bytes): The source, which could be a URL, path or bytes object.
            parser (Parser): The parser instance (for splitting the document).
            doc_type (str|DocumentType|None): The type of document, if known.
            lines (int|None): The number of lines to read from a plain text file.
        Returns:
            List[Document]: A list of `Document` objects,
                each containing a chunk of text, determined by the
                chunking and splitting settings in the parser config.
        """
dtype: DocumentType = DocumentParser._document_type(source, doc_type)
⋮----
doc_parser = DocumentParser.create(
chunks = doc_parser.get_doc_chunks()
⋮----
doc_parser = ImagePdfParser(source, parser.config)
⋮----
# try getting as plain text; these will be chunked downstream
# -- could be a bytes object or a path
⋮----
content = source.decode()
⋮----
file_lines = content.splitlines()[:lines]
content = "\n".join(line.strip() for line in file_lines)
⋮----
file_lines = list(itertools.islice(f, lines))
⋮----
content = f.read()
soup = BeautifulSoup(content, "html.parser")
text = soup.get_text()
⋮----
doc = Document(
⋮----
def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]
⋮----
"""Yield each page in the PDF."""
⋮----
def get_document_from_page(self, page: Any) -> Document
⋮----
"""
        Get Langroid Document object (with possible metadata)
        corresponding to a given page.
        """
⋮----
def fix_text(self, text: str) -> str
⋮----
"""
        Fix text extracted from a PDF.

        Args:
            text (str): The extracted text.

        Returns:
            str: The fixed text.
        """
# Some pdf parsers introduce extra space before hyphen,
# so use regular expression to replace 'space-hyphen' with just 'hyphen'
⋮----
def get_doc(self) -> Document
⋮----
"""
        Get entire text from source as a single document.

        Returns:
            a `Document` object containing the content of the pdf file,
                and metadata containing source name (URL or path)
        """
⋮----
text = "".join(
⋮----
def get_doc_chunks(self) -> List[Document]
⋮----
"""
        Get document chunks from a pdf source,
        with page references in the document metadata.

        Returns:
            List[Document]: a list of `Document` objects,
                each containing a chunk of text
        """
⋮----
split = []  # tokens in curr split
pages: List[str] = []
docs: List[Document] = []
# metadata.id to be shared by ALL chunks of this document
common_id = ObjectRegistry.new_id()
n_chunks = 0  # how many chunks so far
⋮----
# not used but could be useful, esp to blend the
# metadata from the pages into the chunks
page_doc = self.get_document_from_page(page)
page_text = page_doc.content
⋮----
# split could be so long it needs to be split
# into multiple chunks. Or it could be so short
# that it needs to be combined with the next chunk.
⋮----
# pretty formatting of pages (e.g. 1-3, 4, 5-7)
p_0 = int(pages[0]) - self.config.page_number_offset
p_n = int(pages[-1]) - self.config.page_number_offset
page_str = f"pages {p_0}-{p_n}" if p_0 != p_n else f"page {p_0}"
text = self.tokenizer.decode(split[: self.config.chunk_size])
⋮----
split = split[self.config.chunk_size - self.config.overlap :]
pages = [str(i + 1)]
# there may be a last split remaining:
# if it's shorter than the overlap, we shouldn't make a chunk for it
# since it's already included in the prior chunk;
# the only exception is if there have been no chunks so far.
⋮----
class FitzPDFParser(DocumentParser)
⋮----
"""
    Parser for processing PDFs using the `fitz` library.
    """
⋮----
def iterate_pages(self) -> Generator[Tuple[int, "fitz.Page"], None, None]
⋮----
"""
        Yield each page in the PDF using `fitz`.

        Returns:
            Generator[fitz.Page]: Generator yielding each page.
        """
⋮----
doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
⋮----
def get_document_from_page(self, page: "fitz.Page") -> Document
⋮----
"""
        Get Document object from a given `fitz` page.

        Args:
            page (fitz.Page): The `fitz` page object.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
class PyMuPDF4LLMParser(DocumentParser)
⋮----
"""
    Parser for processing PDFs using the `pymupdf4llm` library.
    """
⋮----
import pymupdf4llm  # noqa
⋮----
doc: fitz.Document = fitz.open(stream=self.doc_bytes, filetype="pdf")
pages: List[Dict[str, Any]] = pymupdf4llm.to_markdown(doc, page_chunks=True)
⋮----
def get_document_from_page(self, page: Dict[str, Any]) -> Document
⋮----
"""
        Get Document object corresponding to a given "page-chunk"
        dictionary, see:
         https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/api.html


        Args:
            page (Dict[str,Any]): The "page-chunk" dictionary.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
# TODO could possibly use other metadata from page, see above link.
⋮----
class DoclingParser(DocumentParser)
⋮----
"""
    Parser for processing PDFs using the `docling` library.
    """
⋮----
"""
        Yield each page in the PDF using `docling`.
        Code largely from this example:
        https://github.com/DS4SD/docling/blob/4d41db3f7abb86c8c65386bf94e7eb0bf22bb82b/docs/examples/export_figures.py

        Returns:
            Generator[docling.Page]: Generator yielding each page.
        """
⋮----
import docling  # noqa
⋮----
from docling.datamodel.base_models import InputFormat  # type: ignore
⋮----
from docling.document_converter import (  # type: ignore
⋮----
from docling_core.types.doc import ImageRefMode  # type: ignore
⋮----
IMAGE_RESOLUTION_SCALE = 2.0
pipeline_options = PdfPipelineOptions()
⋮----
converter = DocumentConverter(
doc_path = self.source
⋮----
# write to tmp file, then use that path
⋮----
doc_path = temp_file.name
⋮----
output_dir = Path(str(Path(doc_path).with_suffix("")) + "-pages")
⋮----
result: ConversionResult = converter.convert(doc_path)
⋮----
def n_page_elements(page) -> int:  # type: ignore
⋮----
page_element_count = [n_page_elements(i) for i in result.pages]
element_page_cutoff = list(accumulate([1] + page_element_count))
⋮----
page_start = element_page_cutoff[i]
page_end = element_page_cutoff[i + 1]
md_file = output_dir / f"page_{i}.md"
# we could have just directly exported to a markdown string,
# but we need to save to a file to force generation of image-files.
⋮----
def get_document_from_page(self, md_file: str) -> Document
⋮----
"""
        Get Document object from a given 1-page markdown file,
        possibly containing image refs.

        Args:
            md_file (str): The markdown file path for the page.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
text = f.read()
⋮----
class PyPDFParser(DocumentParser)
⋮----
"""
    Parser for processing PDFs using the `pypdf` library.
    """
⋮----
def iterate_pages(self) -> Generator[Tuple[int, pypdf.PageObject], None, None]
⋮----
"""
        Yield each page in the PDF using `pypdf`.

        Returns:
            Generator[pypdf.pdf.PageObject]: Generator yielding each page.
        """
⋮----
reader = pypdf.PdfReader(self.doc_bytes)
⋮----
def get_document_from_page(self, page: pypdf.PageObject) -> Document
⋮----
"""
        Get Document object from a given `pypdf` page.

        Args:
            page (pypdf.pdf.PageObject): The `pypdf` page object.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
class ImagePdfParser(DocumentParser)
⋮----
"""
    Parser for processing PDFs that are images, i.e. not "true" PDFs.
    """
⋮----
) -> Generator[Tuple[int, "Image"], None, None]:  # type: ignore
⋮----
images = convert_from_bytes(self.doc_bytes.getvalue())
⋮----
def get_document_from_page(self, page: "Image") -> Document:  # type: ignore
⋮----
"""
        Get Document object corresponding to a given `pdf2image` page.

        Args:
            page (Image): The PIL Image object.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
text = pytesseract.image_to_string(page)
⋮----
class UnstructuredPDFParser(DocumentParser)
⋮----
"""
    Parser for processing PDF files using the `unstructured` library.
    """
⋮----
def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:  # type: ignore
⋮----
# from unstructured.chunking.title import chunk_by_title
⋮----
elements = partition_pdf(file=self.doc_bytes, include_page_breaks=True)
⋮----
# elements = chunk_by_title(elements)
page_number = 1
page_elements = []  # type: ignore
⋮----
if page_elements:  # Avoid yielding empty pages at the start
⋮----
page_elements = []
⋮----
# Yield the last page if it's not empty
⋮----
"""
        Get Document object from a given `unstructured` element.

        Args:
            page (unstructured element): The `unstructured` element object.

        Returns:
            Document: Document object, with content and possible metadata.
        """
text = " ".join(el.text for el in page)
⋮----
class UnstructuredDocxParser(DocumentParser)
⋮----
"""
    Parser for processing DOCX files using the `unstructured` library.
    """
⋮----
elements = partition_docx(file=self.doc_bytes, include_page_breaks=True)
⋮----
"""
        Get Document object from a given `unstructured` element.

        Note:
            The concept of "pages" doesn't actually exist in the .docx file format in
            the same way it does in formats like .pdf. A .docx file is made up of a
            series of elements like paragraphs and tables, but the division into
            pages is done dynamically based on the rendering settings (like the page
            size, margin size, font size, etc.).

        Args:
            page (unstructured element): The `unstructured` element object.

        Returns:
            Document object, with content and possible metadata.
        """
⋮----
class UnstructuredDocParser(UnstructuredDocxParser)
⋮----
elements = partition_doc(file=self.doc_bytes, include_page_breaks=True)
⋮----
class PythonDocxParser(DocumentParser)
⋮----
"""
    Parser for processing DOCX files using the `python-docx` library.
    """
⋮----
"""
        Simulate iterating through pages.
        In a DOCX file, pages are not explicitly defined,
        so we consider each paragraph as a separate 'page' for simplicity.
        """
⋮----
doc = docx.Document(self.doc_bytes)
⋮----
"""
        Get Document object from a given 'page', which in this case is a single
        paragraph.

        Args:
            page (list): A list containing a single Paragraph object.

        Returns:
            Document: Document object, with content and possible metadata.
        """
paragraph = page[0]
⋮----
class MarkitdownDocxParser(DocumentParser)
⋮----
md = MarkItDown()
self.doc_bytes.seek(0)  # Reset to start
⋮----
# Direct conversion from stream works for DOCX (unlike XLSX)
result = md.convert_stream(self.doc_bytes, file_extension=".docx")
⋮----
# Split content into logical sections (paragraphs, sections, etc.)
# This approach differs from the strict page-based approach used for PDFs
sections = re.split(r"(?=# |\n## |\n### )", result.text_content)
⋮----
# Filter out empty sections
sections = [section for section in sections if section.strip()]
⋮----
def get_document_from_page(self, md_content: str) -> Document
⋮----
"""
        Get Document object from a given markdown section.

        Args:
            md_content (str): The markdown content for the section.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
class MarkitdownXLSXParser(DocumentParser)
⋮----
# Save stream to a temp file since md.convert() expects a path or URL
# Temporary workaround until markitdown fixes convert_stream function
# for xls and xlsx files
# See issue here https://github.com/microsoft/markitdown/issues/321
⋮----
temp_file.flush()  # Ensure data is written before reading
result = md.convert(temp_file.name)
⋮----
sheets = re.split(r"(?=## Sheet\d+)", result.text_content)
⋮----
"""
        Get Document object from a given 1-page markdown string.

        Args:
            md_content (str): The markdown content for the page.

        Returns:
            Document: Document object, with content and possible metadata.
        """
⋮----
class MarkitdownPPTXParser(DocumentParser)
⋮----
result = md.convert_stream(self.doc_bytes, file_extension=".pptx")
slides = re.split(r"(?=<!-- Slide number: \d+ -->)", result.text_content)
⋮----
class LLMPdfParser(DocumentParser)
⋮----
"""
    This class converts PDFs to Markdown using multimodal LLMs.

    It extracts pages, converts them with the LLM (replacing images with
    detailed descriptions), and outputs Markdown page by page. The
    conversion follows `LLM_PDF_MD_SYSTEM_INSTRUCTION`. It employs
    multiprocessing for speed, async requests with rate limiting, and
    handles errors.

    It supports page-by-page splitting or chunking multiple pages into
    one, respecting page boundaries and a `max_token_limit`.
    """
⋮----
DEFAULT_MAX_TOKENS = 7000
OUTPUT_DIR = Path(".llm_pdfparser")  # Fixed output directory
⋮----
LLM_PDF_MD_SYSTEM_INSTRUCTION = """
⋮----
def __init__(self, source: Union[str, bytes], config: ParsingConfig)
⋮----
# Ensure output directory exists
⋮----
prefix = (
temp_file = tempfile.NamedTemporaryFile(
⋮----
"""
        If True, each PDF page is processed as a separate chunk,
        resulting in one LLM request per page. If False, pages are
        grouped into chunks based on `max_token_limit` before being sent
        to the LLM.
        """
⋮----
# Rate limiting parameters
⋮----
"""
        A semaphore to control the number of concurrent requests to the LLM,
        preventing rate limit errors.  A semaphore slot is acquired before
        making an LLM request and released after the request is complete.
        """
⋮----
self.retry_delay = 5  # seconds, for exponential backoff
⋮----
def _extract_page(self, page_num: int) -> Dict[str, Any]
⋮----
"""
        Extracts a single page and estimates token count.
        Opens the PDF from self.doc_bytes (a BytesIO object).
        """
⋮----
# Always open the document from in-memory bytes.
doc = fitz.open(stream=self.doc_bytes.getvalue(), filetype="pdf")
new_pdf = fitz.open()
⋮----
pdf_bytes = new_pdf.write()
text = doc[page_num].get_text("text")
token_count = len(text) // 4 if text else len(pdf_bytes) // 4
⋮----
"""Parallel PDF page extraction using self.doc_bytes."""
⋮----
total_pages = len(doc)
⋮----
num_workers = num_workers or cpu_count()
⋮----
results = []
⋮----
"""Groups pages into chunks where each chunk is approximately `max_tokens`."""
chunks: List[List[Dict[str, Any]]] = []
current_chunk: List[Dict[str, Any]] = []
current_tokens = 0
⋮----
current_chunk = []
⋮----
if current_chunk:  # Add remaining pages
⋮----
"""
        Merges grouped pages into a single binary chunk so that
        it does not exceed max token limit
        """
⋮----
merged_pdf = fitz.open()
page_numbers = []
⋮----
temp_pdf = fitz.open("pdf", page["pdf_bytes"])
⋮----
"pdf_bytes": merged_pdf.write(),  # Binary PDF data
"page_numbers": page_numbers,  # List of page numbers in this chunk
⋮----
"""
        Extracts, groups, and merges PDF pages into chunks with embedded page markers.
        """
⋮----
pages = self._extract_pdf_pages_parallel(num_workers)
⋮----
# Each page becomes its own chunk
⋮----
# Group pages based on token limit
chunks = self._group_pages_by_token_limit(pages, max_tokens)
⋮----
pdf_chunks = pool.map(self._merge_pages_into_pdf_with_metadata, chunks)
⋮----
@staticmethod
    def _page_num_str(page_numbers: Any) -> str
⋮----
"""
        Converts page numbers to a formatted string.
        """
⋮----
async def _send_chunk_to_llm(self, chunk: Dict[str, Any]) -> str
⋮----
"""
        Sends a PDF chunk to the LLM API and returns the response text.
        Uses retries with exponential backoff to handle transient failures.
        """
⋮----
async with self.semaphore:  # Limit concurrent API requests
⋮----
llm_config = OpenAIGPTConfig(
llm = OpenAIGPT(config=llm_config)
page_nums = self._page_num_str(chunk.get("page_numbers", "?"))
base64_string = base64.b64encode(chunk["pdf_bytes"]).decode("utf-8")
data_uri = f"data:application/pdf;base64,{base64_string}"
⋮----
file_content = dict(
⋮----
# optimistically try this: some API proxies like litellm
# support this, and others may not.
⋮----
# fallback: assume file upload is similar to OpenAI API
⋮----
prompt = (
system_prompt = (
⋮----
# Send the request with PDF content and system instructions
response = await llm.async_client.chat.completions.create(  # type: ignore
⋮----
dict(  # type: ignore
⋮----
# Return extracted text if available
⋮----
# Log error with page numbers for debugging
⋮----
# Apply exponential backoff before retrying
delay = self.retry_delay * (2**attempt)
⋮----
# Log failure after max retries
page_nums = chunk.get("page_numbers", "Unknown")
⋮----
return ""  # Return empty string if all retries fail
⋮----
async def process_chunks(self, chunks: List[Dict[str, Any]]) -> List[str]
⋮----
"""
        Processes PDF chunks by sending them to the LLM API and
        collecting the results.

        Args:
            chunks: A list of dictionaries, where each dictionary represents
                a PDF chunk and contains the PDF data and page numbers.
        """
# To show a nice progress bar
⋮----
# Create a list of asynchronous tasks to send each chunk to the LLM.
# A chunk here may be a single page or a group of pages, as returned
# by the _prepare_pdf_chunks_for_llm method.
tasks = [self._send_chunk_to_llm(chunk) for chunk in chunks]
⋮----
# Gather the results from all tasks, allowing exceptions to be returned.
# tqdm_asyncio.gather is a wrapper around asyncio.gather
gathered_results = await tqdm_asyncio.gather(
⋮----
chunk = chunks[i]  # Get the corresponding chunk.
⋮----
# Handle exceptions that occurred during chunk processing.
⋮----
# Process successful results and append page/chunk markers.
markdown = str(result)
⋮----
return results  # Return the list of results.
⋮----
"""
        Iterates over the document pages, extracting content using the
        LLM API, saves them to a markdown file, and yields page numbers
        along with their corresponding content.

        Yields:
            A generator of tuples, where each tuple contains the page number
            (int) and the page content (Any).
        """
⋮----
# This involves extracting pages, grouping them according to the
# `max_tokens` limit (if `split_on_page` is False), and merging pages
# into larger PDF chunks. The result is a list of dictionaries, each
# containing the PDF bytes and the associated page numbers (a single
# page per chunk if `split_on_page` is True).
⋮----
pdf_chunks = self._prepare_pdf_chunks_for_llm(
⋮----
# We asynchronously process each chunk, sending it to the LLM and
# retrieving the Markdown output, handling rate limiting and retries.
markdown_results = asyncio.run(self.process_chunks(pdf_chunks))
⋮----
# This file serves as an intermediate storage location for the
# complete Markdown output.
⋮----
# Read the full Markdown content from the temporary file.
⋮----
full_markdown = infile.read()
⋮----
# The splitting is based on the `split_on_page` setting. If True,
# the Markdown is split using the "Page-" marker. Otherwise, it's
# split using the "Chunk-" marker.
⋮----
pages = full_markdown.split("<!----Page-")
⋮----
pages = full_markdown.split("<!----Chunk-")
⋮----
# Remove the first element if it's empty (due to the split).
⋮----
pages = pages[1:]
⋮----
# Iterate over the pages or chunks and yield their content.
⋮----
# Check for errors during processing.
⋮----
page_content = page
⋮----
# Extract the actual page content by removing the marker.
page_content = (
⋮----
# Yield the page number and content.
⋮----
def get_document_from_page(self, page: str) -> Document
⋮----
"""
        Get a Document object from a given markdown page.
        """
⋮----
class MarkerPdfParser(DocumentParser)
⋮----
"""
    Parse PDF files using the `marker` library: https://github.com/VikParuchuri/marker
    """
⋮----
DEFAULT_CONFIG = {"paginate_output": True, "output_format": "markdown"}
⋮----
user_config = (
⋮----
"""
        Yield each page in the PDF using `marker`.
        """
⋮----
import marker  # noqa
⋮----
config_parser = ConfigParser(self.config_dict)
converter = PdfConverter(
⋮----
filename = Path(doc_path).stem + "_converted"
⋮----
rendered = converter(doc_path)
⋮----
file_path = output_dir / f"{filename}.md"
⋮----
full_markdown = f.read()
⋮----
# Regex for splitting pages
pages = re.split(r"\{\d+\}----+", full_markdown)
⋮----
page_no = 0
⋮----
"""
        Get Document object from a given 1-page markdown file,
        possibly containing image refs.

        Args:
            page (str): The page we get by splitting large md file from
            marker

        Returns:
            Document: Document object, with content and possible metadata.
        """
</file>

<file path="langroid/parsing/file_attachment.py">
class FileAttachment(BaseModel)
⋮----
"""Represents a file attachment to be sent to an LLM API."""
⋮----
content: bytes
filename: Optional[str] = None
mime_type: str = "application/octet-stream"
url: str | None = None
detail: str | None = None
⋮----
def __init__(self, **data: Any) -> None
⋮----
"""Initialize with sensible defaults for filename if not provided."""
⋮----
# Generate a more readable unique filename
unique_id = str(uuid.uuid4())[:8]
⋮----
"""Create a FileAttachment from a file path.

        Args:
            file_path: Path to the file to attach

        Returns:
            FileAttachment instance
        """
path = Path(file_path)
⋮----
content = f.read()
⋮----
mime_type = "application/octet-stream"
⋮----
"""Create a FileAttachment from a URL.

        Args:
            url: URL to the file
            content: Optional raw bytes content (if already fetched)
            filename: Optional name to use for the file
            mime_type: MIME type of the content, guessed from filename or url

        Returns:
            FileAttachment instance
        """
⋮----
# Extract filename from URL if possible
⋮----
parsed_url = urlparse(url)
path = parsed_url.path
filename = path.split("/")[-1] if path else None
⋮----
content=content or b"",  # Empty bytes if no content provided
⋮----
"""Create a FileAttachment from either a local file path or a URL.

        Args:
            path_or_url: Path to the file or URL to fetch

        Returns:
            FileAttachment instance
        """
# Convert to string if Path object
path_str = str(path)
⋮----
# Check if it's a URL
⋮----
# Assume it's a local file path
⋮----
"""Create a FileAttachment from bytes content.

        Args:
            content: Raw bytes content
            filename: Optional name to use for the file
            mime_type: MIME type of the content, guessed from filename if provided

        Returns:
            FileAttachment instance
        """
⋮----
"""Create a FileAttachment from a file-like object.

        Args:
            file_obj: File-like object with binary content
            filename: Optional name to use for the file
            mime_type: MIME type of the content, guessed from filename if provided

        Returns:
            FileAttachment instance
        """
content = file_obj.read()
⋮----
"""Create a FileAttachment from text content.

        Args:
            text: Text content to include
            filename: Optional name to use for the file
            mime_type: MIME type of the content
            encoding: Text encoding to use

        Returns:
            FileAttachment instance
        """
content = text.encode(encoding)
⋮----
def to_base64(self) -> str
⋮----
"""Convert content to base64 encoding.

        Returns:
            Base64 encoded string
        """
⋮----
def to_data_uri(self) -> str
⋮----
"""Convert content to a data URI.

        Returns:
            A data URI string containing the base64-encoded content with MIME type
        """
base64_content = self.to_base64()
⋮----
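# --- Illustrative usage (editor's sketch, not part of the repository source) ---
# Constructing an attachment directly from raw bytes via the model fields declared
# above, then encoding it as a data URI for an API request. The PDF bytes shown are
# hypothetical.
#
# att = FileAttachment(
#     content=b"%PDF-1.4 ...",
#     filename="doc.pdf",
#     mime_type="application/pdf",
# )
# uri = att.to_data_uri()  # "data:application/pdf;base64,<base64-encoded content>"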
def to_dict(self, model: str) -> Dict[str, Any]
⋮----
"""
        Convert to a dictionary suitable for API requests.
        Tested only for PDF files.

        Returns:
            Dictionary with file data
        """
⋮----
# for gemini models, we use `image_url` for both pdf-files and images
⋮----
image_url_dict = {}
⋮----
# If we have a URL and it's a full http/https URL, use it directly
⋮----
# Otherwise use base64 data URI
⋮----
# Add detail parameter if specified
⋮----
# For non-image files
</file>

<file path="langroid/parsing/md_parser.py">
HEADER_CONTEXT_SEP = "\n...\n"
⋮----
# Pydantic model definition for a node in the markdown hierarchy
class Node(BaseModel)
⋮----
content: str  # The text of the header or content block
path: List[str]  # List of header texts from root to this node
children: List["Node"] = Field(default_factory=list)
# Nested children nodes
⋮----
def __repr__(self) -> str
⋮----
# for debug printing
⋮----
# Pydantic v1 requires forward references for self-referencing models
# Forward references will be resolved with the update_forward_refs call below.
⋮----
# Resolve forward references for Node (required for recursive models in Pydantic)
⋮----
def _cleanup_text(text: str) -> str
⋮----
# 1) Convert alternative newline representations (any CRLF or CR) to a single '\n'
text = text.replace("\r\n", "\n").replace("\r", "\n")
⋮----
# 2) Replace 3 or more consecutive newlines with exactly 2 newlines
text = re.sub(r"\n{3,}", "\n\n", text)
⋮----
HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")
⋮----
def parse_markdown_headings(md_text: str) -> List[Node]
⋮----
"""
    Parse `md_text` to extract a heading-based hierarchy, skipping lines
    that look like headings inside fenced code blocks. Each heading node
    will have a child node for the text that appears between this heading
    and the next heading.

    Returns a list of top-level Node objects.

    Example structure:
        Node(content='# Chapter 1', path=['# Chapter 1'], children=[
            Node(content='Intro paragraph...', path=['# Chapter 1'], children=[]),
            Node(content='## Section 1.1', path=['# Chapter 1', '## Section 1.1'],
                 children=[
                  Node(content='Some text in Section 1.1.', path=[...], children=[])
            ]),
            ...
        ])
    """
# If doc is empty or only whitespace, return []
⋮----
lines = md_text.splitlines(True)  # keep the newline characters
⋮----
# We'll scan line-by-line, track code-fence status, collect headings
headings = []  # list of (level, heading_line, start_line_idx)
in_code_fence = False
fence_marker = None  # track which triple-backtick or ~~~ opened
⋮----
# Check if we're toggling in/out of a fenced code block
# Typically triple backtick or triple tilde: ``` or ~~~
# We do a *loose* check: a line that starts with at least 3 backticks or tildes
# ignoring trailing text. You can refine as needed.
fence_match = re.match(r"^(```+|~~~+)", line.strip())
⋮----
# If we are not in a fence, we enter one;
# If we are in a fence, we exit if the marker matches
marker = fence_match.group(1)  # e.g. "```" or "~~~~"
⋮----
in_code_fence = True
fence_marker = marker[:3]  # store triple backtick or triple tilde
⋮----
# only close if the fence_marker matches
# E.g. if we opened with ```, we close only on ```
⋮----
fence_marker = None
⋮----
# Check if the line is a heading
m = HEADING_RE.match(line)
⋮----
hashes = m.group(1)  # e.g. "##"
heading_text = line.rstrip("\n")  # entire line, exact
level = len(hashes)
⋮----
# If no headings found, return a single root node with the entire text
⋮----
# Add a sentinel heading at the end-of-file, so we can slice the last block
# after the final real heading. We'll use level=0 so it doesn't form a real node.
⋮----
# Now we build "heading blocks" with
# (level, heading_text, start_line, end_line, content)
heading_blocks = []
⋮----
# Content is everything after the heading line until the next heading
# i.e. lines[start_i+1 : next_start_i]
block_content_lines = lines[start_i + 1 : next_start_i]
block_content = "".join(block_content_lines).rstrip("\n")
⋮----
# (We skip the sentinel heading in the final result.)
⋮----
# We'll now convert heading_blocks into a tree using a stack-based approach
root_nodes: List[Node] = []
stack: List[Node] = []
header_path: List[str] = []
⋮----
level = hb["level"]  # type: ignore
heading_txt = hb["heading_text"]
content_txt = hb["content"]
⋮----
# --- Pop stack first! ---
⋮----
# build new path, create a node for the heading
new_path = header_path + [heading_txt]
heading_node = Node(
⋮----
content=heading_txt, path=new_path, children=[]  # type: ignore
⋮----
# Possibly create a content child for whatever lines were below the heading
if content_txt.strip():  # type: ignore
content_node = Node(
⋮----
content=content_txt, path=new_path, children=[]  # type: ignore
⋮----
# Attach heading_node to the stack top or as a root
⋮----
header_path.append(heading_txt)  # type: ignore
⋮----
# The Chunk model for the final enriched chunks.
class Chunk(BaseModel)
⋮----
text: str  # The chunk text (which includes header context)
path: List[str]  # The header path (list of header strings)
token_count: int
⋮----
# Configuration for chunking
class MarkdownChunkConfig(BaseModel)
⋮----
chunk_size: int = 200  # desired chunk size in tokens
overlap_tokens: int = 30  # number of tokens to overlap between chunks
variation_percent: float = 0.3  # allowed variation
rollup: bool = True  # whether to roll up chunks
header_context_sep: str = HEADER_CONTEXT_SEP  # separator for header context
⋮----
@field_validator("chunk_size", mode="before")
@classmethod
    def convert_chunk_size_to_int(cls, v: Any) -> int
⋮----
"""Convert chunk_size to int, maintaining backward compatibility
        with Pydantic V1.
        """
⋮----
# A simple tokenizer that counts tokens as whitespace-separated words.
def count_words(text: str) -> int
⋮----
def recursive_chunk(text: str, config: MarkdownChunkConfig) -> List[str]
⋮----
"""
    Enhanced chunker that:
      1. Splits by paragraph (top-level).
      2. Splits paragraphs by sentences if needed (never mid-sentence unless huge).
      3. Allows going over the upper bound rather than splitting a single sentence.
      4. Overlaps only once between consecutive chunks.
      5. Looks ahead to avoid a "dangling" final chunk below the lower bound.
      6. Preserves \n\n (and other original spacing) as much as possible.
    """
⋮----
# -------------------------------------------------
# Helpers
⋮----
def count_words(text_block: str) -> int
⋮----
lower_bound = int(config.chunk_size * (1 - config.variation_percent))
upper_bound = int(config.chunk_size * (1 + config.variation_percent))
⋮----
# Quick check: if the entire text is short enough, return as-is.
⋮----
# Split into paragraphs, preserving \n\n if it's there.
raw_paragraphs = text.split("\n\n")
paragraphs = []
⋮----
# Re-append the double-newline if not the last piece
⋮----
# Split paragraphs into "segments": each segment is either
# a full short paragraph or (if too big) a list of sentences.
sentence_regex = r"(?<=[.!?])\s+"
⋮----
def split_paragraph_into_sentences(paragraph: str) -> List[str]
⋮----
"""
        Return a list of sentence-sized segments. If a single sentence
        is bigger than upper_bound, do a word-level fallback.
        """
⋮----
sentences = re.split(sentence_regex, paragraph)
# Clean up stray whitespace
sentences = [s.strip() for s in sentences if s.strip()]
⋮----
expanded = []
⋮----
def _fallback_word_split(long_text: str, cfg: MarkdownChunkConfig) -> List[str]
⋮----
"""
        As a last resort, split an extremely large 'sentence' by words.
        """
words = long_text.split()
pieces = []
start = 0
⋮----
end = start + cfg.chunk_size
chunk_words = words[start:end]
⋮----
start = end
⋮----
# Build a list of segments
segments = []
⋮----
# split into sentences
segs = split_paragraph_into_sentences(para)
⋮----
# Accumulate segments into final chunks
⋮----
chunks = []
current_chunk = ""
current_count = 0
⋮----
def flush_chunk() -> None
⋮----
trimmed = current_chunk.strip()
⋮----
def remaining_tokens_in_future(all_segments: List[str], current_index: int) -> int
⋮----
"""Sum of word counts from current_index onward."""
⋮----
seg_count = count_words(seg)
⋮----
# If this single segment alone exceeds upper_bound, we accept it as a big chunk.
⋮----
# If we have something in the current chunk, flush it first
⋮----
# Then store this large segment as its own chunk
⋮----
# Attempt to add seg to the current chunk
⋮----
# We would normally flush here, but let's see if we are nearing the end:
# If the remaining token count (including this segment) is < lower_bound,
# we just add it anyway to avoid creating a tiny final chunk.
future_tokens = remaining_tokens_in_future(segments, i)
⋮----
# Just add it (allowing to exceed upper bound)
⋮----
# Add space or preserve newline carefully
# We'll do a basic approach here:
⋮----
current_chunk += seg  # preserve double new line
⋮----
current_count = count_words(current_chunk)
⋮----
current_chunk = seg
current_count = seg_count
⋮----
# Normal flush
old_chunk = current_chunk
⋮----
# Overlap from old_chunk
overlap_tokens_list = (
overlap_str = (
⋮----
current_chunk = overlap_str + " " + seg
⋮----
# Just accumulate
⋮----
# Flush leftover
⋮----
# Return non-empty
⋮----
# Function to process a Node and produce enriched chunks.
def chunk_node(node: Node, config: MarkdownChunkConfig) -> List[Chunk]
⋮----
chunks: List[Chunk] = []
⋮----
# Check if this is a header-only node.
is_header_only = node.path and node.content.strip() == node.path[-1]
⋮----
# Only generate a chunk for the node if it has non-header content,
# or if it’s header-only AND has no children (i.e., it's a leaf header).
⋮----
header_prefix = (
content_chunks = recursive_chunk(node.content, config)
⋮----
full_text = header_prefix + chunk_text
⋮----
# Process children nodes recursively.
⋮----
child_chunks = chunk_node(child, config)
⋮----
# Function to process an entire tree of Nodes.
def chunk_tree(root_nodes: List[Node], config: MarkdownChunkConfig) -> List[Chunk]
⋮----
all_chunks: List[Chunk] = []
⋮----
def aggregate_content(node: Node) -> str
⋮----
"""
    Recursively aggregate the content from a node and all its descendants,
    excluding header-only nodes to avoid duplication.
    """
parts = []
⋮----
# Skip header-only nodes in content aggregation
is_header_only = node.path and node.content.strip() == node.path[-1].strip()
⋮----
# Recurse on children
⋮----
child_text = aggregate_content(child)
⋮----
def flatten_tree(node: Node, level: int = 0) -> str
⋮----
"""
    Flatten a node and its children back into proper markdown text.

    Args:
        node: The node to flatten
        level: The current heading level (depth in the tree)

    Returns:
        str: Properly formatted markdown text
    """
result = ""
⋮----
# Check if this is a header node (content matches last item in path)
is_header = node.path and node.content.strip().startswith("#")
⋮----
# For header nodes, don't duplicate the hash marks
⋮----
result = node.content.strip() + "\n\n"
⋮----
# Process all children
⋮----
"""
    Recursively produce rollup chunks from `node`, passing down a `prefix`
    (e.g., parent heading(s)).

    - If a node is heading-only (content == last path item) and has children,
      we skip creating a chunk for that node alone and instead add that heading
      to the `prefix` for child nodes.
    - If a node is NOT heading-only OR has no children, we try to fit all of its
      flattened content into a single chunk. If it's too large, we chunk it.
    - We pass the (possibly updated) prefix down to children, so each child's
      chunk is enriched exactly once with all ancestor headings.
    """
⋮----
# Check if the node is "heading-only" and has children
# e.g. node.content=="# Chapter 1" and node.path[-1]=="# Chapter 1"
is_heading_only_with_children = (
⋮----
# We do NOT create a chunk for this node alone.
# Instead, we add its heading to the prefix for child chunks.
new_prefix = prefix + node.content.strip()
⋮----
sep = "\n\n" if i == 0 else config.header_context_sep
⋮----
# If not heading-only-with-children, we handle this node's own content:
# Flatten the entire node (including sub-children) in standard Markdown form.
flattened = flatten_tree(node, level=len(node.path))
flattened_with_prefix = prefix + flattened
total_tokens = count_words(flattened_with_prefix)
⋮----
# Check if we can roll up everything (node + children) in a single chunk
⋮----
# One single chunk for the entire subtree
⋮----
# It's too large overall. We'll chunk the node's own content first (if any),
# then recurse on children.
node_content = node.content.strip()
⋮----
# If we have actual content that is not just a heading, chunk it with the prefix
# (like "preamble" text).
# Note: if this node is heading-only but has NO children,
# it will still land here
# (because is_heading_only_with_children was False due to zero children).
⋮----
# The node is actual content (not purely heading).
# We'll chunk it in paragraphs/sentences with the prefix.
content_chunks = recursive_chunk(node_content, config)
⋮----
block_with_prefix = prefix + text_block
⋮----
# Now recurse on children, passing the same prefix so they get it too
⋮----
# Create a dummy root node that contains everything.
dummy_root = Node(content="", path=[], children=root_nodes)
⋮----
# Now process just the dummy root node with an empty prefix.
chunks = rollup_chunk_node(dummy_root, config, prefix="")
⋮----
def chunk_markdown(markdown_text: str, config: MarkdownChunkConfig) -> List[str]
⋮----
tree = parse_markdown_headings(markdown_text)
⋮----
# Pure text, no hierarchy, so just use recursive_chunk
text_chunks = recursive_chunk(markdown_text, config)
⋮----
chunks = rollup_chunk_tree(tree, config)
⋮----
chunks = chunk_tree(tree, config)
⋮----
# Example usage:
markdown_text = """# Title
# Set up chunking config with very large chunk size.
# (you can adjust chunk_size, overlap_tokens, variation_percent)
config = MarkdownChunkConfig(
chunks = chunk_markdown(markdown_text, config)
⋮----
# with rollup_chunk_tree we get entire doc as 1 chunk
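# --- Illustrative usage (editor's sketch, not part of the repository source) ---
# With rollup=False, each chunk is a plain string; structure-aware chunks carry
# their header path as a prefix (joined with HEADER_CONTEXT_SEP). That prefixing
# behavior is inferred from chunk_node() above, so treat it as an assumption.
#
# cfg = MarkdownChunkConfig(chunk_size=100, rollup=False)
# for ch in chunk_markdown("# Title\nIntro text.\n## Part 1\nDetails...", cfg):
#     print(ch)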
</file>

<file path="langroid/parsing/para_sentence_split.py">
def remove_extra_whitespace(s: str) -> str
⋮----
lines = s.split("\n")
cleaned_lines = [" ".join(line.split()) for line in lines]
⋮----
def custom_sent_tokenize(text: str) -> List[str]
⋮----
sentences = [
# append a period if the sentence does not end with one
⋮----
def _chunk_sentences(sentences: List[str], chunk_size: int) -> List[str]
⋮----
chunks = []
current_chunk: List[str] = []
current_chunk_length = 0
⋮----
sentence_length = length_fn(sentence)
⋮----
current_chunk = [sentence]
current_chunk_length = sentence_length
⋮----
new_chunk = " ".join(current_chunk).strip()
⋮----
soup = BeautifulSoup(text, "html.parser")
text = soup.get_text()
# First, try to split the document into paragraphs
paragraphs = text.split("\n\n")
⋮----
# If paragraphs are too long, split them into sentences
⋮----
sentences = custom_sent_tokenize(text)
chunks = _chunk_sentences(sentences, chunk_size)
⋮----
chunks = paragraphs
⋮----
chunks = [chunk.strip() for chunk in chunks if chunk.strip() != ""]
</file>

<file path="langroid/parsing/parser.py">
logger = logging.getLogger(__name__)
⋮----
class Splitter(str, Enum)
⋮----
TOKENS = "tokens"
PARA_SENTENCE = "para_sentence"
SIMPLE = "simple"
# "structure-aware" splitting with chunks enriched by header info
MARKDOWN = "markdown"
⋮----
class BaseParsingConfig(BaseSettings)
⋮----
"""Base class for document parsing configurations."""
⋮----
library: str
⋮----
model_config = SettingsConfigDict(extra="ignore")  # Ignore unknown settings
⋮----
class LLMPdfParserConfig(BaseSettings)
⋮----
"""Configuration for LLM-based parsing."""
⋮----
model_name: str = "gemini/gemini-2.0-flash"  # Default model
max_tokens: Optional[int] = None
split_on_page: Optional[bool] = True
requests_per_minute: Optional[int] = 5
timeout: int = 60
prompt: str = ""  # override with a domain-specific prompt
system_prompt: str = ""  # override with a domain-specific system prompt
⋮----
class MarkerConfig(BaseSettings)
⋮----
"""Configuration for Markitdown-based parsing."""
⋮----
config_dict: Dict[str, Any] = {}
⋮----
class PdfParsingConfig(BaseParsingConfig)
⋮----
library: Literal[
llm_parser_config: Optional[LLMPdfParserConfig] = None
marker_config: Optional[MarkerConfig] = None
⋮----
@model_validator(mode="before")
@classmethod
    def enable_configs(cls, values: Dict[str, Any]) -> Dict[str, Any]
⋮----
"""Ensure correct config is set based on library selection."""
library = values.get("library")
⋮----
class DocxParsingConfig(BaseSettings)
⋮----
library: Literal["python-docx", "unstructured", "markitdown-docx"] = "unstructured"
⋮----
class DocParsingConfig(BaseSettings)
⋮----
library: Literal["unstructured"] = "unstructured"
⋮----
class MarkitdownPPTXParsingConfig(BaseSettings)
⋮----
library: Literal["markitdown"] = "markitdown"
⋮----
class MarkitdownXLSXParsingConfig(BaseSettings)
⋮----
class MarkitdownXLSParsingConfig(BaseSettings)
⋮----
class ParsingConfig(BaseSettings)
⋮----
splitter: str = Splitter.MARKDOWN
chunk_by_page: bool = False  # split by page?
chunk_size: int = 200  # aim for this many tokens per chunk
chunk_size_variation: float = 0.30  # max variation from chunk_size
overlap: int = 50  # overlap between chunks
max_chunks: int = 10_000
⋮----
@field_validator("chunk_size", mode="before")
@classmethod
    def convert_chunk_size_to_int(cls, v: Any) -> int
⋮----
"""Convert chunk_size to int, maintaining backward compatibility
        with Pydantic V1.
        """
⋮----
# offset to subtract from page numbers:
# e.g. if physical page 12 is displayed as page 1, set page_number_offset = 11
page_number_offset: int = 0
# aim to have at least this many chars per chunk when truncating due to punctuation
min_chunk_chars: int = 350
discard_chunk_chars: int = 5  # discard chunks with fewer than this many chars
n_similar_docs: Optional[int] = None  # deprecated
n_neighbor_ids: int = 5  # window size to store around each chunk
separators: List[str] = ["\n\n", "\n", " ", ""]
token_encoding_model: str = "text-embedding-3-small"
pdf: PdfParsingConfig = PdfParsingConfig()
docx: DocxParsingConfig = DocxParsingConfig()
doc: DocParsingConfig = DocParsingConfig()
pptx: MarkitdownPPTXParsingConfig = MarkitdownPPTXParsingConfig()
xls: MarkitdownXLSParsingConfig = MarkitdownXLSParsingConfig()
xlsx: MarkitdownXLSXParsingConfig = MarkitdownXLSXParsingConfig()
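# --- Illustrative usage (editor's sketch, not part of the repository source) ---
# A minimal config using the structure-aware markdown splitter; the values are
# arbitrary. Parser(cfg).split(docs) then chunks a list of Document objects
# according to this config.
#
# cfg = ParsingConfig(
#     splitter=Splitter.MARKDOWN,
#     chunk_size=300,
#     overlap=50,
# )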
⋮----
class Parser
⋮----
def __init__(self, config: ParsingConfig)
⋮----
def num_tokens(self, text: str) -> int
⋮----
return count_words(text)  # simple count based on whitespace-split
tokens = self.tokenizer.encode(text, allowed_special={"<|endoftext|>"})
⋮----
def truncate_tokens(self, text: str, max_tokens: int) -> str
⋮----
tokens = self.tokenizer.encode(text)
⋮----
def add_window_ids(self, chunks: List[Document]) -> None
⋮----
"""Chunks may belong to multiple docs, but for each doc,
        they appear consecutively. Add window_ids in metadata"""
⋮----
# discard empty chunks
chunks = [c for c in chunks if c.content.strip() != ""]
⋮----
# The original metadata.id (if any) is ignored since it will be the same for all
# chunks and is useless. We want a distinct id for each chunk.
# ASSUMPTION: all chunks c of a doc have the same c.metadata.id!
orig_ids = [c.metadata.id for c in chunks]
ids = [ObjectRegistry.new_id() for c in chunks]
id2chunk = {id: c for id, c in zip(ids, chunks)}
⋮----
# group the ids by orig_id
# (each distinct orig_id refers to a different document)
orig_id_to_ids: Dict[str, List[str]] = {}
⋮----
# now each orig_id maps to a sequence of ids within a single doc
⋮----
k = self.config.n_neighbor_ids
⋮----
# ids are consecutive chunks in a single doc
n = len(ids)
window_ids = [ids[max(0, i - k) : min(n, i + k + 1)] for i in range(n)]
⋮----
c = id2chunk[ids[i]]
⋮----
def split_simple(self, docs: List[Document]) -> List[Document]
⋮----
final_docs = []
⋮----
chunks = remove_extra_whitespace(d.content).split(self.config.separators[0])
# note we are ensuring we COPY the document metadata into each chunk,
# which ensures all chunks of a given doc have same metadata
# (and in particular same metadata.id, which is important later for
# add_window_ids)
chunk_docs = [
⋮----
def split_para_sentence(self, docs: List[Document]) -> List[Document]
⋮----
chunks = docs
⋮----
un_splittables = 0
split_chunks = []
⋮----
# small chunk: no need to split
⋮----
splits = self._split_para_sentence_once([c])
⋮----
max_len = max([self.num_tokens(p.content) for p in chunks])
⋮----
break  # we won't be able to shorten them with current settings
chunks = split_chunks.copy()
⋮----
def _split_para_sentence_once(self, docs: List[Document]) -> List[Document]
⋮----
final_chunks = []
⋮----
chunks = create_chunks(d.content, self.config.chunk_size, self.num_tokens)
⋮----
def split_chunk_tokens(self, docs: List[Document]) -> List[Document]
⋮----
chunks = chunk_markdown(
⋮----
# apply rough adjustment factor to convert from tokens to words,
# which is what the markdown chunker uses
⋮----
chunks = self.chunk_tokens(d.content)
⋮----
"""
        Split a text into chunks of ~CHUNK_SIZE tokens,
        based on punctuation and newline boundaries.
        Adapted from
        https://github.com/openai/chatgpt-retrieval-plugin/blob/main/services/chunks.py

        Args:
            text: The text to split into chunks.

        Returns:
            A list of text chunks, each of which is a string of tokens
            roughly self.config.chunk_size tokens long.
        """
# Return an empty list if the text is empty or whitespace
⋮----
# Tokenize the text
tokens = self.tokenizer.encode(text, disallowed_special=())
⋮----
# Initialize an empty list of chunks
chunks = []
⋮----
# Initialize a counter for the number of chunks
num_chunks = 0
⋮----
# Loop until all tokens are consumed
⋮----
# Take the first chunk_size tokens as a chunk
chunk = tokens[: self.config.chunk_size]
⋮----
# Decode the chunk into text
chunk_text = self.tokenizer.decode(chunk)
⋮----
# Skip the chunk if it is empty or whitespace
⋮----
# Remove the tokens corresponding to the chunk text
# from remaining tokens
tokens = tokens[len(chunk) :]
# Continue to the next iteration of the loop
⋮----
# Find the last period or punctuation mark in the chunk
punctuation_matches = [
⋮----
last_punctuation = max([pos for pos, _ in punctuation_matches] + [-1])
⋮----
# If there is a punctuation mark, and the last punctuation index is
# after MIN_CHUNK_SIZE_CHARS
⋮----
# Truncate the chunk text at the punctuation mark
chunk_text = chunk_text[: last_punctuation + 1]
⋮----
# Replace redundant (3 or more) newlines with 2 newlines to preserve
# paragraph separation!
# But do NOT strip leading/trailing whitespace, to preserve formatting
# (e.g. code blocks, or in case we want to stitch chunks back together)
chunk_text_to_append = re.sub(r"\n{3,}", "\n\n", chunk_text)
⋮----
# Append the chunk text to the list of chunks
⋮----
# Remove the tokens corresponding to the chunk text
# from the remaining tokens
tokens = tokens[
⋮----
# Increment the number of chunks
⋮----
# There may be remaining tokens, but we discard them
# since we have already reached the maximum number of chunks
⋮----
def split(self, docs: List[Document]) -> List[Document]
⋮----
# create ids in metadata of docs if absent:
# we need this to distinguish docs later in add_window_ids
⋮----
# some docs are already splits, so don't split them further!
chunked_docs = [d for d in docs if d.metadata.is_chunk]
big_docs = [d for d in docs if not d.metadata.is_chunk]
⋮----
big_doc_chunks = self.split_chunk_tokens(big_docs)
⋮----
big_doc_chunks = self.split_para_sentence(big_docs)
⋮----
big_doc_chunks = self.split_simple(big_docs)
</file>

<file path="langroid/parsing/pdf_utils.py">
fitz = None
⋮----
"""Splits a PDF into individual pages or chunks in a temporary directory.

    Args:
        input_pdf: Input PDF file in bytes, binary mode, or a file path
        splits: Optional list of page numbers to split at.
                If provided, pages will be grouped into chunks ending at
                these page numbers.
                For example, if splits = [4, 9], the result will have pages 1-4, 5-9,
                and 10-end.
                If not provided, default to splitting into individual pages.
        max_workers: Maximum number of concurrent workers for parallel processing

    Returns:
        Tuple containing:
            - List of paths to individual PDF pages or chunks
            - Temporary directory object (caller must call cleanup())

    Example:
        paths, tmp_dir = split_pdf_temp("input.pdf")
        # Use paths...
        tmp_dir.cleanup()  # Clean up temp files when done
    """
tmp_dir = tempfile.TemporaryDirectory()
⋮----
doc = fitz.open(input_pdf)
⋮----
doc = fitz.open(stream=input_pdf, filetype="pdf")
paths = []
⋮----
total_pages = len(doc)
⋮----
# Split into individual pages (original behavior)
⋮----
new_doc = fitz.open()
⋮----
output = Path(tmp_dir.name) / f"page_{page_num + 1}.pdf"
⋮----
# Split according to specified page ranges
# Make sure the splits list is sorted and includes all valid splits
splits = sorted([s for s in splits if 1 <= s <= total_pages])
⋮----
# Create the ranges to process
ranges = []
start_page = 0
⋮----
start_page = end_page
⋮----
# Add the final range if there are pages after the last split
⋮----
# Process each range
⋮----
output = Path(tmp_dir.name) / f"pages_{from_page + 1}_to_{to_page + 1}.pdf"
</file>

<file path="langroid/parsing/repo_loader.py">
logger = logging.getLogger(__name__)
⋮----
def _get_decoded_content(content_file: "ContentFile") -> str
⋮----
def _has_files(directory: str) -> bool
⋮----
"""
    Recursively checks if there is at least one file in a directory.
    """
⋮----
# Pydantic model for GitHub issue data
class IssueData(BaseModel)
⋮----
state: str = Field(..., description="State of issue e.g. open or closed")
year: int = Field(..., description="Year issue was created")
month: int = Field(..., description="Month issue was created")
day: int = Field(..., description="Day issue was created")
assignee: Optional[str] = Field(..., description="Assignee of issue")
size: Optional[str] = Field(..., description="Size of issue, e.g. XS, S, M, L, XXL")
text: str = Field(..., description="Text of issue, i.e. description body")
⋮----
def get_issue_size(labels: List["Label"]) -> str | None
⋮----
sizes = ["XS", "S", "M", "L", "XL", "XXL"]
⋮----
class RepoLoaderConfig(BaseSettings)
⋮----
"""
    Configuration for RepoLoader.
    """
⋮----
non_code_types: List[str] = [
⋮----
file_types: List[str] = [
⋮----
exclude_dirs: List[str] = [
⋮----
class RepoLoader
⋮----
"""
    Class for recursively getting all file content in a repo.
    """
⋮----
"""
        Args:
            url: full github url of repo, or just "owner/repo"
            config: configuration for RepoLoader
        """
⋮----
self.repo: Optional["Repository"] = None  # Initialize repo as Optional
⋮----
log = json.load(f)
⋮----
# it's a core dependency, so we don't need to enclose in try/except
from github import Github  # Late import
⋮----
# authenticated calls to github api have higher rate limit
token = os.getenv("GITHUB_ACCESS_TOKEN")
⋮----
repo_name = self.url.split("github.com/")[1]
⋮----
repo_name = self.url
⋮----
g = Github(token)
⋮----
"""
        Get a repo from the GitHub API, retrying if the request fails,
        with exponential backoff.

        Args:
            g: GitHub object
            repo_name: name of repo
            max_retries: maximum number of retries
        Returns:
            Repo: GitHub repo object

        """
base_delay = 2  # base delay in seconds
max_delay = 60  # maximum delay in seconds
⋮----
delay = min(max_delay, base_delay * 2**attempt)
⋮----
def _get_dir_name(self) -> str
⋮----
def get_issues(self, k: int | None = 100) -> List[IssueData]
⋮----
"""Get up to k issues from the GitHub repo."""
⋮----
return []  # Return an empty list rather than raise an error in this case
⋮----
issues = self.repo.get_issues(state="all")
⋮----
issues = self.repo.get_issues(state="all")[:k]
issue_data_list = []
⋮----
issue_data = IssueData(
⋮----
@staticmethod
    def _file_type(name: str) -> str
⋮----
"""
        Get the file type of a file name.
        Args:
            name: name of file, can be "a", "a.b", or ".b"
        Returns:
            str: file type; "a" => "a", "a.b" => "b", ".b" => "b"
                some examples:
                "Makefile" => "Makefile",
                "script.py" => "py",
                ".gitignore" => "gitignore"
        """
# "a" -> ("a", ""), "a.b" -> ("a", ".b"), ".b" -> (".b", "")
file_parts = os.path.splitext(name)
⋮----
file_type = file_parts[0]  # ("a", "") => "a"
⋮----
file_type = file_parts[1][1:]  # (*,".b") => "b"
⋮----
def _is_code(self, file_type: str) -> bool
⋮----
"""
        Check if a file type is code.

        Args:
            file_type: file type, e.g. "py", "md", "txt"
        Returns:
            bool: whether file type is code
        """
⋮----
def _is_allowed(self, content: "ContentFile") -> bool
⋮----
"""
        Check if a file or directory content is allowed to be included.

        Args:
            content (ContentFile): The file or directory Content object.

        Returns:
            bool: Whether the file or directory is allowed to be included.
        """
⋮----
def default_clone_path(self) -> str
⋮----
def clone(self, path: Optional[str] = None) -> Optional[str]
⋮----
"""
        Clone a GitHub repository to a local directory specified by `path`,
        if it has not already been cloned.

        Args:
            path (str): The local directory where the repository should be cloned.
                If not specified, a temporary directory will be created.

        Returns:
            str: The path to the local directory where the repository was cloned.
        """
⋮----
log: Dict[str, str] = json.load(f)
⋮----
path = self.default_clone_path()
⋮----
"""
        Get a nested dictionary of GitHub repository file and directory names
        up to a certain depth, with file contents.

        Args:
            depth (int): The depth level.
            lines (int): The number of lines of file contents to include.

        Returns:
            Dict[str, Union[str, List[Dict]]]:
            A dictionary containing file and directory names, with file contents.
        """
⋮----
return {}  # Return an empty dict rather than raise an error in this case
⋮----
root_contents = self.repo.get_contents("")
⋮----
root_contents = [root_contents]
repo_structure = {
⋮----
# A queue of tuples (current_node, current_depth, parent_structure)
queue = deque([(root_contents, 0, repo_structure)])
⋮----
# Create a new sub-dictionary for this directory
new_dir = {
⋮----
contents = self.repo.get_contents(content.path)
⋮----
contents = [contents]
⋮----
file_content = "\n".join(
file_dict = {
⋮----
"""
        From a local folder `path` (if None, the repo clone path), get:
          a nested dictionary (tree) of dicts, files and contents
          a list of Document objects for each file.

        Args:
            path (str): The local folder path; if none, use self.clone_path()
            depth (int): The depth level.
            lines (int): The number of lines of file contents to include.

        Returns:
            Tuple of (dict, List_of_Documents):
                A dictionary containing file and directory names, with file
                contents, and a list of Document objects for each file.
        """
⋮----
path = self.clone_path
⋮----
"""
        From a local folder `path` (required), get:
          a nested dictionary (tree) of dicts, files and contents, restricting to
            desired file_types and excluding undesired directories.
          a list of Document objects for each file.

        Args:
            path (str): The local folder path, required.
            depth (int): The depth level. Optional, default 3.
            lines (int): The number of lines of file contents to include.
                    Optional, default 0 (no lines => empty string).
            file_types (List[str]): The file types to include.
                    Optional, default None (all).
            exclude_dirs (List[str]): The directories to exclude.
                    Optional, default None (no exclusions).
            url (str): Optional url, to be stored in docs as metadata. Default "".

        Returns:
            Tuple of (dict, List_of_Documents):
                A dictionary containing file and directory names, with file contents.
                A list of Document objects for each file.
        """
⋮----
folder_structure = {
# A queue of tuples (current_path, current_depth, parent_structure)
queue = deque([(path, 0, folder_structure)])
docs = []
exclude_dirs = exclude_dirs or []
⋮----
item_path = os.path.join(current_path, item)
relative_path = os.path.relpath(item_path, path)
⋮----
# Add the file to the current dictionary
⋮----
file_lines = list(itertools.islice(f, lines))
file_content = "\n".join(line.strip() for line in file_lines)
⋮----
"""
        Recursively get all files under a path as Document objects.

        Args:
            path (str|bytes): The path to the directory or file, or bytes content.
                The bytes option is meant to support the case where the content
                has already been read from a file in an upstream process
                (e.g. from an API or a database), and we want to avoid having to
                write it to a temporary file just to read it again.
                (which can be very slow for large files,
                especially in a docker container)
            parser (Parser): Parser to use to parse files.
            file_types (List[str], optional): List of file extensions OR
                filenames OR file_path_names to  include.
                Defaults to None, which includes all files.
            exclude_dirs (List[str], optional): List of directories to exclude.
                Defaults to None, which includes all directories.
            depth (int, optional): Max depth of recursion. Defaults to -1,
                which includes all depths.
            lines (int, optional): Number of lines to read from each file.
                Defaults to None, which reads all lines.
            doc_type (str|DocumentType | None, optional): The type of document to parse.
        Returns:
            List[Document]: List of Document objects representing files.

        """
⋮----
file_paths = []
⋮----
path_obj = Path(path).resolve()
⋮----
path_depth = len(path_obj.parts)
⋮----
# Exclude directories if needed
⋮----
current_depth = len(Path(root).resolve().parts) - path_depth
⋮----
file_path = str(Path(root) / file)
⋮----
"""
        Directly from GitHub, recursively get all files in a repo that have one of the
        extensions, possibly up to a max number of files, max depth, and max number
        of lines per file (if any of these are specified).

        Args:
            k (int): max number of files to load, or None for all files
            depth (int): max depth to recurse, or None for infinite depth
            lines (int): max number of lines to get, from a file, or None for all lines

        Returns:
            list of Document objects, each has fields `content` and `metadata`,
            and `metadata` has fields `url`, `filename`, `extension`, `language`
        """
⋮----
return []  # Return an empty list rather than raise an error
⋮----
contents = self.repo.get_contents("")
⋮----
stack = list(zip(contents, [0] * len(contents)))  # stack of (content, depth)
# recursively get all files in repo that have one of the extensions
⋮----
i = 0
⋮----
items = self.repo.get_contents(file_content.path)
⋮----
items = [items]
⋮----
# need to decode the file content, which is in bytes
contents = self.repo.get_contents(file_content.path)
⋮----
contents = contents[0]
text = _get_decoded_content(contents)
⋮----
text = "\n".join(text.split("\n")[:lines])
⋮----
# Note `source` is important, it may be used to cite
# evidence for an answer.
# See  URLLoader
# TODO we should use Pydantic to enforce/standardize this
⋮----
"""
        Filter a structure dictionary for certain directories and files.

        Args:
            structure (Dict[str, Union[str, List[Dict]]]): The structure dictionary.
            includes (List[str]): A list of desired directories and files.
                For files, either full file names or "file type" can be specified.
                E.g.  "toml" will include all files with the ".toml" extension,
                or "Makefile" will include all files named "Makefile".
            excludes (List[str]): A list of directories and files to exclude.
                Similar to `includes`, full file/dir names or "file type" can be
                specified. Optional, defaults to empty list.


        Returns:
            Dict[str, Union[str, List[Dict]]]: The filtered structure dictionary.
        """
filtered_structure = {
⋮----
# If the directory is in the select list, include the whole subtree
⋮----
# Otherwise, filter the directory's contents
filtered_dir = RepoLoader.select(dir, includes)
⋮----
):  # only add if not empty
⋮----
@staticmethod
    def ls(structure: Dict[str, Union[str, List[Dict]]], depth: int = 0) -> List[str]
⋮----
"""
        Get a list of names of files or directories up to a certain depth from a
        structure dictionary.

        Args:
            structure (Dict[str, Union[str, List[Dict]]]): The structure dictionary.
            depth (int, optional): The depth level. Defaults to 0.

        Returns:
            List[str]: A list of names of files or directories.
        """
names = []
⋮----
# A queue of tuples (current_structure, current_depth)
queue = deque([(structure, 0)])
⋮----
# add file names only if depth is less than the limit
⋮----
names = [n for n in names if n not in ["", None]]
⋮----
"""
        Recursively list all files in a directory, up to a certain depth.

        Args:
            dir (str): The directory path, relative to root.
            depth (int, optional): The depth level. Defaults to 1.
            include_types (List[str], optional): A list of file types to include.
                Defaults to empty list.
            exclude_types (List[str], optional): A list of file types to exclude.
                Defaults to empty list.
        Returns:
            List[str]: A list of file names.
        """
depth = depth if depth >= 0 else 200
output = []
⋮----
level = root.count(os.sep) - dir.count(os.sep)
sub_indent = " " * 4 * (level + 1)
⋮----
@staticmethod
    def show_file_contents(tree: Dict[str, Union[str, List[Dict[str, Any]]]]) -> str
⋮----
"""
        Print the contents of all files from a structure dictionary.

        Args:
            tree (Dict[str, Union[str, List[Dict]]]): The structure dictionary.
        """
contents = ""
⋮----
path = file["path"]
</file>
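
The `RepoLoader` code above is compressed, but the visible lines show a depth-limited `os.walk` traversal, with depth measured by comparing `Path(...).parts` lengths. Below is a minimal standalone sketch of that pattern; the function and parameter names are illustrative, not Langroid's actual API.

```python
import os
from pathlib import Path
from typing import List, Optional


def list_files_up_to_depth(
    path: str,
    depth: int = -1,  # -1 => unlimited depth, as in RepoLoader.get_documents
    exclude_dirs: Optional[List[str]] = None,
) -> List[str]:
    """Illustrative sketch: list files under `path`, pruning excluded dirs
    and stopping `depth` levels below the root."""
    exclude_dirs = exclude_dirs or []
    root_depth = len(Path(path).resolve().parts)
    file_paths: List[str] = []
    for root, dirs, files in os.walk(path):
        # prune excluded directories in-place so os.walk skips them
        dirs[:] = [d for d in dirs if d not in exclude_dirs]
        current_depth = len(Path(root).resolve().parts) - root_depth
        if depth >= 0 and current_depth > depth:
            dirs[:] = []  # don't descend any further
            continue
        for f in files:
            file_paths.append(str(Path(root) / f))
    return file_paths


if __name__ == "__main__":
    print(list_files_up_to_depth(".", depth=1, exclude_dirs=[".git"]))
```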

<file path="langroid/parsing/routing.py">
"""In a message-string containing possibly multiple @<recipient> occurrences,
    find the last addressee and extract their name,
    and the message content following it.

    E.g. "thank you @bob, now I will ask @alice again. @alice, where is the mirror?" =>
    ("alice", "where is the mirror?")

    Args:
        content (str): The message content.
        addressing (str, optional): The addressing character. Defaults to "@".

    Returns:
        Tuple[Optional[str], str]:
        A tuple containing the last addressee and the subsequent message content.
    """
# Regex to find all occurrences of the pattern
pattern = re.compile(rf"{re.escape(addressing)}(\w+)[^\w]")
matches = list(pattern.finditer(content))
⋮----
return None, content  # No addressee found, return None and original content
⋮----
# Get the last match
last_match = matches[-1]
last_addressee = last_match.group(1)
# Extract content after the last addressee
content_after = content[last_match.end() :].strip()
</file>
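
The addressee-parsing logic in `routing.py` is fully visible above and small enough to reproduce as a self-contained sketch; the function name below is made up, but the regex and last-match handling mirror the code shown.

```python
import re
from typing import Optional, Tuple


def last_addressee(content: str, addressing: str = "@") -> Tuple[Optional[str], str]:
    """Return (addressee, remaining message) for the LAST @<name> mention."""
    # @<word chars> followed by a non-word char, e.g. "@alice,"
    pattern = re.compile(rf"{re.escape(addressing)}(\w+)[^\w]")
    matches = list(pattern.finditer(content))
    if not matches:
        return None, content  # no addressee found; return original content
    last = matches[-1]
    return last.group(1), content[last.end():].strip()


print(last_addressee("thank you @bob, now I will ask @alice again. @alice, where is the mirror?"))
# ('alice', 'where is the mirror?')
```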

<file path="langroid/parsing/search.py">
"""
Utils to search for close matches in (a list of) strings.
Useful for retrieval of docs/chunks relevant to a query, in the context of
Retrieval-Augmented Generation (RAG), and SQLChat (e.g., to pull relevant parts of a
large schema).
See tests for examples: tests/main/test_string_search.py
"""
⋮----
"""
    Find approximate matches of the query in the docs and return surrounding
    characters.

    Args:
        query (str): The search string.
        docs (List[Document]): List of Document objects to search through.
        docs_clean (List[Document]): List of Document objects with cleaned content.
        k (int): Number of best matches to return.
        words_before (int|None): Number of words to include before each match.
            Default None => return max
        words_after (int|None): Number of words to include after each match.
            Default None => return max

    Returns:
        List[Tuple[Document,float]]: List of (Document, score) tuples.
    """
⋮----
best_matches = process.extract(
⋮----
real_matches = [(m, score) for m, score in best_matches if score > 50]
# find the original docs corresponding to the matches
orig_doc_matches = []
⋮----
# If there are fields beyond just content and metadata,
# we do NOT want to create new document objects with content fields
# based on words_before and words_after, since we don't know how to
# set those other fields.
⋮----
contextual_matches = []
⋮----
choice_text = match.content
contexts = []
⋮----
words = choice_text.split()
end_pos = min(end_pos, len(words))
choice_text = " ".join(words[end_pos:])
⋮----
def preprocess_text(text: str) -> str
⋮----
"""
    Preprocesses the given text by:
    1. Lowercasing all words.
    2. Tokenizing (splitting the text into words).
    3. Removing punctuation.
    4. Removing stopwords.
    5. Lemmatizing words.

    Args:
        text (str): The input text.

    Returns:
        str: The preprocessed text.
    """
# Ensure the NLTK resources are available
⋮----
# Lowercase the text
text = text.lower()
⋮----
# Tokenize the text and remove punctuation
tokenizer = RegexpTokenizer(r"\w+")
tokens = tokenizer.tokenize(text)
⋮----
# Remove stopwords
stop_words = set(stopwords.words("english"))
tokens = [t for t in tokens if t not in stop_words]
⋮----
# Lemmatize words
lemmatizer = WordNetLemmatizer()
tokens = [lemmatizer.lemmatize(t) for t in tokens]
⋮----
# Join the words back into a string
text = " ".join(tokens)
⋮----
"""
    Finds the k closest approximate matches using the BM25 algorithm.

    Args:
        docs (List[Document]): List of Documents to search through.
        docs_clean (List[Document]): List of cleaned Documents
        query (str): The search query.
        k (int, optional): Number of matches to retrieve. Defaults to 5.

    Returns:
        List[Tuple[Document,float]]: List of (Document, score) tuples.
    """
⋮----
texts = [doc.content for doc in docs_clean]
query = preprocess_text(query)
⋮----
text_words = [text.split() for text in texts]
⋮----
bm25 = BM25Okapi(text_words)
query_words = query.split()
doc_scores = bm25.get_scores(query_words)
⋮----
# Get indices of top k scores
top_indices = sorted(range(len(doc_scores)), key=lambda i: -doc_scores[i])[:k]
⋮----
# return the original docs, based on the scores from cleaned docs
⋮----
"""
    Returns a portion of text containing the best approximate match of the query,
    including b words before and a words after the match.

    Args:
    query (str): The string to search for.
    text (str): The body of text in which to search.
    b (int): The number of words before the query to return.
    a (int): The number of words after the query to return.

    Returns:
    str: A string containing b words before, the match, and a words after
        the best approximate match position of the query in the text.
        The text is extracted from the original `text`, preserving formatting,
        whitespace, etc, so it does not disturb any downstream processing.
        If no match is found, returns empty string.
    int: The start position of the match in the text.
    int: The end position of the match in the text.

    Example:
    >>> get_context("apple", "The quick brown fox jumps over the apple.", 3, 2)
    # 'fox jumps over the apple.'
    """
⋮----
# If no word limits specified, return full text
⋮----
# return entire text since we're not asked to return a bounded context
⋮----
# make sure there is a good enough match to the query
⋮----
# Find best matching position of query in text
sequence_matcher = difflib.SequenceMatcher(None, text, query)
match = sequence_matcher.find_longest_match(0, len(text), 0, len(query))
⋮----
# Count words before match point
segments = text.split()
n_segs = len(segments)
start_segment_pos = len(text[: match.a].split())
⋮----
# Calculate word window boundaries
words_before = words_before or n_segs
words_after = words_after or n_segs
start_pos = max(0, start_segment_pos - words_before)
end_pos = min(len(segments), start_segment_pos + words_after + len(query.split()))
⋮----
# Find character positions where words start
word_positions = [m.start() for m in re.finditer(r"\S+", text)]
⋮----
# Convert word positions to character positions
start_char = word_positions[start_pos] if start_pos < len(word_positions) else 0
end_char = word_positions[min(end_pos, len(word_positions) - 1)] + len(
⋮----
# return exact substring with original formatting
⋮----
def eliminate_near_duplicates(passages: List[str], threshold: float = 0.8) -> List[str]
⋮----
"""
    Eliminate near duplicate text passages from a given list using MinHash and LSH.
    TODO: this has not been tested and the datasketch lib is not a dependency.
    Args:
        passages (List[str]): A list of text passages.
        threshold (float, optional): Jaccard similarity threshold to consider two
                                     passages as near-duplicates. Default is 0.8.

    Returns:
        List[str]: A list of passages after eliminating near duplicates.

    Example:
        passages = ["Hello world", "Hello, world!", "Hi there", "Hello world!"]
        print(eliminate_near_duplicates(passages))
        # ['Hello world', 'Hi there']
    """
⋮----
# Create LSH index
lsh = MinHashLSH(threshold=threshold, num_perm=128)
⋮----
# Create MinHash objects for each passage and insert to LSH
minhashes = {}
⋮----
m = MinHash(num_perm=128)
⋮----
unique_idxs = set()
⋮----
# Query for similar passages (including itself)
result = lsh.query(minhashes[idx])
⋮----
# If only the passage itself is returned, it's unique
</file>
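
The preprocessing and BM25 ranking used in `search.py` rely on `nltk` and `rank_bm25`. Below is a standalone sketch of that pipeline, assuming both libraries are installed; the corpus and query are toy examples, and the explicit NLTK downloads stand in for `download_nltk_resource`.

```python
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from rank_bm25 import BM25Okapi

# one-time resource downloads (normally handled elsewhere in Langroid)
for res in ["stopwords", "wordnet", "omw-1.4"]:
    nltk.download(res, quiet=True)


def preprocess(text: str) -> str:
    """Lowercase, tokenize, drop punctuation and stopwords, lemmatize."""
    tokens = RegexpTokenizer(r"\w+").tokenize(text.lower())
    stops = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    return " ".join(lemmatizer.lemmatize(t) for t in tokens if t not in stops)


docs = [
    "The quick brown fox jumps over the lazy dog.",
    "Foxes are small members of the dog family.",
    "Stock markets fell sharply on Monday.",
]
clean = [preprocess(d) for d in docs]
bm25 = BM25Okapi([c.split() for c in clean])
scores = bm25.get_scores(preprocess("fox and dog").split())

# indices of the top-2 scoring docs, highest first
top = sorted(range(len(docs)), key=lambda i: -scores[i])[:2]
print([docs[i] for i in top])
```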

<file path="langroid/parsing/spider.py">
from scrapy.spiders import CrawlSpider, Rule  # type: ignore
⋮----
@no_type_check
class DomainSpecificSpider(CrawlSpider):  # type: ignore
⋮----
name = "domain_specific_spider"
⋮----
custom_settings = {"DEPTH_LIMIT": 1, "CLOSESPIDER_ITEMCOUNT": 20}
⋮----
rules = (Rule(LxmlLinkExtractor(), callback="parse_item", follow=True),)
⋮----
def __init__(self, start_url: str, k: int = 20, *args, **kwargs):  # type: ignore
⋮----
"""Initialize the spider with start_url and k.

        Args:
            start_url (str): The starting URL.
            k (int, optional): The max desired final URLs. Defaults to 20.
        """
⋮----
def parse_item(self, response: TextResponse):  # type: ignore
⋮----
"""Extracts URLs that are within the same domain.

        Args:
            response: The scrapy response object.
        """
⋮----
@no_type_check
def scrapy_fetch_urls(url: str, k: int = 20) -> List[str]
⋮----
"""Fetches up to k URLs reachable from the input URL using Scrapy.

    Args:
        url (str): The starting URL.
        k (int, optional): The max desired final URLs. Defaults to 20.

    Returns:
        List[str]: List of URLs within the same domain as the input URL.
    """
urls = []
⋮----
def _collect_urls(spider)
⋮----
"""Handler for the spider_closed signal. Collects the visited URLs."""
⋮----
# Connect the spider_closed signal with our handler
⋮----
runner = CrawlerRunner(
⋮----
d = runner.crawl(DomainSpecificSpider, start_url=url, k=k)
⋮----
# Block until crawling is done and then stop the reactor
crawl_deferred = defer.Deferred()
⋮----
def _crawl_done(_)
⋮----
# Start the reactor, it will stop once the crawl is done
⋮----
# This will block until the deferred gets a result
⋮----
# Test the function
⋮----
fetched_urls = scrapy_fetch_urls("https://example.com", 5)
</file>

<file path="langroid/parsing/table_loader.py">
def read_tabular_data(path_or_url: str, sep: None | str = None) -> pd.DataFrame
⋮----
"""
    Reads tabular data from a file or URL and returns a pandas DataFrame.
    The separator is auto-detected if not specified.

    Args:
        path_or_url (str): Path or URL to the file to be read.

    Returns:
        pd.DataFrame: Data from file or URL as a pandas DataFrame.

    Raises:
        ValueError: If the data cannot be read or is misformatted.
    """
⋮----
# Read the first few lines to guess the separator
⋮----
first_lines = "".join(file_handler.handle.readlines(5))
sep = Sniffer().sniff(first_lines).delimiter
# If it's a local file, reset to the beginning
⋮----
# Read the data
⋮----
# get non-blank column names
⋮----
header_line = f.handle.readline().strip()
valid_cols = [col for col in header_line.split(sep) if col]
valid_cols = [c.replace('"', "").replace("'", "") for c in valid_cols]
⋮----
# use only those columns
data = pd.read_csv(path_or_url, sep=sep, usecols=valid_cols)
data.columns = data.columns.str.strip()  # e.g. "  column 1  " -> "column 1"
⋮----
"""
    Generates a description of the columns in the dataframe,
    along with a listing of up to `n_vals` unique values for each column.
    Intended to be used to insert into an LLM context so it can generate
    appropriate queries or filters on the df.

    Args:
    df (pd.DataFrame): The dataframe to describe.
    filter_fields (list): A list of fields that can be used for filtering.
        When non-empty, the values-list will be restricted to these.
    n_vals (int): How many unique values to show for each column.

    Returns:
    str: A description of the dataframe.
    """
description = []
⋮----
unique_values = df[column].dropna().unique()
unique_count = len(unique_values)
⋮----
values_desc = f"{unique_count} unique values"
⋮----
displayed_values = unique_values[:n_vals]
more_count = unique_count - n_vals
values_desc = f" Values - {displayed_values}, ... {more_count} more"
⋮----
values_desc = f" Values - {unique_values}"
col_type = "string" if df[column].dtype == "object" else df[column].dtype
col_desc = f"* {column} ({col_type}); {values_desc}"
⋮----
all_cols = "\n".join(description)
</file>
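
`read_tabular_data` auto-detects the separator with `csv.Sniffer` before handing the file to pandas. A simplified standalone version of that idea (local file paths only; the original's column cleanup is reduced here to stripping header whitespace):

```python
from csv import Sniffer

import pandas as pd


def read_table(path: str, sep: str | None = None) -> pd.DataFrame:
    """Guess the delimiter from the first couple of KB, then read with pandas."""
    if sep is None:
        with open(path, newline="") as f:
            sample = f.read(2048)
        sep = Sniffer().sniff(sample).delimiter
    df = pd.read_csv(path, sep=sep)
    df.columns = df.columns.str.strip()  # e.g. "  column 1  " -> "column 1"
    return df


# usage (hypothetical file):
# df = read_table("data.tsv")
```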

<file path="langroid/parsing/url_loader.py">
# Base crawler config and specific configurations
class BaseCrawlerConfig(BaseSettings)
⋮----
"""Base configuration for web crawlers."""
⋮----
parser: Optional[Parser] = None
⋮----
class TrafilaturaConfig(BaseCrawlerConfig)
⋮----
"""Configuration for Trafilatura crawler."""
⋮----
threads: int = 4
format: str = "markdown"  # or "xml" or "txt"
⋮----
class FirecrawlConfig(BaseCrawlerConfig)
⋮----
"""Configuration for Firecrawl crawler."""
⋮----
api_key: str = ""
mode: str = "scrape"
params: Dict[str, Any] = {}
timeout: Optional[int] = None
⋮----
model_config = SettingsConfigDict(env_prefix="FIRECRAWL_")
⋮----
class ExaCrawlerConfig(BaseCrawlerConfig)
⋮----
model_config = SettingsConfigDict(env_prefix="EXA_")
⋮----
class Crawl4aiConfig(BaseCrawlerConfig)
⋮----
"""Configuration for the Crawl4aiCrawler."""
⋮----
crawl_mode: Literal["simple", "deep"] = "simple"
extraction_strategy: Optional["ExtractionStrategy"] = None
markdown_strategy: Optional["MarkdownGenerationStrategy"] = None
deep_crawl_strategy: Optional["DeepCrawlStrategy"] = None
scraping_strategy: Optional["ContentScrapingStrategy"] = None
browser_config: Optional["BrowserConfig"] = None
run_config: Optional["CrawlerRunConfig"] = None
⋮----
model_config = SettingsConfigDict(arbitrary_types_allowed=True)
⋮----
# Resolve forward references for Crawl4aiConfig after the class is defined
⋮----
# Rebuild the model with resolved references
⋮----
# If crawl4ai is not installed, leave forward refs as strings
⋮----
class BaseCrawler(ABC)
⋮----
"""Abstract base class for web crawlers."""
⋮----
def __init__(self, config: BaseCrawlerConfig)
⋮----
"""Initialize the base crawler.

        Args:
            config: Configuration for the crawler
        """
⋮----
@property
@abstractmethod
    def needs_parser(self) -> bool
⋮----
"""Indicates whether the crawler requires a parser."""
⋮----
@abstractmethod
    def crawl(self, urls: List[str]) -> List[Document]
⋮----
def _process_document(self, url: str) -> List[Document]
⋮----
doc_parser = DocumentParser.create(url, self.parser.config)
new_chunks = doc_parser.get_doc_chunks()
⋮----
# If the document is empty, try to extract images
img_parser = ImagePdfParser(url, self.parser.config)
new_chunks = img_parser.get_doc_chunks()
⋮----
headers = requests.head(url).headers
⋮----
headers = CaseInsensitiveDict()
⋮----
content_type = headers.get("Content-Type", "").lower()
temp_file_suffix = None
⋮----
temp_file_suffix = ".pdf"
⋮----
temp_file_suffix = ".docx"
⋮----
temp_file_suffix = ".doc"
⋮----
response = requests.get(url)
⋮----
temp_file_path = temp_file.name
doc_parser = DocumentParser.create(
docs = doc_parser.get_doc_chunks()
⋮----
def _is_document_url(self, url: str) -> bool
⋮----
class CrawlerFactory
⋮----
"""Factory for creating web crawlers."""
⋮----
@staticmethod
    def create_crawler(config: BaseCrawlerConfig) -> BaseCrawler
⋮----
"""Create a crawler instance based on configuration type.

        Args:
            config: Configuration for the crawler

        Returns:
            A BaseCrawler instance

        Raises:
            ValueError: If config type is not supported
        """
⋮----
class TrafilaturaCrawler(BaseCrawler)
⋮----
"""Crawler implementation using Trafilatura."""
⋮----
def __init__(self, config: TrafilaturaConfig)
⋮----
"""Initialize the Trafilatura crawler.

        Args:
            config: Configuration for the crawler
        """
⋮----
@property
    def needs_parser(self) -> bool
⋮----
def crawl(self, urls: List[str]) -> List[Document]
⋮----
docs = []
dl_dict = add_to_compressed_dict(urls)
⋮----
parsed_doc = self._process_document(url)
⋮----
text = trafilatura.extract(
⋮----
with_metadata=True,  # Title, date, author... at start of text
⋮----
# heading_style="ATX" for markdown headings, i.e. #, ##, etc.
text = md.markdownify(text, heading_style="ATX")
⋮----
text = result
⋮----
class FirecrawlCrawler(BaseCrawler)
⋮----
"""Crawler implementation using Firecrawl."""
⋮----
def __init__(self, config: FirecrawlConfig) -> None
⋮----
"""Initialize the Firecrawl crawler.

        Args:
            config: Configuration for the crawler
        """
⋮----
# Code used verbatim from firecrawl blog with few modifications
# https://www.firecrawl.dev/blog/mastering-the-crawl-endpoint-in-firecrawl
⋮----
pbar = tqdm(desc="Pages saved", unit=" pages", dynamic_ncols=True)
⋮----
processed_urls: set[str] = set()
⋮----
# Check current status
status = app.check_crawl_status(crawl_id)
new_pages = 0
⋮----
# Save new pages
⋮----
url = page["metadata"]["url"]
⋮----
content = page.get("markdown", "")
filename = f"{output_dir}/{len(processed_urls)}.md"
⋮----
pbar.update(new_pages)  # Update progress bar with new pages
⋮----
# Break if crawl is complete
⋮----
time.sleep(5)  # Wait before checking again
⋮----
app = FirecrawlApp(api_key=self.config.api_key)
⋮----
params = self.config.params.copy()  # Create a copy of the existing params
⋮----
params["timeout"] = self.config.timeout  # Add/override timeout in params
⋮----
result = app.scrape_url(url, params=params)
metadata = result.get(
⋮----
)  # Default to empty dict if missing
status_code = metadata.get("statusCode")
⋮----
# Start the crawl
crawl_status = app.async_crawl_url(url=urls[0], params=params)
⋮----
# Save results incrementally
docs = self._return_save_incremental_results(app, crawl_status["id"])
⋮----
class ExaCrawler(BaseCrawler)
⋮----
"""Crawler implementation using Exa API."""
⋮----
def __init__(self, config: ExaCrawlerConfig) -> None
⋮----
"""Initialize the Exa crawler.

        Args:
            config: Configuration for the crawler
        """
⋮----
"""Crawl the given URLs using Exa SDK.

        Args:
            urls: List of URLs to crawl

        Returns:
            List of Documents with content extracted from the URLs

        Raises:
            LangroidImportError: If the exa package is not installed
            ValueError: If the Exa API key is not set
        """
⋮----
exa = Exa(self.config.api_key)
⋮----
parsed_doc_chunks = self._process_document(url)
⋮----
results = exa.get_contents(
result = results.results[0]
⋮----
md_text = md.markdownify(result.text, heading_style="ATX")
# append a NON-chunked document
# (metadata.is_chunk = False, so will be chunked downstream)
⋮----
class Crawl4aiCrawler(BaseCrawler)
⋮----
"""
    Crawler implementation using the crawl4ai library.

    This crawler intelligently dispatches URLs. Standard web pages are rendered
    and scraped using the crawl4ai browser engine. Direct links to documents
    (PDF, DOCX, etc.) are delegated to the framework's internal DocumentParser.
    """
⋮----
def __init__(self, config: Crawl4aiConfig) -> None
⋮----
"""Initialize the Crawl4ai crawler."""
⋮----
"""
        Indicates that this crawler relies on the framework's DocumentParser
        for handling specific file types like PDF, DOCX, etc., which
        the browser engine cannot parse directly.
        """
⋮----
"""
        Executes the crawl by separating document URLs from web page URLs.

        - Document URLs (.pdf, .docx, etc.) are processed using `_process_document`.
        - Web page URLs are handled using the async crawl4ai engine.
        """
all_documents: List[Document] = []
webpage_urls: List[str] = []
⋮----
# Step 1: Separate URLs into documents and web pages
⋮----
# Step 2: Process web page URLs asynchronously
⋮----
loop = asyncio.get_running_loop()
⋮----
web_docs = asyncio.run(self._async_crawl(webpage_urls))
⋮----
"""Converts a crawl4ai CrawlResult into the framework's Document format."""
⋮----
content = ""
⋮----
content = result.extracted_content
⋮----
content = result.markdown.fit_markdown
⋮----
content = result.markdown.raw_markdown
⋮----
content = str(result.markdown)
⋮----
# Extract metadata safely
title = "Unknown Title"
published_date = "Unknown Date"
⋮----
title = result.metadata.get("title", "Unknown Title")
# Try common date field names
⋮----
published_date = result.metadata.get(date_field)
⋮----
meta = DocMetaData(
⋮----
# Note: source_content is meant for reference content, not metadata
# Keeping it minimal as other crawlers don't populate it
⋮----
async def _async_crawl(self, urls: List[str]) -> List[Document]
⋮----
# Import configs here for lazy loading
⋮----
# CHANGE 2: Handle the new optional config fields.
# Use the user-provided config if it exists, otherwise create a default one.
browser_config = self.config.browser_config or BrowserConfig()
run_config = self.config.run_config or CrawlerRunConfig()
⋮----
crawled_documents: List[Document] = []
⋮----
result = await crawler.arun(url, config=run_config)
doc = self._translate_result_to_document(result)
⋮----
# In deep crawl mode, `crawl4ai` will discover and crawl pages
# starting from the seed URL. It will not process direct document links
# found during the deep crawl; it is designed to follow hyperlinks.
crawl_results = await crawler.arun(urls[0], config=run_config)
⋮----
class URLLoader
⋮----
"""Loads URLs and extracts text using a specified crawler."""
⋮----
"""Initialize the URL loader.

        Args:
            urls: List of URLs to load
            parsing_config: Configuration for parsing
            crawler_config: Configuration for the crawler
        """
⋮----
crawler_config = TrafilaturaConfig(parser=Parser(parsing_config))
⋮----
def load(self) -> List[Document]
⋮----
"""Load the URLs using the specified crawler."""
</file>
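
`TrafilaturaCrawler` delegates plain-HTML extraction to the `trafilatura` library. A minimal fetch-and-extract sketch, assuming `trafilatura` is installed; the URL is a placeholder and keyword options vary across trafilatura versions.

```python
import trafilatura

url = "https://example.com"  # placeholder URL
downloaded = trafilatura.fetch_url(url)
if downloaded is not None:
    # with_metadata=True prepends title/date/author, as in TrafilaturaCrawler
    text = trafilatura.extract(downloaded, with_metadata=True)
    print(text[:500] if text else "no extractable text")
```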

<file path="langroid/parsing/urls.py">
logger = logging.getLogger(__name__)
⋮----
def url_to_tempfile(url: str) -> str
⋮----
"""
    Fetch content from the given URL and save it to a temporary local file.

    Args:
        url (str): The URL of the content to fetch.

    Returns:
        str: The path to the temporary file where the content is saved.

    Raises:
        HTTPError: If there's any issue fetching the content.
    """
⋮----
response = requests.get(url)
response.raise_for_status()  # Raise an exception for HTTP errors
⋮----
# Create a temporary file and write the content
⋮----
def get_user_input(msg: str, color: str = "blue") -> str
⋮----
"""
    Prompt the user for input.
    Args:
        msg: printed prompt
        color: color of the prompt
    Returns:
        user input
    """
color_str = f"[{color}]{msg} " if color else msg + " "
⋮----
"""
    Prompt the user for inputs.
    Args:
        prompt: printed prompt
        n: how many inputs to prompt for. If None, then prompt until done, otherwise
            quit after n inputs.
    Returns:
        list of input strings
    """
# Create an empty set to store the URLs.
input_set = set()
⋮----
# Use a while loop to continuously ask the user for URLs.
⋮----
# Prompt the user for input.
input_str = Prompt.ask(f"[blue]{prompt}")
⋮----
# Check if the user wants to exit the loop.
⋮----
# if it is a URL, ask how many to crawl
⋮----
url = input_str
input_str = Prompt.ask("[blue] How many new URLs to crawl?", default="0")
max_urls = int(input_str) + 1
tot_urls = list(find_urls(url, max_links=max_urls, max_depth=2))
tot_urls_str = "\n".join(tot_urls)
⋮----
class Url(BaseModel)
⋮----
url: HttpUrl
⋮----
def is_url(s: str) -> bool
⋮----
url_adapter = TypeAdapter(HttpUrl)
⋮----
"""
    Given a list of inputs, return a
    list of indices of URLs, list of indices of paths, list of indices of byte-contents.
    Args:
        inputs: list of strings or bytes
    Returns:
        list of Indices of URLs,
        list of indices of paths,
        list of indices of byte-contents
    """
urls = []
paths = []
byte_list = []
⋮----
def crawl_url(url: str, max_urls: int = 1) -> List[str]
⋮----
"""
    Crawl starting at the url and return a list of URLs to be parsed,
    up to a maximum of `max_urls`.
    This has not been tested to work as intended. Ignore.
    """
⋮----
# no need to crawl, just return the original list
⋮----
to_visit = None
known_urls = None
⋮----
# Create a RobotFileParser object
robots = urllib.robotparser.RobotFileParser()
⋮----
# Set the RobotFileParser object to the website's robots.txt file
⋮----
# Start or resume the crawl
⋮----
final_urls = [s.strip() for s in known_urls]
⋮----
"""
    Recursively find all URLs on a given page.

    Args:
        url (str): The URL to start from.
        max_links (int): The maximum number of links to find.
        visited (set): A set of URLs that have already been visited.
        depth (int): The current depth of the recursion.
        max_depth (int): The maximum depth of the recursion.
        match_domain (bool): Whether to only return URLs that are on the same domain.

    Returns:
        set: A set of URLs found on the page.
    """
⋮----
visited = set()
⋮----
base_domain = urlparse(url).netloc
⋮----
response = requests.get(url, timeout=5)
⋮----
soup = BeautifulSoup(response.text, "html.parser")
links = [
⋮----
urljoin(url, a["href"])  # type: ignore
⋮----
# Defrag links: discard links that are to portions of same page
defragged_links = list(
⋮----
set(urldefrag(link).url for link in links)  # type: ignore
⋮----
# Filter links based on domain matching requirement
domain_matching_links = [
⋮----
# ensure url is first, since below we are taking first max_links urls
domain_matching_links = [url] + [x for x in domain_matching_links if x != url]
⋮----
# If found links exceed max_links, return immediately
⋮----
def org_user_from_github(url: str) -> str
⋮----
parsed = urllib.parse.urlparse(url)
⋮----
# Example usage
found_urls = set(fire.Fire(find_urls))
</file>
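
The core step of `find_urls` — collect the links on a page, drop URL fragments, and keep only same-domain links — can be sketched standalone with `requests` and `BeautifulSoup`; the function name and example URL below are illustrative.

```python
from urllib.parse import urldefrag, urljoin, urlparse

import requests
from bs4 import BeautifulSoup


def same_domain_links(url: str, max_links: int = 20) -> list[str]:
    """Return up to max_links same-domain links found on `url`."""
    base_domain = urlparse(url).netloc
    resp = requests.get(url, timeout=5)
    soup = BeautifulSoup(resp.text, "html.parser")
    links = [urljoin(url, a["href"]) for a in soup.find_all("a", href=True)]
    # drop fragments so "#section" variants of the same page collapse
    deduped = {urldefrag(link).url for link in links}
    same_domain = [x for x in deduped if urlparse(x).netloc == base_domain]
    return same_domain[:max_links]


# print(same_domain_links("https://example.com", 5))
```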

<file path="langroid/parsing/utils.py">
logger = logging.getLogger(__name__)
⋮----
def download_nltk_resource(resource: str) -> None
⋮----
@cache
    def _download() -> None
⋮----
model = resource.split("/")[-1]
⋮----
T = TypeVar("T")
⋮----
def batched(iterable: Iterable[T], n: int) -> Iterable[Sequence[T]]
⋮----
"""Batch data into tuples of length n. The last batch may be shorter."""
# batched('ABCDEFG', 3) --> ABC DEF G
⋮----
it = iter(iterable)
⋮----
def generate_random_sentences(k: int) -> str
⋮----
# Load the sample text
⋮----
text = gutenberg.raw("austen-emma.txt")
⋮----
# Split the text into sentences
sentences = nltk.tokenize.sent_tokenize(text)
⋮----
# Generate k random sentences
random_sentences = random.choices(sentences, k=k)
⋮----
def generate_random_text(num_sentences: int) -> str
⋮----
fake = Faker()
text = ""
⋮----
def closest_string(query: str, string_list: List[str]) -> str
⋮----
"""Find the closest match to the query in a list of strings.

    This function is case-insensitive and ignores leading and trailing whitespace.
    If no match is found, it returns 'No match found'.

    Args:
        query (str): The string to match.
        string_list (List[str]): The list of strings to search.

    Returns:
        str: The closest match to the query from the list, or 'No match found'
             if no match is found.
    """
# Create a dictionary where the keys are the standardized strings and
# the values are the original strings.
str_dict = {s.lower().strip(): s for s in string_list}
⋮----
# Standardize the query and find the closest match in the list of keys.
closest_match = difflib.get_close_matches(
⋮----
# Retrieve the original string from the value in the dictionary.
original_closest_match = (
⋮----
def split_paragraphs(text: str) -> List[str]
⋮----
"""
    Split the input text into paragraphs using "\n\n" as the delimiter.

    Args:
        text (str): The input text.

    Returns:
        list: A list of paragraphs.
    """
# Split based on a newline, followed by spaces/tabs, then another newline.
paras = re.split(r"\n[ \t]*\n", text)
⋮----
def split_newlines(text: str) -> List[str]
⋮----
"""
    Split the input text into lines using "\n" as the delimiter.

    Args:
        text (str): The input text.

    Returns:
        list: A list of lines.
    """
lines = re.split(r"\n", text)
⋮----
def number_segments(s: str, granularity: int = 1) -> str
⋮----
"""
    Number the segments in a given text, preserving paragraph structure.
    A segment is a sequence of `len` consecutive "sentences", where a "sentence"
    is either a normal sentence, or if there isn't enough punctuation to properly
    identify sentences, then we use a pseudo-sentence via heuristics (split by newline
    or failing that, just split every 40 words). The goal here is simply to number
    segments at a reasonable granularity so the LLM can identify relevant segments,
    in the RelevanceExtractorAgent.

    Args:
        s (str): The input text.
        granularity (int): The number of sentences in a segment.
            If this is -1, then the entire text is treated as a single segment,
            and is numbered as <#1#>.

    Returns:
        str: The text with segments numbered in the style <#1#>, <#2#> etc.

    Example:
        >>> number_segments("Hello world! How are you? Have a good day.")
        '<#1#> Hello world! <#2#> How are you? <#3#> Have a good day.'
    """
⋮----
numbered_text = []
count = 0
⋮----
paragraphs = split_paragraphs(s)
⋮----
sentences = nltk.sent_tokenize(paragraph)
# Some docs are problematic (e.g. resumes) and have no (or too few) periods,
# so we can't split usefully into sentences.
# We try a series of heuristics to split into sentences,
# until the avg num words per sentence is less than 40.
avg_words_per_sentence = sum(
⋮----
sentences = split_newlines(paragraph)
⋮----
# Still too long, just split on every 40 words
sentences = []
⋮----
words = nltk.word_tokenize(sentence)
⋮----
# if there are less than 20 words left after this,
# just add them to the last sentence and break
⋮----
num = count // granularity + 1
number_prefix = f"<#{num}#>" if count % granularity == 0 else ""
sentence = f"{number_prefix} {sentence}"
⋮----
numbered_paragraph = " ".join(sentences)
⋮----
def number_sentences(s: str) -> str
⋮----
def parse_number_range_list(specs: str) -> List[int]
⋮----
"""
    Parse a specs string like "3,5,7-10" into a list of integers.

    Args:
        specs (str): A string containing segment numbers and/or ranges
                     (e.g., "3,5,7-10").

    Returns:
        List[int]: List of segment numbers.

    Example:
        >>> parse_number_range_list("3,5,7-10")
        [3, 5, 7, 8, 9, 10]
    """
spec_indices = set()  # type: ignore
⋮----
# some weak LLMs may generate <#1#> instead of 1, so extract just the digits
# or the "-"
part = "".join(char for char in part if char.isdigit() or char == "-")
⋮----
def strip_k(s: str, k: int = 2) -> str
⋮----
"""
    Strip any leading and trailing whitespaces from the input text beyond length k.
    This is useful for removing leading/trailing whitespaces from a text while
    preserving paragraph structure.

    Args:
        s (str): The input text.
        k (int): The number of leading and trailing whitespaces to retain.

    Returns:
        str: The text with leading and trailing whitespaces removed beyond length k.
    """
⋮----
# Count leading and trailing whitespaces
leading_count = len(s) - len(s.lstrip())
trailing_count = len(s) - len(s.rstrip())
⋮----
# Determine how many whitespaces to retain
leading_keep = min(leading_count, k)
trailing_keep = min(trailing_count, k)
⋮----
# Use slicing to get the desired output
⋮----
def clean_whitespace(text: str) -> str
⋮----
"""Remove extra whitespace from the input text, while preserving
    paragraph structure.
    """
paragraphs = split_paragraphs(text)
cleaned_paragraphs = [" ".join(p.split()) for p in paragraphs if p]
return "\n\n".join(cleaned_paragraphs)  # Join the cleaned paragraphs.
⋮----
def extract_numbered_segments(s: str, specs: str) -> str
⋮----
"""
    Extract specified segments from a numbered text, preserving paragraph structure.

    Args:
        s (str): The input text containing numbered segments.
        specs (str): A string containing segment numbers and/or ranges
                     (e.g., "3,5,7-10").

    Returns:
        str: Extracted segments, keeping original paragraph structures.

    Example:
        >>> text = "(1) Hello world! (2) How are you? (3) Have a good day."
        >>> extract_numbered_segments(text, "1,3")
        'Hello world! Have a good day.'
    """
# Use the helper function to get the list of indices from specs
⋮----
spec_indices = parse_number_range_list(specs)
⋮----
# Regular expression to identify numbered segments like
# <#1#> Hello world! This is me. <#2#> How are you? <#3#> Have a good day.
# Note we match any character between segment markers, including newlines.
segment_pattern = re.compile(r"<#(\d+)#>([\s\S]*?)(?=<#\d+#>|$)")
⋮----
# Split the text into paragraphs while preserving their boundaries
⋮----
extracted_paragraphs = []
⋮----
segments_with_numbers = segment_pattern.findall(paragraph)
⋮----
# Extract the desired segments from this paragraph
extracted_segments = [
⋮----
# If we extracted any segments from this paragraph,
# join them with ellipsis (...) and append to results.
⋮----
"""
    Extract the content from a file path or URL, or a list of file paths or URLs.

    Args:
        path (bytes | str | List[str]): The file path or URL, or a list of file paths or
            URLs, or bytes content. The bytes option is meant to support cases
            where upstream code may have already loaded the content (e.g., from a
            database or API) and we want to avoid having to copy the content to a
            temporary file.
        parsing (ParsingConfig): The parsing configuration.
        doc_type (str | DocumentType | None): The document type if known.
            If multiple paths are given, this MUST apply to ALL docs.

    Returns:
        str | List[str]: The extracted content if a single file path or URL is provided,
                or a list of extracted contents if a
                list of file paths or URLs is provided.
    """
⋮----
paths = [path]
⋮----
paths = path
⋮----
urls = [paths[i] for i in url_idxs]
path_list = [paths[i] for i in path_idxs]
byte_list = [paths[i] for i in byte_idxs]
⋮----
parser = Parser(parsing)
docs: List[Document] = []
⋮----
loader = URLLoader(urls=urls, parser=parser)  # type: ignore
docs = loader.load()
⋮----
path_docs = RepoLoader.get_documents(
</file>
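
The `<#n#>` segment-numbering convention used by `number_segments` and `extract_numbered_segments` (for the RelevanceExtractorAgent) can be exercised with a standalone sketch. The spec parser and extraction regex below mirror the visible code, but this simplified version joins extracted segments with spaces instead of preserving paragraph structure.

```python
import re
from typing import List


def parse_number_range_list(specs: str) -> List[int]:
    """'3,5,7-10' -> [3, 5, 7, 8, 9, 10]"""
    indices = set()
    for part in specs.split(","):
        # tolerate sloppy LLM output like "<#3#>" by keeping only digits and "-"
        part = "".join(c for c in part if c.isdigit() or c == "-")
        if "-" in part:
            lo, hi = map(int, part.split("-"))
            indices.update(range(lo, hi + 1))
        elif part:
            indices.add(int(part))
    return sorted(indices)


def extract_numbered_segments(s: str, specs: str) -> str:
    """Pull out the segments whose <#n#> numbers appear in `specs`."""
    wanted = set(parse_number_range_list(specs))
    pattern = re.compile(r"<#(\d+)#>([\s\S]*?)(?=<#\d+#>|$)")
    picked = [seg.strip() for num, seg in pattern.findall(s) if int(num) in wanted]
    return " ".join(picked)


text = "<#1#> Hello world! <#2#> How are you? <#3#> Have a good day."
print(extract_numbered_segments(text, "1,3"))
# Hello world! Have a good day.
```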

<file path="langroid/prompts/__init__.py">
__all__ = [
</file>

<file path="langroid/prompts/dialog.py">
def collate_chat_history(inputs: List[tuple[str, str]]) -> str
⋮----
"""
    Collate (human, ai) message pairs into a single string.
    Args:
        inputs: list of (human, ai) message pairs
    Returns:
        str: the collated dialog as a single string
    """
pairs = [
</file>

<file path="langroid/prompts/prompts_config.py">
class PromptsConfig(BaseSettings)
⋮----
max_tokens: int = 1000  # for output; NOT USED ANYWHERE
</file>

<file path="langroid/prompts/templates.py">
EXTRACT_RELEVANT = """
⋮----
EXTRACTION_PROMPT_GPT4 = f"""
⋮----
EXTRACTION_PROMPT = f"""
⋮----
SUMMARY_ANSWER_PROMPT_GPT4 = f"""
⋮----
ANSWER_PROMPT_USE_HISTORY_GPT4 = f"""
⋮----
SUMMARY_ANSWER_PROMPT = f"""
</file>

<file path="langroid/pydantic_v1/__init__.py">
"""
Compatibility layer for Langroid's Pydantic migration.

IMPORTANT: You are importing from langroid.pydantic_v1 but getting Pydantic v2 classes!
Langroid has fully migrated to Pydantic v2, and this compatibility layer is deprecated.
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
# Only show the visual warning, not the standard deprecation warning
# The standard warning is too noisy and shows the import line
⋮----
# Import from pydantic v2 directly (not from pydantic.v1)
# This allows existing code to continue working if it's already v2-compatible
from pydantic import *  # noqa: F403, F401
⋮----
# BaseSettings has moved in v2, import it explicitly
⋮----
from pydantic_settings import BaseSettings  # noqa: F401
⋮----
# Fallback for older pydantic versions
from pydantic import BaseSettings  # type: ignore[no-redef] # noqa: F401
⋮----
# Explicitly export all items for mypy
__all__ = [
</file>

<file path="langroid/pydantic_v1/main.py">
"""
Compatibility layer for Pydantic v2 migration.

This module now imports directly from Pydantic v2 since all internal code
has been migrated to use Pydantic v2 patterns.
"""
⋮----
# Import from pydantic.main but don't trigger the warning again
# The warning is already shown when importing from langroid.pydantic_v1
from pydantic.main import *  # noqa: F403, F401
</file>

<file path="langroid/utils/algorithms/__init__.py">
__all__ = ["graph"]
</file>

<file path="langroid/utils/algorithms/graph.py">
"""
Graph algos.
"""
⋮----
@no_type_check
def topological_sort(order: np.array) -> List[int]
⋮----
"""
    Given a directed adjacency matrix, return a topological sort of the nodes.
    order[i,j] = -1 means there is an edge from i to j.
    order[i,j] = 0 means there is no edge from i to j.
    order[i,j] = 1 means there is an edge from j to i.

    Args:
        order (np.array): The adjacency matrix.

    Returns:
        List[int]: The topological sort of the nodes.

    """
n = order.shape[0]
⋮----
# Calculate the in-degrees
in_degree = [0] * n
⋮----
# Initialize the queue with nodes of in-degree 0
queue = [i for i in range(n) if in_degree[i] == 0]
result = []
⋮----
node = queue.pop(0)
⋮----
@no_type_check
def components(order: np.ndarray) -> List[List[int]]
⋮----
"""
    Find the connected components in an undirected graph represented by a matrix.

    Args:
        order (np.ndarray): A matrix with values 0 or 1 indicating
            undirected graph edges. `order[i][j] = 1` means an edge between `i`
            and `j`, and `0` means no edge.

    Returns:
        List[List[int]]: A list of List where each List contains the indices of
            nodes in the same connected component.

    Example:
        order = np.array([
            [1, 1, 0, 0],
            [1, 1, 1, 0],
            [0, 1, 1, 0],
            [0, 0, 0, 1]
        ])
        components(order)
        # [[0, 1, 2], [3]]
    """
⋮----
i2g: Dict[int, int] = {}  # index to group mapping
next_group = 0
⋮----
connected_groups = {i2g[j] for j in np.nonzero(order[i, :])[0] if j in i2g}
⋮----
# If the node is not part of any group
# and is not connected to any groups, assign a new group
⋮----
# If the node is connected to multiple groups, we merge them
main_group = min(connected_groups)
⋮----
# Convert i2g to a list of Lists
groups: Dict[int, List[int]] = {}
</file>
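
A small usage sketch for the two graph helpers above, using only the edge conventions stated in their docstrings (the matrices are toy examples, and the expected outputs follow from those docstrings rather than from running the compressed code):

```python
import numpy as np

from langroid.utils.algorithms.graph import components, topological_sort

# Directed chain 0 -> 1 -> 2:
# order[i, j] = -1 means an edge i -> j, 1 means the reverse, 0 means no edge.
order = np.array(
    [
        [0, -1, 0],
        [1, 0, -1],
        [0, 1, 0],
    ]
)
print(topological_sort(order))  # expected: [0, 1, 2]

# Undirected graph for connected components: 1 = edge, 0 = no edge.
undirected = np.array(
    [
        [1, 1, 0, 0],
        [1, 1, 1, 0],
        [0, 1, 1, 0],
        [0, 0, 0, 1],
    ]
)
print(components(undirected))  # expected: [[0, 1, 2], [3]]
```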

<file path="langroid/utils/output/__init__.py">
__all__ = [
</file>

<file path="langroid/utils/output/citations.py">
logger = logging.getLogger(__name__)
⋮----
def extract_markdown_references(md_string: str) -> List[int]
⋮----
"""
    Extracts markdown references (e.g., [^1], [^2]) from a string and returns
    them as a sorted list of integers.

    Args:
        md_string (str): The markdown string containing references.

    Returns:
        list[int]: A sorted list of unique integers from the markdown references.
    """
⋮----
# Regex to find all occurrences of [^<number>]
matches = re.findall(r"\[\^(\d+)\]", md_string)
# Convert matches to integers, remove duplicates with set, and sort
⋮----
def invalid_markdown_citations(md_string: str) -> List[str]
⋮----
"""
    Finds non-numeric markdown citations (e.g., [^a], [^xyz]) in a string.

    Args:
        md_string (str): The markdown string to search for invalid citations.

    Returns:
        List[str]: List of invalid citation strings (without brackets/caret).
    """
⋮----
# Find all citation references first
matches = re.findall(r"\[\^([^\]\s]+)\]", md_string)
⋮----
# Filter out purely numeric citations
invalid_citations = [match for match in matches if not match.isdigit()]
⋮----
def format_footnote_text(content: str, width: int = 0) -> str
⋮----
"""
    Formats the content so that each original line is individually processed.
    - If width=0, no wrapping is done (lines remain as is).
    - If width>0, lines are wrapped to that width.
    - Blank lines remain blank (with indentation).
    - Everything is indented by 4 spaces (for markdown footnotes).

    Args:
        content (str): The text of the footnote to be formatted.
        width (int): Maximum width of the text lines. If 0, lines are not wrapped.

    Returns:
        str: Properly formatted markdown footnote text.
    """
⋮----
indent = "    "  # 4 spaces for markdown footnotes
lines = content.split("\n")  # keep original line structure
⋮----
output_lines = []
⋮----
# If the line is empty (or just spaces), keep it blank (but indented)
⋮----
# Wrap each non-empty line to the specified width
wrapped = textwrap.wrap(line, width=width)
⋮----
# If textwrap gives nothing, add a blank (indented) line
⋮----
# No wrapping: just indent the original line
⋮----
# Join them with newline so we preserve the paragraph/blank line structure
⋮----
"""
    Given a list of (integer) citations, and a list of passages, return a string
    that can be added as a footer to the main text, to show sources cited.

    Args:
        citations (list[int]): list of citations, presumably from main text
        passages (list[Document]): list of passages (Document objects)

    Returns:
        str: formatted string of FULL citations (i.e. reference AND content)
            for footnote in markdown;
        str: formatted string of BRIEF citations (i.e. reference only)
            for footnote in markdown.
    """
citations_str = ""
full_citations_str = ""
⋮----
# append [i] source, content for each citation
good_citations = [c for c in citations if c > 0 and c <= len(passages)]
⋮----
# source and content for each citation
full_citations_str = "\n".join(
⋮----
# source for each citation
citations_str = "\n".join(
</file>
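
The citation helpers above are driven by two small regexes that are fully visible; here is a standalone sketch of the reference extraction and the invalid-citation check, with a made-up example string.

```python
import re
from typing import List


def extract_markdown_references(md: str) -> List[int]:
    """All [^<number>] references, as sorted unique ints."""
    return sorted({int(m) for m in re.findall(r"\[\^(\d+)\]", md)})


def invalid_markdown_citations(md: str) -> List[str]:
    """Citation labels that are not purely numeric, e.g. [^abc]."""
    return [m for m in re.findall(r"\[\^([^\]\s]+)\]", md) if not m.isdigit()]


answer = "Paris is the capital of France [^2][^1], see also [^note]."
print(extract_markdown_references(answer))  # [1, 2]
print(invalid_markdown_citations(answer))   # ['note']
```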

<file path="langroid/utils/output/printing.py">
def shorten_text(text: str, chars: int = 40) -> str
⋮----
text = " ".join(text.split())
⋮----
text = text[:chars] + "..." + text[-chars:] if len(text) > 2 * chars else text
styled_text = Text(text, style=style)
⋮----
class PrintColored
⋮----
"""Context to temporarily print in a desired color"""
⋮----
def __init__(self, color: str)
⋮----
def __enter__(self) -> None
⋮----
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None
⋮----
@contextmanager
def silence_stdout() -> Iterator[None]
⋮----
"""
    Temporarily silence all output to stdout and from rich.print.

    This context manager redirects all output written to stdout (which includes
    outputs from the built-in print function and rich.print) to /dev/null on
    UNIX-like systems or NUL on Windows. Once the context block exits, stdout is
    restored to its original state.

    Example:
        with silence_stdout():
            print("This won't be printed")
            rich.print("This also won't be printed")

    Note:
        This suppresses both standard print functions and the rich library outputs.
    """
platform_null = "/dev/null" if sys.platform != "win32" else "NUL"
original_stdout = sys.stdout
fnull = open(platform_null, "w")
⋮----
class SuppressLoggerWarnings
⋮----
def __init__(self, logger: str | None = None)
⋮----
# If no logger name is given, get the root logger
⋮----
# Set the logging level to 'ERROR' to suppress warnings
⋮----
# Reset the logging level to its original value
</file>

<file path="langroid/utils/output/status.py">
console = Console()
logger = logging.getLogger(__name__)
⋮----
"""
    Displays a rich spinner if not in quiet mode, else optionally logs the message.
    """
stack = ExitStack()
logged = False
⋮----
logged = True
⋮----
# When using rich spinner, we enforce quiet mode
# (since output will be messy otherwise);
# We make an exception to this when debug is enabled.
</file>

<file path="langroid/utils/__init__.py">
__all__ = [
</file>

<file path="langroid/utils/configuration.py">
# Global reentrant lock to serialize any modifications to the global settings.
_global_lock = threading.RLock()
⋮----
class Settings(BaseSettings)
⋮----
debug: bool = False  # show debug messages?
max_turns: int = -1  # maximum number of turns in a task (to avoid inf loop)
progress: bool = False  # show progress spinners/bars?
stream: bool = True  # stream output?
cache: bool = True  # use cache?
cache_type: Literal["redis", "fakeredis", "none"] = "redis"  # cache type
chat_model: str = ""  # language model name, e.g. litellm/ollama/llama2
quiet: bool = False  # quiet mode (i.e. suppress all output)?
notebook: bool = False  # running in a notebook?
⋮----
model_config = SettingsConfigDict(extra="forbid")
⋮----
# Load environment variables from .env file.
⋮----
# The global (default) settings instance.
# This is updated by update_global_settings() and set_global().
_global_settings = Settings()
⋮----
# Thread-local storage for temporary (per-thread) settings overrides.
_thread_local = threading.local()
⋮----
class SettingsProxy
⋮----
"""
    A proxy for the settings that returns a thread‐local override if set,
    or else falls back to the global settings.
    """
⋮----
def __getattr__(self, name: str) -> Any
⋮----
# If the calling thread has set an override, use that.
⋮----
def __setattr__(self, name: str, value: Any) -> None
⋮----
# All writes go to the global settings.
⋮----
def update(self, new_settings: Settings) -> None
⋮----
def dict(self) -> Dict[str, Any]
⋮----
# Return a dict view of the settings as seen by the caller.
# Note that temporary overrides are not “merged” with global settings.
⋮----
settings = SettingsProxy()
⋮----
def update_global_settings(cfg: BaseSettings, keys: List[str]) -> None
⋮----
"""
    Update global settings so that modules can later access them via, e.g.,

        from langroid.utils.configuration import settings
        if settings.debug: ...

    This updates the global default.
    """
config_dict = cfg.model_dump()
filtered_config = {key: config_dict[key] for key in keys if key in config_dict}
new_settings = Settings(**filtered_config)
⋮----
def set_global(key_vals: Settings) -> None
⋮----
"""
    Update the global settings object.
    """
⋮----
@contextmanager
def temporary_settings(temp_settings: Settings) -> Iterator[None]
⋮----
"""
    Temporarily override the settings for the calling thread.

    Within the context, any access to "settings" will use the provided temporary
    settings. Once the context is exited, the thread reverts to the global settings.
    """
saved = getattr(_thread_local, "override", None)
⋮----
@contextmanager
def quiet_mode(quiet: bool = True) -> Iterator[None]
⋮----
"""
    Temporarily override settings.quiet for the current thread.
    This implementation builds on the thread‑local temporary_settings context manager.
    The effective quiet mode is merged:
    if quiet is already True (from an outer context),
    then it remains True even if a nested context passes quiet=False.
    """
current_effective = (
⋮----
)  # get the current thread's effective settings
# Create a new settings instance from the current effective state.
temp = Settings(**current_effective)
# Merge the new flag: once quiet is enabled, it stays enabled.
⋮----
def set_env(settings_instance: BaseSettings) -> None
⋮----
"""
    Set environment variables from a BaseSettings instance.

    Each field in the settings is written to os.environ.
    """
⋮----
env_var_name = field.alias or field_name.upper()
</file>
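
A usage sketch for the thread-local settings machinery above, based on the `Settings` fields and context managers shown (the specific field values are arbitrary):

```python
from langroid.utils.configuration import (
    Settings,
    quiet_mode,
    set_global,
    settings,
    temporary_settings,
)

set_global(Settings(debug=False, cache=True))  # update the global defaults

with temporary_settings(Settings(debug=True, stream=False)):
    # only the current thread sees these overrides
    assert settings.debug is True

with quiet_mode():
    # quiet is merged: once enabled, nested contexts cannot turn it off
    assert settings.quiet is True

assert settings.debug is False  # back to the global defaults
```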

<file path="langroid/utils/constants.py">
# Define the ANSI escape sequences for various colors and reset
class Colors(BaseModel)
⋮----
RED: str = "\033[31m"
BLUE: str = "\033[34m"
GREEN: str = "\033[32m"
GREEN_DIMMER: str = "\033[38;5;22m"  # very dark green
GREEN_DIM: str = "\033[38;5;28m"  # medium-dim green
ORANGE: str = "\033[33m"  # no standard ANSI color for orange; using yellow
CYAN: str = "\033[36m"
MAGENTA: str = "\033[35m"
YELLOW: str = "\033[33m"
RESET: str = "\033[0m"
⋮----
NO_ANSWER = "DO-NOT-KNOW"
DONE = "DONE"
USER_QUIT_STRINGS = ["q", "x", "quit", "exit", "bye", DONE]
PASS = "__PASS__"
PASS_TO = PASS + ":"
SEND_TO = "__SEND__:"
TOOL = "TOOL"
# This is a recommended setting for TaskConfig.addressing_prefix if using it at all;
# prefer to use `RecipientTool` to allow agents addressing others.
# Caution: the AT string should NOT contain any 'word' characters, i.e.
# no letters, digits or underscores.
# See tests/main/test_msg_routing for example usage
AT = "|@|"
TOOL_BEGIN = "TOOL_BEGIN"
TOOL_END = "TOOL_END"
</file>

<file path="langroid/utils/git_utils.py">
logger = logging.getLogger(__name__)
⋮----
def git_read_file(repo: str, filepath: str) -> str
⋮----
"""
    Read the contents of a file from a GitHub repository.

    Args:
        repo (str): The GitHub repository in the format "owner/repo"
        filepath (str): The file path relative to the repository root

    Returns:
        str: The contents of the file as a string
    """
⋮----
g = Github()
github_repo = g.get_repo(repo)
file_content = github_repo.get_contents(filepath)
⋮----
def get_file_list(repo: str, dir: str, pat: str = "") -> List[str]
⋮----
"""
    Get a list of files in a specified directory of a GitHub repository.

    Args:
        repo (str): The GitHub repository in the format "owner/repo"
        dir (str): The directory path relative to the repository root
        pat (str): Optional wildcard pattern to filter file names (default: "")

    Returns:
        List[str]: A list of file paths in the specified directory
    """
⋮----
contents = github_repo.get_contents(dir)
⋮----
file_list = []
⋮----
file_list = [content.path for content in contents if content.type == "file"]
⋮----
file_list = [contents.path]
⋮----
file_list = [file for file in file_list if fnmatch.fnmatch(file, pat)]
⋮----
def git_init_repo(dir: str) -> git.Repo | None
⋮----
"""
    Set up a Git repository in the specified directory.

    Args:
        dir (str): Path to the directory where the Git repository should be initialized

    Returns:
        git.Repo: The initialized Git repository object
    """
repo_path = Path(dir).expanduser()
⋮----
repo = git.Repo.init(repo_path)
⋮----
gitignore_content = textwrap.dedent(
⋮----
gitignore_path = repo_path / ".gitignore"
⋮----
# Ensure the default branch is 'main'
# Check if we're on the master branch
⋮----
# Rename the branch
⋮----
def git_commit_file(repo: git.Repo, filepath: str, msg: str) -> None
⋮----
"""
    Commit a file to a Git repository.

    Args:
        repo (git.Repo): The Git repository object
        filepath (str): Path to the file to be committed
        msg (str): The commit message

    Returns:
        None
    """
⋮----
commit_msg = msg or f"Updated {filepath}"
⋮----
def git_commit_mods(repo: git.Repo, msg: str = "commit all changes") -> None
⋮----
"""
    Commit all modifications in the Git repository.
    Does not raise an error if there's nothing to commit.

    Args:
        repo (git.Repo): The Git repository object

    Returns:
        None
    """
⋮----
def git_restore_repo(repo: git.Repo) -> None
⋮----
"""
    Restore all unstaged, uncommitted changes in the Git repository.
    This function undoes any dirty files to the last commit.

    Args:
        repo (git.Repo): The Git repository object

    Returns:
        None
    """
⋮----
def git_restore_file(repo: git.Repo, file_path: str) -> None
⋮----
"""
    Restore a specific file in the Git repository to its state in the last commit.
    This function undoes changes to the specified file.

    Args:
        repo (git.Repo): The Git repository object
        file_path (str): Path to the file to be restored

    Returns:
        None
    """
⋮----
def git_create_checkout_branch(repo: git.Repo, branch: str) -> None
⋮----
"""
    Create and checkout a new branch in the given Git repository.
    If the branch already exists, it will be checked out.
    If we're already on the specified branch, no action is taken.

    Args:
        repo (git.Repo): The Git repository object
        branch (str): The name of the branch to create or checkout

    Returns:
        None
    """
⋮----
new_branch = repo.create_head(branch)
⋮----
def git_diff_file(repo: git.Repo, filepath: str) -> str
⋮----
"""
    Show diffs of file between the latest commit and the previous one if any.

    Args:
        repo (git.Repo): The Git repository object
        filepath (str): Path to the file to be diffed

    Returns:
        str: The diff output as a string
    """
⋮----
# Get the two most recent commits
commits = list(repo.iter_commits(paths=filepath, max_count=2))
⋮----
# Get the diff between the two commits for the specific file
diff = repo.git.diff(commits[1].hexsha, commits[0].hexsha, filepath)
</file>
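
`git_utils` wraps GitPython. The sketch below shows the underlying init/add/commit/diff calls those helpers use, as a standalone example rather than Langroid's functions; the repo path, file name, and identity values are placeholders.

```python
from pathlib import Path

import git  # GitPython

repo_dir = Path("/tmp/demo-repo")  # placeholder location
repo_dir.mkdir(parents=True, exist_ok=True)
repo = git.Repo.init(repo_dir)

# set a local identity so commits don't depend on global git config
with repo.config_writer() as cw:
    cw.set_value("user", "name", "Example User")
    cw.set_value("user", "email", "user@example.com")

(repo_dir / "notes.txt").write_text("first draft\n")
repo.index.add(["notes.txt"])
repo.index.commit("Add notes.txt")

(repo_dir / "notes.txt").write_text("second draft\n")
repo.index.add(["notes.txt"])
repo.index.commit("Update notes.txt")

# diff the file between the two most recent commits, as git_diff_file does
commits = list(repo.iter_commits(paths="notes.txt", max_count=2))
print(repo.git.diff(commits[1].hexsha, commits[0].hexsha, "notes.txt"))
```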

<file path="langroid/utils/globals.py">
T = TypeVar("T", bound="GlobalState")
⋮----
class GlobalState(BaseModel)
⋮----
"""A base Pydantic model for global states."""
⋮----
_instance: Optional["GlobalState"] = None
⋮----
@classmethod
    def get_instance(cls: Type["GlobalState"]) -> "GlobalState"
⋮----
"""
        Get the global instance of the specific subclass.

        Returns:
            The global instance of the subclass.
        """
# Get the actual value from ModelPrivateAttr when accessing on class
instance_attr = getattr(cls, "_instance", None)
actual_instance: Optional["GlobalState"]
⋮----
default_value = instance_attr.default
⋮----
actual_instance = None
⋮----
actual_instance = cast(Optional["GlobalState"], default_value)
⋮----
actual_instance = instance_attr
⋮----
new_instance = cls()
⋮----
return actual_instance  # type: ignore
⋮----
@classmethod
    def set_values(cls: Type[T], **kwargs: Dict[str, Any]) -> None
⋮----
"""
        Set values on the global instance of the specific subclass.

        Args:
            **kwargs: The fields and their values to set.
        """
instance = cls.get_instance()
⋮----
@classmethod
    def get_value(cls: Type[T], name: str) -> Any
⋮----
"""
        Retrieve the value of a specific field from the global instance.

        Args:
            name (str): The name of the field to retrieve.

        Returns:
            str: The value of the specified field.
        """
</file>
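
A usage sketch of the `GlobalState` pattern above: subclass it with whatever fields you need, then read and write through the classmethods. The subclass and its fields here are hypothetical.

```python
from langroid.utils.globals import GlobalState


class RunState(GlobalState):
    """Hypothetical global state shared across agents in one process."""

    run_id: str = ""
    num_queries: int = 0


RunState.set_values(run_id="demo-123", num_queries=1)
print(RunState.get_value("run_id"))       # demo-123
print(RunState.get_value("num_queries"))  # 1
```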

<file path="langroid/utils/html_logger.py">
"""HTML Logger for Langroid Task System.

This module provides an HTML logger that creates self-contained HTML files
with collapsible log entries for better visualization of agent interactions.
"""
⋮----
class HTMLLogger
⋮----
"""Logger that outputs task logs as interactive HTML files."""
⋮----
"""Initialize the HTML logger.

        Args:
            filename: Base name for the log file (without extension)
            log_dir: Directory to store log files
            model_info: Information about the model being used
            append: Whether to append to existing file
        """
⋮----
# Logger for errors
⋮----
def _write_header(self) -> None
⋮----
"""Write the HTML header with CSS and JavaScript."""
timestamp = datetime.now().strftime("%m/%d/%Y, %I:%M:%S %p")
⋮----
html_content = f"""<!DOCTYPE html>
⋮----
def log(self, fields: BaseModel) -> None
⋮----
"""Log a message entry.

        Args:
            fields: ChatDocLoggerFields containing all log information
        """
⋮----
entry_html = self._format_entry(fields)
⋮----
def _format_entry(self, fields: BaseModel) -> str
⋮----
"""Format a log entry as HTML.

        Args:
            fields: ChatDocLoggerFields containing all log information

        Returns:
            HTML string for the entry
        """
entry_id = f"entry_{self.entry_counter}"
⋮----
# Get all relevant fields
responder = str(getattr(fields, "responder", "UNKNOWN"))
task_name = getattr(fields, "task_name", "root")
# TODO (CLAUDE) display sender_entity in parens right after responder,
# other than LLM, e.g. AGENT (USER)
sender_entity = str(getattr(fields, "sender_entity", ""))
tool = getattr(fields, "tool", "")
tool_type = getattr(fields, "tool_type", "")
content = getattr(fields, "content", "")
recipient = getattr(fields, "recipient", "")
⋮----
# Determine CSS class based on responder
responder_upper = responder.upper()
⋮----
css_class = "user"
⋮----
css_class = "llm"
⋮----
css_class = "agent"
⋮----
css_class = "system"
⋮----
css_class = "other"
⋮----
# Determine opacity class based on mark
mark = getattr(fields, "mark", "")
opacity_class = "important" if mark == "*" else "faded"
⋮----
# Start building the entry
html_parts = [
⋮----
# Build smart header
entity_parts = []  # Main header line with entity info
content_preview = ""  # Second line with content preview
⋮----
# Add task name if not root
⋮----
# Handle different responder types
⋮----
# Add responder with sender_entity in parens if different
⋮----
# Show user input preview on second line
⋮----
preview = content.replace("\n", " ")[:60]
⋮----
content_preview = f'"{preview}"'
⋮----
# Get model info from instance - don't uppercase it
model_label = "LLM"
⋮----
model_label = f"LLM ({self.model_info})"
⋮----
# LLM making a tool call - don't uppercase tool names
⋮----
# LLM generating plain text response
⋮----
# Show first line or first 60 chars on second line
first_line = content.split("\n")[0].strip()
⋮----
preview = first_line[:60]
⋮----
agent_label = "AGENT"
⋮----
agent_label = f"AGENT ({sender_entity})"
⋮----
# Agent responding (usually tool handling)
⋮----
# Show tool result preview on second line if available
⋮----
preview = content.replace("\n", " ")[:40]
⋮----
content_preview = f"→ {preview}"
⋮----
preview = content[:50]
⋮----
# Other responder types (like Task)
⋮----
# Add recipient info if present
⋮----
# Construct the two-line header
header_main = " ".join(entity_parts)
⋮----
# Build the header HTML with toggle, mark, and main content on same line
header_html = '<span class="toggle">[+]</span> '
⋮----
# Note: opacity_class already determined above
⋮----
# Add the main header content
⋮----
# Add preview on second line if present
⋮----
# Add expandable header
⋮----
# Add collapsible sections
⋮----
# System messages (if any)
system_content = self._extract_system_content(fields)
⋮----
section_id = f"{entry_id}_system_{idx}"
⋮----
# Tool information
tool = getattr(fields, "tool", None)
# Only add tool section if tool exists and is not empty
⋮----
tool_html = self._format_tool_section(fields, entry_id)
⋮----
# Main content
⋮----
):  # Don't duplicate content if it's a tool
⋮----
# Metadata (recipient, blocked)
metadata_parts = []
recipient = getattr(fields, "recipient", None)
⋮----
block = getattr(fields, "block", None)
⋮----
# Close entry content div
html_parts.append("</div>")  # Close entry-content
html_parts.append("</div>")  # Close entry
⋮----
def _extract_system_content(self, fields: BaseModel) -> List[tuple[str, str]]
⋮----
"""Extract system-related content from fields.

        Returns:
            List of (label, content) tuples
        """
system_content = []
⋮----
# Check for common system message patterns in content
⋮----
# Look for patterns like "[System Prompt]" or "System Reminder:"
⋮----
"""Create a collapsible section.

        Args:
            section_id: Unique ID for the section
            label: Label to display
            content: Content to show when expanded

        Returns:
            HTML string for the collapsible section
        """
⋮----
def _format_tool_section(self, fields: BaseModel, entry_id: str) -> str
⋮----
"""Format tool-related information.

        Args:
            fields: ChatDocLoggerFields containing tool information
            entry_id: Parent entry ID

        Returns:
            HTML string for the tool section
        """
⋮----
tool_id = f"{entry_id}_tool_{self.tool_counter}"
⋮----
# Try to parse content as JSON for better formatting
⋮----
content_dict = json.loads(content)
formatted_content = json.dumps(content_dict, indent=2)
content_html = (
⋮----
content_html = html.escape(content)
⋮----
# Build tool section
tool_name = f"{tool_type}({tool})" if tool_type else tool
⋮----
def _append_to_file(self, content: str) -> None
⋮----
"""Append content to the HTML file.

        Args:
            content: HTML content to append
        """
⋮----
def close(self) -> None
⋮----
"""Close the HTML file with footer."""
footer = """
</file>

<file path="langroid/utils/logging.py">
# Define a function to set up the colored logger
def setup_colored_logging() -> None
⋮----
# Define the log format with color codes
log_format = "%(log_color)s%(asctime)s - %(levelname)s - %(message)s%(reset)s"
# Create a color formatter
color_formatter = colorlog.ColoredFormatter(
# Configure the root logger to use the color formatter
handler = logging.StreamHandler()
⋮----
logger = logging.getLogger()
⋮----
# logger.setLevel(logging.DEBUG)
⋮----
"""
    Set up a logger of module `name` at a desired level.
    Args:
        name: module name
        level: desired logging level
    Returns:
        logger
    """
logger = logging.getLogger(name)
⋮----
formatter = logging.Formatter(
⋮----
def setup_console_logger(name: str) -> logging.Logger
⋮----
logger = setup_logger(name)
⋮----
file_mode = "a" if append else "w"
logger = setup_logger(name, terminal=False)
handler = logging.FileHandler(filename, mode=file_mode, encoding="utf-8")
⋮----
formatter = logging.Formatter("%(message)s")
⋮----
def setup_loggers_for_package(package_name: str, level: int) -> None
⋮----
"""
    Set up loggers for all modules in a package.
    This ensures that log-levels of modules outside the package are not affected.
    Args:
        package_name: main package name
        level: desired logging level
    Returns:
    """
⋮----
package = importlib.import_module(package_name)
⋮----
module = importlib.import_module(module_name)
⋮----
class RichFileLogger
⋮----
"""Singleton-per-path, ref-counted, thread-safe file logger.

    • Any number of calls to `RichFileLogger(path)` yield the same object.
    • A per-instance lock guarantees that the underlying file is opened only
      once, even when many threads construct the logger concurrently.
    • A reference counter tracks how many parts of the program are using the
      logger; the FD is closed only when the counter reaches zero.
    • All writes are serialised with a dedicated write-lock.
    """
⋮----
_instances: ClassVar[Dict[str, "RichFileLogger"]] = {}
_ref_counts: ClassVar[Dict[str, int]] = {}
# guards _instances & _ref_counts
_class_lock: ClassVar[threading.Lock] = threading.Lock()
⋮----
# ------------------------------------------------------------------ #
# construction / destruction
⋮----
inst = super().__new__(cls)
# create the per-instance init-lock *before* releasing class-lock
⋮----
def __init__(self, log_file: str, append: bool = False, color: bool = True) -> None
⋮----
# Double-checked locking: perform heavy init exactly once.
⋮----
mode = "a" if append else "w"
⋮----
except OSError as exc:  # EMFILE: too many open files
⋮----
# Fallback: reuse an already-open stream to avoid creating a new FD
⋮----
self._init_done = True  # set last
⋮----
# public API
⋮----
@no_type_check
    def log(self, message: str) -> None
⋮----
"""Thread-safe write to the log file."""
⋮----
def close(self) -> None
⋮----
"""Decrease ref-count; close FD only when last user is done."""
⋮----
count = self._ref_counts.get(self.log_file, 0) - 1
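
# Illustrative usage sketch (file path is hypothetical): two constructions with
# the same path return the same ref-counted instance; the file descriptor is
# released only after the last close().
if __name__ == "__main__":
    a = RichFileLogger("demo.log", append=True)
    b = RichFileLogger("demo.log")  # same object as `a`
    assert a is b
    a.log("hello from a")
    b.log("hello from b")
    a.close()  # ref-count 2 -> 1, file stays open
    b.close()  # ref-count 1 -> 0, file is closed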
</file>

<file path="langroid/utils/object_registry.py">
# any derivative of BaseModel that has an id() method or an id attribute
ObjWithId: TypeAlias = ChatDocument | ChatAgent | Agent
⋮----
ObjWithId = BaseModel
⋮----
# Define a type variable that can be any subclass of BaseModel
T = TypeVar("T", bound=BaseModel)
⋮----
class ObjectRegistry
⋮----
"""A global registry to hold id -> object mappings."""
⋮----
registry: Dict[str, ObjWithId] = {}
⋮----
@classmethod
    def add(cls, obj: ObjWithId) -> str
⋮----
"""Adds an object to the registry, returning the object's ID."""
object_id = obj.id() if callable(obj.id) else obj.id
⋮----
@classmethod
    def get(cls, obj_id: str) -> Optional[ObjWithId]
⋮----
"""Retrieves an object by ID if it still exists."""
⋮----
@classmethod
    def register_object(cls, obj: ObjWithId) -> str
⋮----
"""Registers an object in the registry, returning the object's ID."""
⋮----
@classmethod
    def remove(cls, obj_id: str) -> None
⋮----
"""Removes an object from the registry."""
⋮----
@classmethod
    def cleanup(cls) -> None
⋮----
"""Cleans up the registry by removing entries where the object is None."""
to_remove = [key for key, value in cls.registry.items() if value is None]
⋮----
@staticmethod
    def new_id() -> str
⋮----
"""Generates a new unique ID."""
⋮----
def scheduled_cleanup(interval: int = 600) -> None
⋮----
"""Periodically cleans up the global registry every 'interval' seconds."""
</file>

<file path="langroid/utils/system.py">
logger = logging.getLogger(__name__)
⋮----
DELETION_ALLOWED_PATHS = [
⋮----
def pydantic_major_version() -> int
⋮----
pydantic_version = importlib.metadata.version("pydantic")
major_version = int(pydantic_version.split(".")[0])
⋮----
class LazyLoad
⋮----
"""Lazy loading of modules or classes."""
⋮----
def __init__(self, import_path: str) -> None
⋮----
def _load_target(self) -> None
⋮----
# Attempt to import as a module
self._target = importlib.import_module(self.import_path)  # type: ignore
⋮----
# If module import fails, attempt to import as a
# class or function from a module
⋮----
module = importlib.import_module(module_path)
⋮----
def __getattr__(self, name: str) -> Any
⋮----
def __call__(self, *args: Any, **kwargs: Any) -> Any
⋮----
def rmdir(path: str) -> bool
⋮----
"""
    Remove a directory recursively.
    Args:
        path (str): path to directory to remove
    Returns:
        True if a directory was removed, False otherwise. Raises an error if removal fails.
    """
⋮----
def caller_name() -> str
⋮----
"""
    Who called the function?
    """
frame = inspect.currentframe()
⋮----
caller_frame = frame.f_back
⋮----
# If there's no caller frame, the function was called from the global scope
⋮----
def friendly_error(e: Exception, msg: str = "An error occurred.") -> str
⋮----
tb = traceback.format_exc()
original_error_message: str = str(e)
full_error_message: str = (
⋮----
def generate_user_id(org: str = "") -> str
⋮----
"""
    Generate a unique user ID based on the username and machine name.
    Returns:
        A SHA-256 hex digest that uniquely identifies the user and machine.
    """
# Get the username
username = getpass.getuser()
⋮----
# Get the machine's name
machine_name = socket.gethostname()
⋮----
org_pfx = f"{org}_" if org else ""
⋮----
# Create a consistent unique ID based on the username and machine name
unique_string = f"{org_pfx}{username}@{machine_name}"
⋮----
# Generate a SHA-256 hash of the unique string
user_id = hashlib.sha256(unique_string.encode()).hexdigest()
⋮----
def update_hash(hash: str | None = None, s: str = "") -> str
⋮----
"""
    Takes a SHA256 hash string and a new string, updates the hash with the new string,
    and returns the updated hash string.

    Args:
        hash (str): A SHA256 hash string.
        s (str): A new string to update the hash with.

    Returns:
        The updated hash in hexadecimal format.
    """
# Create a new hash object if no hash is provided
⋮----
hash_obj = hashlib.sha256()
⋮----
# Convert the hexadecimal hash string to a byte object
hash_bytes = bytes.fromhex(hash)
hash_obj = hashlib.sha256(hash_bytes)
⋮----
# Update the hash with the new string
⋮----
# Return the updated hash in hexadecimal format and the original string
⋮----
def hash(s: str) -> str
⋮----
"""
    Generate a SHA256 hash of a string.

    Args:
        s (str): The string to hash.

    Returns:
        str: The SHA256 hash of the string.
    """
⋮----
def generate_unique_id() -> str
⋮----
"""Generate a unique ID using UUID4."""
⋮----
"""
    Create, overwrite or append to a file, with the given content
    at the specified filepath.
    If content is empty, it simply creates an empty file (like `touch`).

    Args:
        filepath (str|Path): The relative path of the file to be created
        content (str): The content to be written to the file
        if_exists (Literal["overwrite", "skip", "error", "append"]):
            Action to take if file exists
    """
filepath = Path(filepath)
⋮----
mode = "a"
else:  # overwrite
mode = "w"
⋮----
# the newline='\n' argument is used to ensure that
# newlines in the content are written as actual line breaks
⋮----
action = "appended to" if mode == "a" else "created/updated in"
⋮----
def read_file(path: str, line_numbers: bool = False) -> str
⋮----
"""
    Read the contents of a file.

    Args:
        path (str): The path to the file to be read.
        line_numbers (bool, optional): If True, prepend line numbers to each line.
            Defaults to False.

    Returns:
        str: The contents of the file, optionally with line numbers.

    Raises:
        FileNotFoundError: If the specified file does not exist.
    """
# raise an error if the file does not exist
⋮----
file = Path(path).expanduser()
content = file.read_text()
⋮----
lines = content.splitlines()
numbered_lines = [f"{i+1}: {line}" for i, line in enumerate(lines)]
⋮----
def diff_files(file1: str, file2: str) -> str
⋮----
"""
    Find the diffs between two files, in unified diff format.
    """
⋮----
lines1 = f1.readlines()
lines2 = f2.readlines()
⋮----
differ = difflib.unified_diff(lines1, lines2, fromfile=file1, tofile=file2)
diff_result = "".join(differ)
⋮----
def list_dir(path: str | Path) -> list[str]
⋮----
"""
    List the contents of a directory.

    Args:
        path (str): The path to the directory.

    Returns:
        list[str]: A list of the files and directories in the specified directory.
    """
dir_path = Path(path)
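
# Illustrative usage sketch of update_hash: each call hashes the previous digest
# together with the new string, yielding a running fingerprint of a sequence.
if __name__ == "__main__":
    h = update_hash(None, "first chunk")
    h = update_hash(h, "second chunk")
    print(h)  # 64-character hex digest
    # read_file("pyproject.toml", line_numbers=True) would prepend 1-based
    # line numbers to each line (file path here is hypothetical).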
</file>

<file path="langroid/utils/types.py">
logger = logging.getLogger(__name__)
PrimitiveType = Union[int, float, bool, str]
T = TypeVar("T")
⋮----
def is_instance_of(obj: Any, type_hint: Type[T] | Any) -> bool
⋮----
"""
    Check if an object is an instance of a type hint, e.g.
    to check whether x is of type `List[ToolMessage]` or type `int`
    """
⋮----
origin = get_origin(type_hint)
args = get_args(type_hint)
⋮----
if origin:  # e.g. List, Dict, Tuple, Set
⋮----
# check if all items in obj are of the required types
⋮----
def to_string(msg: Any) -> str
⋮----
"""
    Best-effort conversion of arbitrary msg to str.
    Return empty string if conversion fails.
    """
⋮----
# last resort: use json.dumps() or str() to make it a str
⋮----
def is_callable(obj: Any, k: int = 1) -> bool
⋮----
"""Check if object is callable and accepts exactly k args.

    Args:
        obj: Object to check
        k: Number of arguments the callable must accept (default 1)

    Returns:
        bool: True if object is callable with k args, False otherwise
    """
⋮----
sig = signature(obj)
params = list(sig.parameters.values())
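
# Illustrative usage sketch: is_instance_of handles parameterized type hints
# that plain isinstance cannot.
if __name__ == "__main__":
    from typing import List

    assert is_instance_of([1, 2, 3], List[int])
    assert is_instance_of(7, int)
    assert is_callable(lambda x: x)  # callable accepting exactly one argument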
</file>

<file path="langroid/vector_store/__init__.py">
__all__ = [
⋮----
chromadb  # silence linters
⋮----
postgres  # silence linters
</file>

<file path="langroid/vector_store/chromadb.py">
logger = logging.getLogger(__name__)
⋮----
class ChromaDBConfig(VectorStoreConfig)
⋮----
collection_name: str = "temp"
storage_path: str = ".chroma/data"
distance: Literal["cosine", "l2", "ip"] = "cosine"
construction_ef: int = 100
search_ef: int = 100
max_neighbors: int = 16
embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
host: str = "127.0.0.1"
port: int = 6333
⋮----
class ChromaDB(VectorStore)
⋮----
def __init__(self, config: ChromaDBConfig = ChromaDBConfig())
⋮----
# chroma_db_impl="duckdb+parquet",
# is_persistent=bool(config.storage_path),
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
"""Clear all collections in the vector store with the given prefix."""
⋮----
coll = [c for c in self.client.list_collections() if c.name.startswith(prefix)]
⋮----
n_empty_deletes = 0
n_non_empty_deletes = 0
⋮----
def clear_empty_collections(self) -> int
⋮----
colls = self.client.list_collections()
n_deletes = 0
⋮----
def list_collections(self, empty: bool = False) -> List[str]
⋮----
"""
        List non-empty collections in the vector store.
        Args:
            empty (bool, optional): Whether to list empty collections.
        Returns:
            List[str]: List of non-empty collection names.
        """
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
"""
        Create a collection in the vector store, optionally replacing an existing
            collection if `replace` is True.
        Args:
            collection_name (str): Name of the collection to create or replace.
            replace (bool, optional): Whether to replace an existing collection.
                Defaults to False.

        """
⋮----
# we could expose other configs, see:
# https://docs.trychroma.com/docs/collections/configure
⋮----
def add_documents(self, documents: Sequence[Document]) -> None
⋮----
contents: List[str] = [document.content for document in documents]
# convert metadatas to dicts so chroma can handle them
metadata_dicts: List[dict[str, Any]] = [
⋮----
# chroma does not handle non-atomic types in metadata
⋮----
ids = [str(d.id()) for d in documents]
⋮----
colls = self.list_collections(empty=True)
⋮----
# embedding_models=embedding_models,
⋮----
def get_all_documents(self, where: str = "") -> List[Document]
⋮----
filter = json.loads(where) if where else None
results = self.collection.get(
⋮----
def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
# get them one by one since chroma mangles the order of the results
# when fetched from a list of ids.
results = [
final_results = {}
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
n = self.collection.count()
⋮----
results = self.collection.query(
docs = self._docs_from_results(results)
# chroma distances are 1 - cosine.
scores = [1 - s for s in results["distances"][0]]
⋮----
def _docs_from_results(self, results: Dict[str, Any]) -> List[Document]
⋮----
"""
        Helper function to convert results from ChromaDB to a list of Documents
        Args:
            results (dict): results from ChromaDB

        Returns:
            List[Document]: list of Documents
        """
⋮----
contents = results["documents"][0]
⋮----
metadatas = results["metadatas"][0]
⋮----
# restore the stringified list of window_ids into the original List[str]
⋮----
docs = [
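
# Illustrative usage sketch (assumes an OpenAI API key for embeddings and a
# local persistent Chroma store; the search method name is assumed to come
# from the VectorStore base class):
if __name__ == "__main__":
    from langroid.mytypes import DocMetaData, Document

    vecdb = ChromaDB(ChromaDBConfig(collection_name="demo"))
    vecdb.create_collection("demo", replace=True)
    vecdb.add_documents(
        [
            Document(
                content="Paris is the capital of France",
                metadata=DocMetaData(source="demo"),
            )
        ]
    )
    for doc, score in vecdb.similar_texts_with_scores("capital of France", k=1):
        print(round(score, 3), doc.content)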
</file>

<file path="langroid/vector_store/lancedb.py">
has_lancedb = True
⋮----
has_lancedb = False
⋮----
logger = logging.getLogger(__name__)
⋮----
class LanceDBConfig(VectorStoreConfig)
⋮----
cloud: bool = False
collection_name: str | None = "temp"
storage_path: str = ".lancedb/data"
embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
distance: str = "cosine"
⋮----
class LanceDB(VectorStore)
⋮----
def __init__(self, config: LanceDBConfig = LanceDBConfig())
⋮----
self.is_from_dataframe = False  # were docs ingested from a dataframe?
self.df_metadata_columns: List[str] = []  # metadata columns from dataframe
⋮----
new_storage_path = config.storage_path + ".new"
⋮----
def clear_empty_collections(self) -> int
⋮----
coll_names = self.list_collections()
n_deletes = 0
⋮----
nr = self.client.open_table(name).head(1).shape[0]
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
"""Clear all collections with the given prefix."""
⋮----
coll_names = [
⋮----
n_empty_deletes = 0
n_non_empty_deletes = 0
⋮----
def list_collections(self, empty: bool = False) -> List[str]
⋮----
"""
        Returns:
            List of collection names that have at least one vector.

        Args:
            empty (bool, optional): Whether to include empty collections.
        """
colls = self.client.table_names(limit=None)
⋮----
if empty:  # include empty tbls
return colls  # type: ignore
counts = [self.client.open_table(coll).head(1).shape[0] for coll in colls]
⋮----
def _create_lance_schema(self, doc_cls: Type[Document]) -> Type[BaseModel]
⋮----
"""
        NOTE: NOT USED, but leaving it here as it may be useful.

        Create a subclass of LanceModel with fields:
         - id (str)
         - Vector field that has dims equal to
            the embedding dimension of the embedding model, and a data field of type
            DocClass.
         - other fields from doc_cls

        Args:
            doc_cls (Type[Document]): A Pydantic model which should be a subclass of
                Document, to be used as the type for the data field.

        Returns:
            Type[BaseModel]: A new Pydantic model subclassing from LanceModel.

        Raises:
            ValueError: If the embedding dimension is not a non-negative integer or
                if `doc_cls` is not a subclass of Document.
        """
⋮----
n = self.embedding_dim
⋮----
# Prepare fields for the new model
fields = {"id": (str, ...), "vector": (Vector(n), ...)}
⋮----
sorted_fields = dict(
# Add both statically and dynamically defined fields from doc_cls
⋮----
field_type = field.annotation if hasattr(field, "annotation") else field
⋮----
# Create the new model with dynamic fields
NewModel = create_model(
⋮----
)  # type: ignore
return NewModel  # type: ignore
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
def add_documents(self, documents: Sequence[Document]) -> None
⋮----
colls = self.list_collections(empty=True)
⋮----
embedding_vecs = self.embedding_fn([doc.content for doc in documents])
coll_name = self.config.collection_name
⋮----
# self._maybe_set_doc_class_schema(documents[0])
table_exists = False
⋮----
# collection exists and  is not empty:
# if replace_collection is True, we'll overwrite the existing collection,
# else we'll append to it.
⋮----
table_exists = True
⋮----
ids = [str(d.id()) for d in documents]
# don't insert all at once, batch in chunks of b,
# else we get an API error
b = self.config.batch_size
⋮----
def make_batches() -> Generator[List[Dict[str, Any]], None, None]
⋮----
batch = [
⋮----
tbl = self.client.open_table(coll_name)
⋮----
batch_gen = make_batches()
batch = next(batch_gen)
# use first batch to create table...
tbl = self.client.create_table(
# ... and add the rest
⋮----
"""
        Add a dataframe to the collection.
        Args:
            df (pd.DataFrame): A dataframe
            content (str): The name of the column in the dataframe that contains the
                text content to be embedded using the embedding model.
            metadata (List[str]): A list of column names in the dataframe that contain
                metadata to be stored in the database. Defaults to [].
        """
⋮----
actual_metadata = metadata.copy()
self.df_metadata_columns = actual_metadata  # could be updated below
# get content column
content_values = df[content].values.tolist()
embedding_vecs = self.embedding_fn(content_values)
⋮----
# add vector column
⋮----
# rename content column to "content", leave existing column intact
df = df.rename(columns={content: "content"}, inplace=False)
⋮----
docs = dataframe_to_documents(df, content="content", metadata=metadata)
ids = [str(d.id()) for d in docs]
⋮----
# collection either doesn't exist or is empty, so replace it
# and set new schema from df
⋮----
doc_cls = dataframe_to_document_model(
self.config.document_class = doc_cls  # type: ignore
⋮----
# collection exists and is not empty, so append to it
tbl = self.client.open_table(self.config.collection_name)
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
df = result.to_pandas()
⋮----
records = result.to_arrow().to_pylist()
⋮----
def _records_to_docs(self, records: List[Dict[str, Any]]) -> List[Document]
⋮----
docs = [self.config.document_class(**rec) for rec in records]
⋮----
def get_all_documents(self, where: str = "") -> List[Document]
⋮----
pre_result = tbl.search(None).where(where or None).limit(None)
⋮----
def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
_ids = [str(id) for id in ids]
⋮----
docs = []
⋮----
results = self._lance_result_to_docs(tbl.search().where(f"id == '{_id}'"))
⋮----
embedding = self.embedding_fn([text])[0]
⋮----
result = (
docs = self._lance_result_to_docs(result)
# note _distance is 1 - cosine
⋮----
scores = [
⋮----
scores = [1 - rec["_distance"] for rec in result.to_arrow().to_pylist()]
⋮----
doc_score_pairs = list(zip(docs, scores))
</file>

<file path="langroid/vector_store/meilisearch.py">
"""
MeiliSearch as a pure document store, without its
(experimental) vector-store functionality.
We aim to use MeiliSearch for fast lexical search.
Note that what we call "Collection" in Langroid is referred to as
"Index" in MeiliSearch. Each data-store has its own terminology,
but for uniformity we use the Langroid terminology here.
"""
⋮----
logger = logging.getLogger(__name__)
⋮----
class MeiliSearchConfig(VectorStoreConfig)
⋮----
cloud: bool = False
collection_name: str | None = None
primary_key: str = "id"
port: int = 7700
⋮----
class MeiliSearch(VectorStore)
⋮----
def __init__(self, config: MeiliSearchConfig = MeiliSearchConfig())
⋮----
# Note: Only create collection if a non-null collection name is provided.
# This is useful to delay creation of db until we have a suitable
# collection name (e.g. we could get it from the url or folder path).
⋮----
def clear_empty_collections(self) -> int
⋮----
"""All collections are treated as non-empty in MeiliSearch, so this is a
        no-op"""
⋮----
async def _async_delete_indices(self, uids: List[str]) -> List[bool]
⋮----
"""Delete any indicecs in `uids` that exist.
        Returns list of bools indicating whether the index has been deleted"""
⋮----
result = await asyncio.gather(
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
"""Delete all indices whose names start with `prefix`"""
⋮----
coll_names = [c for c in self.list_collections() if c.startswith(prefix)]
deletes = asyncio.run(self._async_delete_indices(coll_names))
n_deletes = sum(deletes)
⋮----
def _list_all_collections(self) -> List[str]
⋮----
"""
        List all collections, including empty ones.
        Returns:
            List of collection names.
        """
⋮----
async def _async_get_indexes(self) -> List[AsyncIndex]
⋮----
indexes = await client.get_indexes(limit=10_000)
return [] if indexes is None else indexes  # type: ignore
⋮----
async def _async_get_index(self, index_uid: str) -> "AsyncIndex"
⋮----
index = await client.get_index(index_uid)
return index  # type: ignore
⋮----
def list_collections(self, empty: bool = False) -> List[str]
⋮----
"""
        Returns:
            List of index names stored. We treat any existing index as non-empty.
        """
indexes = asyncio.run(self._async_get_indexes())
⋮----
async def _async_create_index(self, collection_name: str) -> "AsyncIndex"
⋮----
index = await client.create_index(
⋮----
async def _async_delete_index(self, collection_name: str) -> bool
⋮----
"""Delete index if it exists. Returns True iff index was deleted"""
⋮----
result = await client.delete_index_if_exists(uid=collection_name)
return result  # type: ignore
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
"""
        Create a collection with the given name, optionally replacing an existing
            collection if `replace` is True.
        Args:
            collection_name (str): Name of the collection to create.
            replace (bool): Whether to replace an existing collection
                with the same name. Defaults to False.
        """
⋮----
collections = self.list_collections()
⋮----
collection_info = asyncio.run(self._async_get_index(collection_name))
⋮----
level = logger.getEffectiveLevel()
⋮----
index = client.index(collection_name)
⋮----
def add_documents(self, documents: Sequence[Document]) -> None
⋮----
colls = self._list_all_collections()
⋮----
docs = [
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
def _to_int_or_uuid(self, id: str) -> int | str
⋮----
async def _async_get_documents(self, where: str = "") -> "DocumentsInfo"
⋮----
filter = [] if where is None else where
⋮----
index = client.index(self.config.collection_name)
documents = await index.get_documents(limit=10_000, filter=filter)
⋮----
def get_all_documents(self, where: str = "") -> List[Document]
⋮----
docs = asyncio.run(self._async_get_documents(where))
⋮----
doc_results = docs.results
⋮----
async def _async_get_documents_by_ids(self, ids: List[str]) -> List[Dict[str, Any]]
⋮----
documents = await asyncio.gather(*[index.get_document(id) for id in ids])
⋮----
def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
docs = asyncio.run(self._async_get_documents_by_ids(ids))
⋮----
results = await index.search(
return results.hits  # type: ignore
⋮----
neighbors: int = 0,  # ignored
⋮----
_docs = asyncio.run(self._async_search(text, k, filter))  # type: ignore
⋮----
scores = [h["_rankingScore"] for h in _docs]
⋮----
doc_score_pairs = list(zip(docs, scores))
</file>

<file path="langroid/vector_store/pineconedb.py">
# import dataclass
⋮----
logger = logging.getLogger(__name__)
⋮----
has_pinecone: bool = True
⋮----
class ServerlessSpec(BaseModel)
⋮----
"""
            Fallback Serverless specification configuration to avoid import errors.
            """
⋮----
cloud: str
region: str
⋮----
PineconeApiException = Any  # type: ignore
Pinecone = Any  # type: ignore
has_pinecone = False
⋮----
@dataclass(frozen=True)
class IndexMeta
⋮----
name: str
total_vector_count: int
⋮----
class PineconeDBConfig(VectorStoreConfig)
⋮----
cloud: bool = True
collection_name: str | None = "temp"
spec: ServerlessSpec = ServerlessSpec(cloud="aws", region="us-east-1")
deletion_protection: Literal["enabled", "disabled"] | None = None
metric: str = "cosine"
pagination_size: int = 100
⋮----
class PineconeDB(VectorStore)
⋮----
def __init__(self, config: PineconeDBConfig = PineconeDBConfig())
⋮----
key = os.getenv("PINECONE_API_KEY")
⋮----
def clear_empty_collections(self) -> int
⋮----
indexes = self._list_index_metas(empty=True)
n_deletes = 0
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
"""
        Returns:
            Number of Pinecone indexes that were deleted

        Args:
            really: Optional[bool] - whether to really delete all Pinecone collections
            prefix: Optional[str] - string to match potential Pinecone
                indexes for deletion
        """
⋮----
indexes = [
⋮----
def list_collections(self, empty: bool = False) -> List[str]
⋮----
"""
        Returns:
            List of Pinecone indices that have at least one vector.

        Args:
            empty: Optional[bool] - whether to include empty collections
        """
indexes = self.client.list_indexes()
res: List[str] = []
⋮----
index_meta = self.client.Index(name=index)
⋮----
def _list_index_metas(self, empty: bool = False) -> List[IndexMeta]
⋮----
"""
        Returns:
            List of objects describing Pinecone indices

        Args:
            empty: Optional[bool] - whether to include empty collections
        """
⋮----
res = []
⋮----
index_meta = self._fetch_index_meta(index)
⋮----
def _fetch_index_meta(self, index_name: str) -> IndexMeta
⋮----
"""
        Returns:
            A dataclass describing the given index by name and vector count,
            to avoid repeated index-description calls

        Args:
            index_name: str - Name of the index in Pinecone
        """
⋮----
index = self.client.Index(name=index_name)
stats = index.describe_index_stats()
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
"""
        Create a collection with the given name, optionally replacing an existing
        collection if `replace` is True.

        Args:
            collection_name: str - Name of the collection to create.
            replace: Optional[Bool] - Whether to replace an existing collection
                with the same name. Defaults to False.
        """
pattern = re.compile(r"^[a-z0-9-]+$")
⋮----
index = self.client.Index(name=collection_name)
⋮----
status = self.client.describe_index(name=collection_name)
⋮----
payload = {
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
def add_documents(self, documents: Sequence[Document], namespace: str = "") -> None
⋮----
document_dicts = [doc.model_dump() for doc in documents]
document_ids = [doc.id() for doc in documents]
embedding_vectors = self.embedding_fn([doc.content for doc in documents])
vectors = [
⋮----
index = self.client.Index(name=self.config.collection_name)
batch_size = self.config.batch_size
⋮----
"""
        Returns:
            All documents for the collection currently defined in
            the configuration object

        Args:
            prefix: str - document id prefix to search for
            namespace: str - partition of vectors to search within the index
        """
⋮----
docs = []
⋮----
request_filters: Dict[str, Union[str, int]] = {
⋮----
response = index.list_paginated(**request_filters)
vectors = response.get("vectors", [])
⋮----
pagination_token = response.get("pagination", {}).get("next", None)
⋮----
"""
        Returns:
            Fetches document text embedded in Pinecone index metadata

        Args:
            ids: List[str] - vector data object ids to retrieve
            namespace: str - partition of vectors to search within the index
        """
⋮----
records = index.fetch(ids=ids, namespace=namespace)
⋮----
records = index.fetch(ids=ids)
⋮----
id_mapping = {key: value for key, value in records["vectors"].items()}
ordered_payloads = [id_mapping[_id] for _id in ids if _id in id_mapping]
⋮----
vector_search_request = {
⋮----
response = index.query(**vector_search_request)
doc_score_pairs = [
⋮----
max_score = max([pair[1] for pair in doc_score_pairs])
⋮----
def transform_pinecone_vector(self, metadata_dict: Dict[str, Any]) -> Document
⋮----
"""
        Parses the metadata response from the Pinecone vector query and
        formats it into a dictionary that can be parsed by the Document class
        associated with the PineconeDBConfig class

        Returns:
            A Document constructed from the well-formed metadata dictionary

        Args:
            metadata_dict: Dict - the metadata dictionary from the Pinecone
                vector query match
        """
</file>

<file path="langroid/vector_store/postgres.py">
has_postgres: bool = True
⋮----
Engine = Any  # type: ignore
Connection = Any  # type: ignore
has_postgres = False
⋮----
logger = logging.getLogger(__name__)
⋮----
class PostgresDBConfig(VectorStoreConfig)
⋮----
collection_name: str = "embeddings"
cloud: bool = False
docker: bool = True
host: str = "127.0.0.1"
port: int = 5432
replace_collection: bool = False
embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
pool_size: int = 10
max_overflow: int = 20
hnsw_m: int = 16
hnsw_ef_construction: int = 200
⋮----
class PostgresDB(VectorStore)
⋮----
def __init__(self, config: PostgresDBConfig = PostgresDBConfig())
⋮----
def _create_engine(self) -> Engine
⋮----
"""Creates a SQLAlchemy engine based on the configuration."""
⋮----
connection_string: str | None = None  # Ensure variable is always defined
⋮----
connection_string = os.getenv("POSTGRES_CONNECTION_STRING")
⋮----
connection_string = connection_string.replace(
⋮----
username = os.getenv("POSTGRES_USER", "postgres")
password = os.getenv("POSTGRES_PASSWORD", "postgres")
database = os.getenv("POSTGRES_DB", "langroid")
⋮----
connection_string = (
self.config.cloud = False  # Ensures cloud is disabled if using Docker
⋮----
def _setup_table(self) -> None
⋮----
# Create HNSW index for embeddings column if it doesn't exist.
# This index enables efficient nearest-neighbor search using cosine similarity.
# PostgreSQL automatically builds the index after creation;
# no manual step required.
# Read more about pgvector hnsw index here:
# https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw
⋮----
index_name = f"hnsw_index_{self.config.collection_name}_embedding"
⋮----
create_index_query = text(
⋮----
def index_exists(self, connection: Connection, index_name: str) -> bool
⋮----
"""Check if an index exists."""
query = text(
result = connection.execute(query).scalar()
⋮----
@staticmethod
    def _create_vector_extension(conn: Engine) -> None
⋮----
# The number is a unique identifier used to lock a specific resource
# during transaction. Any 64-bit integer can be used for advisory locks.
# Acquire advisory lock to ensure atomic, isolated setup
# and prevent race conditions.
⋮----
statement = text(
⋮----
def set_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
inspector = inspect(self.engine)
table_exists = collection_name in inspector.get_table_names()
⋮----
def list_collections(self, empty: bool = True) -> List[str]
⋮----
table_names = inspector.get_table_names()
⋮----
collections = []
⋮----
table = Table(table_name, self.metadata, autoload_with=self.engine)
⋮----
# Efficiently check for non-emptiness
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
"""
        Deletes a collection and its associated HNSW index, handling metadata
        synchronization issues.
        """
⋮----
index_name = f"hnsw_index_{collection_name}_embedding"
drop_index_query = text(f"DROP INDEX CONCURRENTLY IF EXISTS {index_name}")
⋮----
# 3. Now, drop the table using SQLAlchemy
table = Table(collection_name, self.metadata)
⋮----
# 4. Refresh metadata again after dropping the table
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
deleted_count = 0
⋮----
# Use delete_collection to handle index and table deletion
⋮----
def clear_empty_collections(self) -> int
⋮----
# Efficiently check for emptiness without fetching all rows
⋮----
# Use delete_collection to handle index and table deletion
⋮----
session.commit()  # Commit is likely not needed here
⋮----
def _parse_embedding_store_record(self, res: Any) -> Dict[str, Any]
⋮----
metadata = res.cmetadata or {}
⋮----
def get_all_documents(self, where: str = "") -> List[Document]
⋮----
query = session.query(self.embeddings_table)
⋮----
# Apply 'where' clause if provided
⋮----
where_json = json.loads(where)
query = query.filter(
⋮----
return []  # Return empty list or handle error as appropriate
⋮----
results = query.all()
documents = [
⋮----
def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
# Add a CASE statement to preserve the order of IDs
case_stmt = case(
⋮----
query = (
⋮----
.order_by(case_stmt)  # Order by the CASE statement
⋮----
def add_documents(self, documents: Sequence[Document]) -> None
⋮----
embeddings = self.embedding_fn([doc.content for doc in documents])
⋮----
batch_size = self.config.batch_size
⋮----
batch_docs = documents[i : i + batch_size]
batch_embeddings = embeddings[i : i + batch_size]
⋮----
new_records = [
⋮----
stmt = insert(self.embeddings_table).values(new_records)
⋮----
@staticmethod
    def _id_to_uuid(id: str, obj: object) -> str
⋮----
doc_id = str(uuid.UUID(id))
⋮----
obj_repr = repr(obj)
⋮----
obj_hash = hashlib.sha256(obj_repr.encode()).hexdigest()
⋮----
combined = f"{id}-{obj_hash}"
⋮----
doc_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, combined))
⋮----
neighbors: int = 1,  # Parameter not used in this implementation
⋮----
embedding = self.embedding_fn([query])[0]
⋮----
# Calculate the score (1 - cosine_distance) and label it as "score"
score = (
⋮----
json_query = json.loads(where)
⋮----
results = (
⋮----
score,  # Select the calculated score
⋮----
.order_by(score.desc())  # Order by score in descending order
⋮----
documents_with_scores = [
⋮----
result.score,  # Use the score from the query result
</file>

<file path="langroid/vector_store/weaviatedb.py">
logger = logging.getLogger(__name__)
⋮----
class VectorDistances
⋮----
"""
    Fallback class when weaviate is not installed, to avoid import errors.
    """
⋮----
COSINE: str = "cosine"
DOTPRODUCT: str = "dot"
L2: str = "l2"
⋮----
class WeaviateDBConfig(VectorStoreConfig)
⋮----
collection_name: str | None = "temp"
embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
distance: str = VectorDistances.COSINE
cloud: bool = False
docker: bool = False
host: str = "127.0.0.1"
port: int = 8080
storage_path: str = ".weaviate_embedded/data"
⋮----
class WeaviateDB(VectorStore)
⋮----
def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig())
⋮----
key = os.getenv("WEAVIATE_API_KEY")
url = os.getenv("WEAVIATE_API_URL")
⋮----
def clear_empty_collections(self) -> int
⋮----
colls = self.client.collections.list_all()
n_deletes = 0
⋮----
val = self.client.collections.get(coll_name)
⋮----
def list_collections(self, empty: bool = False) -> List[str]
⋮----
non_empty_colls = [
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
coll_names = [
⋮----
n_empty_deletes = 0
n_non_empty_deletes = 0
⋮----
info = self.client.collections.get(name)
points_count = len(info)
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
collection_name = WeaviateDB.validate_and_format_collection_name(
⋮----
coll = self.client.collections.get(name=collection_name)
⋮----
vector_index_config = Configure.VectorIndex.hnsw(
⋮----
vectorizer_config = Configure.Vectorizer.text2vec_openai(
⋮----
vectorizer_config = None
⋮----
collection_info = self.client.collections.create(
collection_info = self.client.collections.get(name=collection_name)
⋮----
level = logger.getEffectiveLevel()
⋮----
def add_documents(self, documents: Sequence[Document]) -> None
⋮----
colls = self.list_collections(empty=True)
⋮----
document_dicts = [doc.model_dump() for doc in documents]
embedding_vecs = self.embedding_fn([doc.content for doc in documents])
⋮----
coll_name = self.client.collections.get(self.config.collection_name)
⋮----
id = doc_dict["metadata"].pop("id", None)
⋮----
def get_all_documents(self, where: str = "") -> List[Document]
⋮----
# cannot use filter as client does not support json type queries
coll = self.client.collections.get(self.config.collection_name)
⋮----
def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
docs = []
⋮----
result = coll_name.query.fetch_objects(
⋮----
id_to_doc = {}
⋮----
doc = self.weaviate_obj_to_doc(item)
⋮----
# Reconstruct the list of documents in the original order of input ids
docs = [id_to_doc[id] for id in ids if id in id_to_doc]
⋮----
embedding = self.embedding_fn([text])[0]
⋮----
response = coll.query.near_vector(
maybe_distances = [item.metadata.distance for item in response.objects]
similarities = [0 if d is None else 1 - d for d in maybe_distances]
docs = [self.weaviate_obj_to_doc(item) for item in response.objects]
⋮----
def _create_valid_uuid_id(self, id: str) -> Any
⋮----
id = get_valid_uuid(id)
⋮----
def weaviate_obj_to_doc(self, input_object: Any) -> Document
⋮----
content = input_object.properties.get("content", "")
metadata_dict = input_object.properties.get("metadata", {})
⋮----
window_ids = metadata_dict.pop("window_ids", [])
window_ids = [str(uuid) for uuid in window_ids]
⋮----
# Ensure the id is a valid UUID string
id_value = get_valid_uuid(input_object.uuid)
⋮----
metadata = DocMetaData(id=id_value, window_ids=window_ids, **metadata_dict)
⋮----
@staticmethod
    def validate_and_format_collection_name(name: str) -> str
⋮----
"""
        Formats the collection name to comply with Weaviate's naming rules:
        - Name must start with a capital letter.
        - Name can only contain letters, numbers, and underscores.
        - Replaces invalid characters with underscores.
        """
⋮----
formatted_name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
⋮----
# Ensure the first letter is capitalized
⋮----
formatted_name = formatted_name.capitalize()
⋮----
# Check if the name now meets the criteria
⋮----
def __del__(self) -> None
⋮----
# Gracefully close the connection with local client
</file>

<file path="langroid/__init__.py">
"""
Main langroid package
"""
⋮----
__all__ = [
</file>

<file path="langroid/exceptions.py">
class XMLException(Exception)
⋮----
def __init__(self, message: str) -> None
⋮----
class InfiniteLoopException(Exception)
⋮----
def __init__(self, message: str = "Infinite loop detected", *args: object) -> None
⋮----
class LangroidImportError(ImportError)
⋮----
"""
        Generate helpful warning when attempting to import package or module.

        Args:
            package (str): The name of the package to import.
            extra (str): The name of the extras package required for this import.
            error (str): The error message to display. Depending on context, we
                can set this by capturing the ImportError message.

        """
⋮----
error = f"{package} is not installed by default with Langroid.\n"
⋮----
help_preamble = f"""
extra = extra[0]
⋮----
install_help = f"""
⋮----
install_help = """
msg = error + install_help
</file>

<file path="langroid/mytypes.py">
Number = Union[int, float]
Embedding = List[Number]
Embeddings = List[Embedding]
EmbeddingFunction = Callable[[List[str]], Embeddings]
⋮----
class Entity(str, Enum)
⋮----
"""
    Enum for the different types of entities that can respond to the current message.
    """
⋮----
AGENT = "Agent"
LLM = "LLM"
USER = "User"
SYSTEM = "System"
⋮----
def __eq__(self, other: object) -> bool
⋮----
"""Allow case-insensitive equality (==) comparison with strings."""
⋮----
def __ne__(self, other: object) -> bool
⋮----
"""Allow case-insensitive non-equality (!=) comparison with strings."""
⋮----
def __hash__(self) -> int
⋮----
"""Override this to ensure hashability of the enum,
        so it can be used in sets and as dictionary keys.
        """
⋮----
class DocMetaData(BaseModel)
⋮----
"""Metadata for a document."""
⋮----
source: str = "context"  # just reference
source_content: str = "context"  # reference and content
title: str = "Unknown Title"
published_date: str = "Unknown Date"
is_chunk: bool = False  # if it is a chunk, don't split
id: str = Field(default_factory=lambda: str(uuid4()))
window_ids: List[str] = []  # for RAG: ids of chunks around this one
⋮----
@field_validator("id", mode="before")
@classmethod
    def convert_id_to_string(cls, v: Any) -> str
⋮----
"""Convert id to string if it's not already."""
⋮----
def dict_bool_int(self, *args: Any, **kwargs: Any) -> Dict[str, Any]
⋮----
"""
        Special dict method to convert bool fields to int, to appease some
        downstream libraries,  e.g. Chroma which complains about bool fields in
        metadata.
        """
original_dict = super().model_dump(*args, **kwargs)
⋮----
def __str__(self) -> str
⋮----
title_str = (
date_str = ""
⋮----
# Try to parse the date string
date_obj = parser.parse(self.published_date)
# Format to include only the date part (year-month-day)
date_only = date_obj.strftime("%Y-%m-%d")
date_str = f"Date: {date_only}"
⋮----
# If parsing fails, just use the original date
date_str = f"Date: {self.published_date}"
components = [self.source] + (
⋮----
model_config = ConfigDict(extra="allow")
⋮----
class Document(BaseModel)
⋮----
"""Interface for interacting with a document."""
⋮----
content: str
metadata: DocMetaData
⋮----
def id(self) -> str
⋮----
class NonToolAction(str, Enum)
⋮----
"""
    Possible options to handle non-tool msgs from LLM.
    """
⋮----
FORWARD_USER = "user"  # forward msg to user
DONE = "done"  # task done
</file>

<file path="release-notes/v0-56-0-task-tool.md">
# Release Notes: TaskTool

## New Feature: `TaskTool` for Spawning Sub-Agents

We've added `TaskTool`, a new tool that enables agents to spawn sub-agents for handling specific tasks. This allows for dynamic task delegation with controlled tool access.

### Key Features:
- Agents can spawn sub-agents with specific tools and configurations
- Sub-agents run non-interactively and return results to the parent
- Supports nested operations and recursive task delegation
- Flexible tool access: delegate specific tools, all tools, or no tools

### Example Usage:
```python
# Agent spawns a sub-agent to handle a calculation
{
    "request": "task_tool",
    "system_message": "You are a calculator. Use multiply_tool to compute products.",
    "prompt": "Calculate 5 * 7",
    "tools": ["multiply_tool"],
    "model": "gpt-4o-mini"
}
```
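
Below is a minimal sketch of how a parent agent could be set up to use `TaskTool` (the import path and configuration details shown here are assumptions; see the documentation and tests linked below for the canonical setup):

```python
import langroid as lr
from langroid.agent.tools.task_tool import TaskTool  # assumed import path

agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        name="planner",
        system_message="Delegate calculations to a sub-agent via `task_tool`.",
    )
)
agent.enable_message(TaskTool)  # parent agent may now emit task_tool calls

task = lr.Task(agent, interactive=False)
# The LLM can now respond with a task_tool call like the JSON above, which
# spawns a non-interactive sub-agent and returns its result to the parent.
```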

### Documentation:
Full documentation with examples: [TaskTool Documentation](https://langroid.github.io/langroid/notes/task-tool/)

### Testing:
See working examples in [`tests/main/test_task_tool.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_task_tool.py)

## Update v0.56.3: Agent Naming Support

### Enhancement:
- Added optional `agent_name` parameter to TaskTool
- Sub-agents can now be given custom names for better logging and debugging
- If not specified, auto-generates unique names in format `agent-{uuid}`

### Example:
```python
{
    "request": "task_tool",
    "system_message": "You are a calculator.",
    "prompt": "Calculate 5 * 7",
    "tools": ["multiply_tool"],
    "agent_name": "calculator-agent"  # Optional: custom name for logging
}
```
</file>

<file path="release-notes/v0-56-11-openai-client-caching.md">
# Release Notes - v0.56.11

## OpenAI Client Connection Management

### HTTP Client Caching
- Implements intelligent client caching for OpenAI and compatible APIs (Groq, Cerebras, etc.)
- Agents with identical configurations now share underlying HTTP clients
- Prevents "too many open files" errors when creating many agents (e.g., 100 agents for 100 data rows)
- Thread-safe implementation allows safe client sharing across threads

### Performance Improvements
- Reduced latency through connection reuse
- Eliminates redundant TCP handshakes
- Decreases CPU usage and network round-trips
- Leverages httpx's built-in connection pooling

### Configuration
- New `use_cached_client` parameter in `OpenAIGPTConfig` (enabled by default)
- Can be disabled for specific use cases:
  ```python
  config = OpenAIGPTConfig(
      chat_model="gpt-4",
      use_cached_client=False  # Disable caching
  )
  ```

### When to Disable Client Caching
- Multiprocessing environments (each process needs its own client)
- When complete client isolation is required between agents
- Debugging client-related issues
- Legacy code that depends on unique client instances

### Technical Implementation
- SHA256-based cache key generation for configuration uniqueness
- Singleton pattern with lazy initialization
- Automatic cleanup via atexit hooks
- Compatible with both sync and async OpenAI clients
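
For intuition, the caching pattern described above can be sketched generically as follows (this is illustrative pseudocode of the approach, not Langroid's actual implementation; `_client_cache` and `get_cached_client` are hypothetical names):

```python
import hashlib
import json
from typing import Any, Dict

from openai import OpenAI

_client_cache: Dict[str, OpenAI] = {}

def get_cached_client(client_kwargs: Dict[str, Any]) -> OpenAI:
    # Deterministic cache key derived from the client-relevant configuration
    key = hashlib.sha256(
        json.dumps(client_kwargs, sort_keys=True).encode()
    ).hexdigest()
    if key not in _client_cache:  # lazy init: one client per unique config
        _client_cache[key] = OpenAI(**client_kwargs)
    return _client_cache[key]
```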
</file>

<file path="release-notes/v0-56-12-cached-tokens-support.md">
# v0.56.12: Cached Tokens Support

## Overview
This release adds support for tracking cached tokens in LLM API responses, enabling accurate cost calculations when using prompt caching features (e.g., OpenAI's prompt caching).

## Key Features

### 1. Cached Token Tracking
- Added `cached_tokens` field to `LLMTokenUsage` class
- Properly extracts cached token counts from OpenAI API responses (both streaming and non-streaming)
- Updated token usage string representation to show cached tokens

### 2. Cost Calculation Updates
- Enhanced cost calculation formula: `(prompt - cached) * input_cost + cached * cached_cost + completion * output_cost` (see the worked example after this list)
- Added `cached_cost_per_million` field to `ModelInfo` for all supported models
- Cached token costs typically 25-50% of regular input token costs
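
As a concrete illustration of the formula above (token counts and per-million prices here are made-up placeholders, not actual model rates):

```python
prompt_tokens, cached_tokens, completion_tokens = 10_000, 6_000, 1_000

# hypothetical prices per million tokens
input_price, cached_price, output_price = 1.00, 0.25, 4.00

cost = (
    (prompt_tokens - cached_tokens) * input_price
    + cached_tokens * cached_price
    + completion_tokens * output_price
) / 1_000_000
print(f"${cost:.4f}")  # $0.0095
```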

### 3. New Model Support
- **Gemini 2.5 Pro**: 1M context, $1.25/$0.31/$10.00 per million tokens
- **Gemini 2.5 Flash**: 1M context, $0.30/$0.075/$2.50 per million tokens
- **Gemini 2.5 Flash Lite Preview**: 64K context, $0.10/$0.025/$0.40 per million tokens

## Code Changes

### Updated Methods
- `compute_token_cost()` in `Agent` class now accepts cached token parameter
- `chat_cost()` returns 3-tuple: (input_cost, cached_cost, output_cost) per 1000 tokens
- `_cost_chat_model()` in OpenAI implementation properly accounts for cached tokens

### API Response Handling
```python
# Cached tokens extracted from OpenAI responses:
cached_tokens = usage.get("prompt_tokens_details", {}).get("cached_tokens", 0)
```

## Testing
- Added comprehensive tests for cached token tracking
- Verified cost calculations with cached tokens
- All existing tests pass without modification

## Breaking Changes
None - all changes are backward compatible.

## Credits
Original implementation by @alexagr in PR #882, with enhancements in PR #884.
</file>

<file path="release-notes/v0-56-13-done-sequences-parent-chain-fixes.md">
# v0.56.13: DoneSequences Parent Chain and Agent ID Fixes

## Summary

This release fixes critical issues with the DoneSequence implementation, parent pointer chain preservation in TaskTool, and agent ID initialization. These fixes ensure that task termination sequences work correctly with subtasks and that message lineage is properly maintained across agent boundaries.

## Key Fixes

### 1. Agent ID Initialization Fix

**Problem**: The `Agent.id` field was incorrectly returning a `FieldInfo` object instead of an actual ID string because `Agent` is not a Pydantic model but was using Pydantic's `Field` syntax.

**Solution**: Added proper ID initialization in `Agent.__init__()`:
```python
self.id = ObjectRegistry.new_id()  # Initialize agent ID
```

**Impact**: This ensures that `agent_id` is correctly set in `ChatDocument` metadata, which is crucial for tracking which agent owns which messages.

### 2. DoneSequence Message Chain Fix

**Problem**: The `_get_message_chain` method in `Task` was traversing parent pointers to build the message chain. When subtasks are involved, parent pointers can cross agent boundaries, incorrectly including messages from subtask agents in the parent task's chain.

**Solution**: Replaced parent pointer traversal with agent message history:
```python
def _get_message_chain(self, msg: ChatDocument | None, max_depth: Optional[int] = None) -> List[ChatDocument]:
    """Get the chain of messages using agent's message history."""
    # Get chat document IDs from message history
    doc_ids = [m.chat_document_id for m in self.agent.message_history 
               if m.chat_document_id]
    
    # Add current message ID if it exists and is not already the last one
    if msg:
        msg_id = msg.id()
        if not doc_ids or doc_ids[-1] != msg_id:
            doc_ids.append(msg_id)
    
    # Take only the last max_depth elements (keep everything if max_depth is None)
    if max_depth is None:
        max_depth = len(doc_ids)
    relevant_ids = doc_ids[-max_depth:]
    
    # Convert IDs to ChatDocuments
    return [doc for doc_id in relevant_ids 
            if (doc := ChatDocument.from_id(doc_id)) is not None]
```

**Impact**: DoneSequences now correctly check only messages from the current agent, preventing incorrect task termination when subtasks generate matching sequences.

### 3. Parent Pointer Preservation in Task.init()

**Problem**: When `ChatDocument.deepcopy()` is called during `task.init()`, it resets `parent_id` and `child_id` to empty strings, breaking the parent chain. This particularly affected TaskTool when creating subtasks with parent pointers.

**Solution**: Modified `task.init()` to preserve the original parent_id after deepcopy:
```python
if isinstance(msg, ChatDocument):
    original_parent_id = msg.metadata.parent_id
    self.pending_message = ChatDocument.deepcopy(msg)
    # Preserve the parent pointer from the original message
    self.pending_message.metadata.parent_id = original_parent_id
```

Additionally, added conditional logic to only override parent_id when necessary:
```python
if self.pending_message is not None and self.caller is not None:
    # Only override parent_id if it wasn't already set in the original message
    if not msg.metadata.parent_id:
        self.pending_message.metadata.parent_id = msg.metadata.id
```

**Impact**: Parent chains are now preserved when TaskTool creates subtasks, maintaining proper message lineage.

### 4. TaskTool Parent-Child Relationship

**Problem**: TaskTool was only setting the parent pointer on the prompt ChatDocument but not the corresponding child pointer on the TaskTool message.

**Solution**: Added bidirectional parent-child relationship in TaskTool handlers:
```python
if chat_doc is not None:
    prompt_doc = ChatDocument(
        content=self.prompt,
        metadata=ChatDocMetaData(
            parent_id=chat_doc.id(),
            agent_id=agent.id,
            sender=chat_doc.metadata.sender,
        )
    )
    # Set bidirectional parent-child relationship
    chat_doc.metadata.child_id = prompt_doc.id()
```

**Impact**: Complete bidirectional parent-child chains are maintained, improving message traceability.

## Tests Added

Added comprehensive test `test_task_init_preserves_parent_id()` in `test_task.py` that verifies:
- Parent IDs are preserved during ChatDocument deep copying
- Conditional parent_id override logic works correctly for subtasks
- Parent chains are maintained in various scenarios

## Breaking Changes

None. All changes are backward compatible bug fixes.

## Migration Guide

No migration needed. The fixes will automatically apply to existing code.

## Technical Details

The core issue was that DoneSequences were incorrectly checking messages across agent boundaries due to parent pointer traversal. Combined with the agent ID initialization bug and parent chain breaks in deepcopy, this caused incorrect task termination behavior. The fixes ensure:

1. Each agent's messages are properly tagged with the agent's ID
2. DoneSequence checking is confined to the current agent's message history
3. Parent chains are preserved through TaskTool subtask creation
4. Bidirectional parent-child relationships are maintained

These changes work together to ensure proper message lineage and task termination behavior in multi-agent systems.
</file>

<file path="release-notes/v0-56-15-response-sequence-tracking.md">
# v0.56.15 - Response Sequence Tracking for DoneSequence

## Overview
Improved DoneSequence implementation by introducing response sequence tracking at the Task level, replacing the previous approach that relied on parent pointer traversal or agent message history.

## Changes

### Task Response Sequence Tracking
- Added `response_sequence: List[ChatDocument]` to track messages as the task executes
- Messages are added to the sequence after each `step()` in the `run()` method
- Duplicate messages are prevented by checking if the pending message ID differs from the last element

### Simplified Message Chain Retrieval
- `_get_message_chain()` now simply returns the last N elements from `response_sequence` (see the sketch below)
- Eliminates complexity of parent pointer traversal and agent boundary issues
- More efficient and reliable message chain tracking
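
A minimal sketch of the simplified retrieval described above (illustrative only; the actual implementation may differ in details):

```python
def _get_message_chain(
    self, msg: ChatDocument | None, max_depth: Optional[int] = None
) -> List[ChatDocument]:
    """Return the last `max_depth` messages from the task's response sequence."""
    # msg is already the last element of response_sequence by the time this runs,
    # since messages are appended after each step() with duplicate checking
    n = max_depth if max_depth is not None else len(self.response_sequence)
    return self.response_sequence[-n:]
```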

## Benefits
- Better encapsulation: Task maintains its own response sequence
- More explicit control over what gets added to the sequence
- Cleaner implementation without reaching into agent internals
- Fixes issues with DoneSequence incorrectly including messages from subtask agents

## Testing
All existing done sequence tests pass without modification, confirming backward compatibility.
</file>

<file path="release-notes/v0-56-2-table-chat-fix.md">
# Release Notes - v0.56.2

## TableChatAgent Enhancement: Data Cleaning Support with `df.assign()`

### Overview
This release enhances the TableChatAgent to better support data cleaning operations while maintaining security. Users can now perform column transformations using the safe `df.assign()` method.

### Key Changes

#### 1. Enabled `df.assign()` Method
- Added `assign` to the whitelist of allowed DataFrame methods
- Provides a secure way to create modified DataFrames without allowing arbitrary assignments
- Maintains the existing security model while enabling common data cleaning tasks

#### 2. Improved Agent Guidance
- Updated system message to proactively explain that assignment statements (`df['col'] = ...`) are not allowed
- Clear guidance to use `df.assign()` for data modifications
- Agent now correctly uses `df.assign()` on first attempt, avoiding error-correction cycles

### Example Usage
When asked to clean data, the agent will now use:
```python
df.assign(airline=df['airline'].str.replace('*', ''))
```
Instead of attempting:
```python
df['airline'] = df['airline'].str.replace('*', '')  # This would fail
```

### Security Considerations
- The `assign` method is safe as it returns a new DataFrame without side effects
- Cannot be used for arbitrary code execution, file I/O, or network access
- Expressions passed to `assign` still go through the same sanitization process
- Maintains the eval-only security model (no exec)

### Testing
- Added comprehensive test coverage for self-correction behavior
- Verified agent successfully handles data cleaning requests

This addresses issue #867 and improves the TableChatAgent's utility for data cleaning workflows.
</file>

<file path="release-notes/v0-56-4-handler-params.md">
# Langroid v0.56.4 Release Notes

## Improved Handler Parameter Analysis for Tool Messages

### Overview
This release enhances the internal mechanism for analyzing handler method parameters in `ToolMessage` handlers, providing more robust and accurate type detection.

### Key Improvements

#### Direct Type Checking for Handler Parameters
- **Agent parameter detection**: Now uses direct class checking with `inspect.isclass()` and `issubclass()` for more accurate detection of Agent-typed parameters
- **ChatDocument detection**: Uses direct identity comparison (`param.annotation is ChatDocument`) for exact type matching
- **Complex type support**: Maintains fallback to string-based detection for complex generic types like `Optional[Agent]`
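
A rough sketch of the detection logic described above (illustrative, not the exact `_analyze_handler_params` code; the import paths are assumed):

```python
import inspect

from langroid.agent.base import Agent
from langroid.agent.chat_document import ChatDocument


def is_agent_param(param: inspect.Parameter) -> bool:
    ann = param.annotation
    # Direct class check: covers Agent and its subclasses
    if inspect.isclass(ann) and issubclass(ann, Agent):
        return True
    # Fallback to string-based detection for generics like Optional[Agent]
    return "Agent" in str(ann)


def is_chat_doc_param(param: inspect.Parameter) -> bool:
    # Exact identity comparison for ChatDocument
    return param.annotation is ChatDocument
```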

#### Better Parameter Extraction
- Improved the method for removing the `self` parameter from handler signatures using index slicing instead of name-based filtering
- More reliable parameter analysis for both synchronous and asynchronous handlers
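
For instance, dropping `self` by position rather than by name might look like this (hypothetical sketch, assuming `handler` is the unbound function whose first parameter is `self`):

```python
import inspect


def handler_params(handler) -> list[inspect.Parameter]:
    # Drop `self` via index slicing instead of filtering by parameter name
    return list(inspect.signature(handler).parameters.values())[1:]
```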

### Why This Matters
These improvements make handler parameter detection more robust, especially when working with:
- Subclasses of `Agent` 
- Tools that require specific agent or chat document context
- MCP (Model Context Protocol) tool handlers that use various parameter combinations

### Backward Compatibility
All existing handler patterns continue to work as before. The improvements are internal optimizations that enhance reliability without changing the API.

### Developer Impact
No code changes required. Handlers with type annotations like:
```python
def handle(self, agent: Agent, chat_doc: ChatDocument) -> str:
    ...
```
will benefit from more accurate parameter detection and routing.

### Related Changes
- Removed debug print statement from `_analyze_handler_params` method
- Enhanced test coverage for MCP tools with various handler signatures
</file>

<file path="release-notes/v0-56-6-doc-chat-refactor.md">
# v0.56.6: DocChatAgent Retrieval Configuration Refactor and Critical Fixes

## Summary
- Refactored retrieval parameter configuration in DocChatAgent for better clarity and control
- Fixed critical passages accumulation logic that could include incorrect documents in results
- Fixed reciprocal rank fusion (RRF) bias that unfairly penalized documents found by only one retrieval method
- Added intelligent configuration validation to prevent invalid retrieval setups
- Maintained backward compatibility with deprecated `n_similar_docs` parameter

## Changes Made

### 1. Moved Retrieval Parameters to Proper Location
- Added `n_relevant_chunks` and `n_similar_chunks` to `DocChatAgentConfig` where they logically belong
- Deprecated `n_similar_docs` in `ParsingConfig` (set to `None` by default)
- These parameters provide clearer semantics:
  - `n_similar_chunks`: number of chunks to retrieve by each method (semantic, BM25, fuzzy)
  - `n_relevant_chunks`: final number of chunks to return after all reranking

### 2. Backward Compatibility
- If users still set the deprecated `n_similar_docs` parameter, it will be used for both new parameters
- A deprecation warning is logged to encourage migration to the new parameters
- This ensures existing code continues to work while encouraging adoption of the new, clearer parameters

### 3. Added Smart Configuration Validation
The DocChatAgent initialization now includes intelligent validation to prevent invalid configurations:

#### Cross-Encoder and RRF Conflict Detection
- If both `cross_encoder_reranking_model` and `use_reciprocal_rank_fusion` are set, warns that RRF will be ignored
- Cross-encoder reranking takes precedence over RRF when both are configured

#### Automatic RRF Enablement
- Automatically enables RRF when all of the following conditions are met (see the sketch after this list):
  - No cross-encoder reranking model is set
  - RRF is currently disabled
  - BM25 or fuzzy matching is enabled
  - `n_relevant_chunks` < `n_similar_chunks` × (number of retrieval methods)
- This prevents situations where multiple retrieval methods are used but there's no way to properly combine their results
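
Roughly, the auto-enablement can be pictured as follows (a sketch only; attribute names such as `use_bm25_search` and `use_fuzzy_match` are assumed here rather than taken from these notes):

```python
n_methods = 1 + int(config.use_bm25_search) + int(config.use_fuzzy_match)
if (
    not config.cross_encoder_reranking_model      # no cross-encoder set
    and not config.use_reciprocal_rank_fusion     # RRF currently disabled
    and n_methods > 1                             # BM25 or fuzzy matching enabled
    and config.n_relevant_chunks < config.n_similar_chunks * n_methods
):
    config.use_reciprocal_rank_fusion = True
```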

### 4. Fixed Critical Passages Accumulation Logic
- Previously had a critical flaw where passages accumulation was inconsistent:
  - When using cross-encoder reranking, BM25 and fuzzy match results were appended to passages
  - But deduplication used `[id2doc[id] for id in id2doc.keys()]` which included ALL documents ever seen
  - This could incorrectly include documents from previous iterations not meant to be in the final result
- Fixed to properly handle passages accumulation:
  - When using RRF without cross-encoder: only collect ranks, don't accumulate passages
  - When using cross-encoder or neither RRF nor cross-encoder: properly accumulate passages
  - Ensures correct and consistent behavior across different configuration combinations

### 5. Fixed RRF Bias Issue
- Previously, documents not found by a retrieval method were assigned `float("inf")` as their rank
- This caused documents found by only one method to be unfairly penalized compared to documents found by multiple methods
- Now documents not found by a method get `max_rank = n_similar_chunks * retrieval_multiple`
- This ensures fair scoring while still giving some preference to documents found by multiple methods

Example of the bias that was fixed:
- Before: Document ranked #1 in semantic search only would score: 1/(1+c) ≈ 0.0164 (with c=60)
- Before: Document ranked #20 in all three methods would score: 3/(20+c) ≈ 0.0375
- The mediocre document would rank 2.3x higher despite being lower quality in each method
- After: The single-method document gets a fair chance by assigning reasonable ranks to missing methods

### 6. Updated Dependencies
- Updated all references throughout the codebase:
  - `DocChatAgent`: Uses new parameters throughout
  - `LanceDocChatAgent`: Updated to use `n_similar_chunks`
  - `ParsingConfig`: Made `n_similar_docs` Optional[int]
- Updated ruff pre-commit hook from v0.12.0 to v0.12.1

## Migration Guide

### Old Configuration
```python
config = DocChatAgentConfig(
    parsing=ParsingConfig(
        n_similar_docs=5  # This controlled both retrieval and final output
    )
)
```

### New Configuration
```python
config = DocChatAgentConfig(
    n_similar_chunks=5,    # Number of chunks each method retrieves
    n_relevant_chunks=3,   # Final number after reranking
    parsing=ParsingConfig(
        # n_similar_docs is deprecated, don't set it
    )
)
```

The new configuration provides more flexibility:
- You can retrieve more chunks initially (e.g., 10 per method)
- Then use reranking to select the best ones (e.g., top 3)
- This improves retrieval quality without increasing final context size

## Technical Details

### RRF Score Calculation (Fixed)
```python
# Old (biased) approach:
rank_semantic = id2_rank_semantic.get(id_, float("inf"))
rank_bm25 = id2_rank_bm25.get(id_, float("inf"))
rank_fuzzy = id2_rank_fuzzy.get(id_, float("inf"))

# New (fair) approach:
max_rank = self.config.n_similar_chunks * retrieval_multiple
rank_semantic = id2_rank_semantic.get(id_, max_rank)
rank_bm25 = id2_rank_bm25.get(id_, max_rank)
rank_fuzzy = id2_rank_fuzzy.get(id_, max_rank)
```
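
The ranks are then combined with the standard reciprocal-rank-fusion formula, consistent with the example scores shown earlier (sketch, with `c = 60`):

```python
c = 60  # RRF constant
rrf_score = (
    1 / (rank_semantic + c)
    + 1 / (rank_bm25 + c)
    + 1 / (rank_fuzzy + c)
)
```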

## Impact
- **Better Retrieval Quality**: The RRF fix ensures that high-quality documents found by a single method aren't unfairly discarded
- **Prevents Invalid Configurations**: Smart validation ensures users don't accidentally create setups that would produce poor results
- **Clearer Configuration**: Separating retrieval count from final output count provides more control
- **Fixes Critical Bug**: The passages accumulation fix prevents incorrect documents from appearing in results
- **Backward Compatible**: Existing code continues to work with deprecation warnings

## Related PR
- PR #874: https://github.com/langroid/langroid/pull/874
</file>

<file path="release-notes/v0-56-7-doc-chat-deprecation-fix.md">
# Release Notes for v0.56.7

## DocChatAgent Improvements

- Fixed test failures caused by deprecated `n_similar_docs` parameter interfering with `n_similar_chunks` and `n_relevant_chunks` settings
- Set `n_similar_docs` default to `None` to prevent backward compatibility code from overriding intended retrieval configurations
- Optimized reciprocal rank fusion passage selection using list slicing for better performance

## Bug Fixes

- Resolved issue where `n_similar_docs=4` (old default) was silently overriding test configurations that expected 3 chunks
</file>

<file path="release-notes/v0-56-8-task-tool-spawn-example.md">
# v0.56.8 Release Notes

## 🚀 New Features

### TaskTool Dynamic Sub-Agent Spawning Example

- Added `examples/basic/planner-workflow-spawn.py` demonstrating how to use `TaskTool` to dynamically spawn specialized sub-agents during execution
- Example shows a planner agent that solves multi-step math problems by spawning incremental and doubling agents as needed
- Showcases the power of dynamic agent creation without pre-defining sub-agents in the main script

## 🧪 Testing

- Added comprehensive tests for `TaskTool` including support for `tools="ALL"` option
- Enhanced test coverage for dynamic sub-agent spawning scenarios

## 🛠️ Development Improvements

### Ruff Auto-Fix for Examples

- Updated Makefile to run `ruff check examples/ --fix-only` to automatically fix code style issues in examples
- Removed F401 (unused imports) from ruff's ignore list to catch and fix unused imports
- Auto-fixed imports in 150+ example files for better code consistency
- Examples folder remains excluded from error reporting but benefits from automatic fixes

## 🔧 Configuration Changes

- Commented out flake8 in favor of ruff for linting (ruff is faster and covers all flake8 rules)
- Updated `pyproject.toml` to enable F401 checking
- Modified Makefile to add `--no-force-exclude` flag for ruff when processing examples
</file>

<file path="release-notes/v0-56-9-rrf-crossencoder-fixes.md">
# Release Notes - v0.56.9

## DocChatAgent Improvements

### Fixed Reciprocal Rank Fusion (RRF) Scoring
- Documents not found in a retrieval method now receive a rank of `max_rank + 1` instead of `max_rank`
- This ensures missing documents are properly penalized compared to documents that appear at the last position
- Improves the accuracy of RRF scoring when combining results from semantic search, BM25, and fuzzy matching
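
A sketch of the adjusted fallback, reusing the variable names from the v0.56.6 notes (illustrative, not the exact source):

```python
max_rank = self.config.n_similar_chunks * retrieval_multiple
# Documents missing from a method now rank just past the last real position
rank_semantic = id2_rank_semantic.get(id_, max_rank + 1)
rank_bm25 = id2_rank_bm25.get(id_, max_rank + 1)
rank_fuzzy = id2_rank_fuzzy.get(id_, max_rank + 1)
```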

### Improved Cross-Encoder Reranking
- The `rerank_with_cross_encoder` method now only reorders passages without filtering
- Final selection of `n_relevant_chunks` is handled consistently in `get_relevant_chunks`
- This aligns cross-encoder behavior with other reranking methods (diversity, periphery)

### Simplified Conditional Logic
- Removed redundant checks for `cross_encoder_reranking_model` when `use_reciprocal_rank_fusion` is already being evaluated
- Clearer mutual exclusion between RRF and cross-encoder reranking
- Updated warning message for better clarity when both options are configured
</file>

<file path="release-notes/v0-58-0-crawl4ai-integration.md">
# Langroid Release 0.58.0

## 🎉 Major Features

### 🕷️ Crawl4AI Integration - Advanced Web Crawling with Browser Rendering

We're excited to introduce **Crawl4AI** as a new web crawling option in Langroid! This powerful crawler uses Playwright to render JavaScript-heavy websites, making it ideal for modern web applications.

#### Key Features:
- **Real Browser Rendering**: Handles dynamic content, SPAs, and JavaScript-heavy sites
- **No API Key Required**: Works locally without external dependencies
- **Multiple Extraction Strategies**:
  - CSS selector-based extraction for structured data
  - LLM-based extraction for unstructured content
  - Regex extraction for pattern matching
- **Advanced Markdown Generation**: Apply content filters to remove ads, sidebars, and irrelevant content
- **Deep Crawling**: Recursively crawl entire websites with customizable depth and filters
- **High Performance**: Optional LXML-based scraping for speed optimization

#### Installation:
```bash
pip install "langroid[crawl4ai]"
crawl4ai setup  # Note: Downloads Playwright browsers (~300MB, one-time)
crawl4ai doctor
```

#### Quick Example:
```python
from langroid.parsing.url_loader import URLLoader, Crawl4aiConfig

# Simple usage
config = Crawl4aiConfig()
loader = URLLoader(urls=["https://example.com"], crawler_config=config)
docs = loader.load()

# With extraction strategy
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

css_strategy = JsonCssExtractionStrategy(schema={
    "name": "Articles",
    "baseSelector": "article",
    "fields": [
        {"name": "title", "selector": "h2", "type": "text"},
        {"name": "content", "selector": "p", "type": "text"}
    ]
})

config = Crawl4aiConfig(extraction_strategy=css_strategy)
loader = URLLoader(urls=["https://news.site.com"], crawler_config=config)
docs = loader.load()  # Returns structured JSON data
```

#### Using with DocChatAgent:
```python
# In chat_search.py or similar applications
python examples/docqa/chat_search.py -c crawl4ai
```

See the [full documentation](https://langroid.github.io/langroid/notes/crawl4ai/) for advanced usage including deep crawling, LLM-based extraction, and content filtering.

## 🔧 Improvements

### Enhanced URL Loader Framework
- Added `Crawl4aiConfig` to the URL loader configuration options
- Improved factory pattern to support multiple crawler backends
- Better separation between document URLs (PDF, DOCX) and web pages

### CLI Improvements
- `chat_search.py` now uses Fire instead of Typer for a simpler CLI interface
- Updated help text to list all available crawlers: trafilatura, firecrawl, exa, crawl4ai

## 📚 Documentation
- Added comprehensive Crawl4AI documentation with examples
- Updated navigation in mkdocs.yml
- Added detailed examples in `examples/docqa/crawl4ai_examples.py`

## 🧪 Testing
- Added mocked tests for Crawl4AI functionality
- Added optional integration tests (skipped in CI to avoid Playwright download)
- Run integration tests locally with: `TEST_CRAWL4AI=1 pytest tests/main/test_url_loader.py::test_crawl4ai_integration`

## 🐛 Bug Fixes
- Fixed metadata extraction in crawl4ai implementation
- Improved error handling for missing crawl4ai dependencies
- Fixed import issues and duplicate code in examples

## 📦 Dependencies
- Added optional `crawl4ai>=0.6.3` dependency group
- No changes to core dependencies

## 🚀 Migration Guide
No breaking changes. To use the new Crawl4AI crawler:

1. Install the extra: `pip install "langroid[crawl4ai]"`
2. Run setup: `crawl4ai setup` (one-time Playwright download)
3. Use `Crawl4aiConfig()` instead of other crawler configs

## 🙏 Acknowledgments
Thanks to the contributors who helped improve this release, especially the integration of the powerful crawl4ai library for advanced web scraping capabilities.

---

**Full Changelog**: https://github.com/langroid/langroid/compare/v0.57.0...v0.58.0
</file>

<file path="release-notes/v0.57.0-html-logger.md">
# HTML Logger for Langroid Task System

## Summary
Added a new HTML logger that creates self-contained HTML files with collapsible log entries, providing an interactive way to navigate complex multi-agent conversations. The logger includes automatic refresh capabilities and persistent UI state management.

## Key Features
- **Self-contained HTML output**: Complete HTML files with embedded CSS and JavaScript
- **Collapsible entries**: Interactive expand/collapse for better navigation
- **Visual hierarchy**: Color-coded entities (USER, LLM, AGENT, SYSTEM)
- **Auto-refresh**: Pages refresh every 2 seconds to show new log entries
- **State persistence**: UI states preserved across refreshes using localStorage
- **Smart headers**: Two-line headers showing entity info and content preview
- **Tool display**: Collapsible tool sections with parameters and results

## Visual Design
- Dark theme with monospace font for consistency with terminal output
- Golden/amber header (#d4a017) with timestamp and log filename
- Color scheme:
  - USER: Blue (#00bfff)
  - LLM: Green (#00ff00)
  - AGENT: Orange (#ff9500)
  - SYSTEM: Gray (#888)
- Opacity-based importance indicators (1.0 for important, 0.4 for faded)

## Technical Implementation
- New `HTMLLogger` class in `langroid/utils/html_logger.py`
- Integration with existing task logging system via `init_loggers()`
- Configuration via `TaskConfig(enable_html_logging=True)`
- Automatic clickable file:// link generation at task start
- Proper HTML escaping for security
- Efficient streaming writes with flush() for immediate updates
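
A minimal usage sketch based on the configuration flag above (assuming an otherwise default agent/task setup):

```python
import langroid as lr

agent = lr.ChatAgent(lr.ChatAgentConfig(name="Assistant"))
task = lr.Task(
    agent,
    interactive=False,
    config=lr.TaskConfig(enable_html_logging=True),
)
task.run("Say hello")  # a clickable file:// link to the HTML log is shown at task start
```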

## Testing
- Comprehensive test suite in `tests/main/test_html_logger.py`
- Tests for HTML generation, escaping, and task integration
- All existing tests pass with modifications
</file>

<file path="scripts/fix-pydantic-imports.sh">
#!/bin/bash

# Langroid currently has pydantic v2 compatibility, but internally uses v1,
# via langroid.pydantic_v1. However since `import pydantic` brings in v2,
# this script replaces all instances of 'from pydantic' and 'import pydantic' in
# Python files with 'from langroid.pydantic_v1' and 'import langroid.pydantic_v1'.
#
# It makes an exception if the line contains '# keep', and leaves the
# import untouched. Of course this should be used mainly in tests and examples,
# since we don't want to mix pydantic v1 and v2 within core langroid code.

# Define the directories to search in
directories=("langroid" "examples" "tests")

# Function to perform replacements and log changes
replace_and_log() {
    # Use find to locate all .py files in the specified directories, excluding .venv directories
    find "${directories[@]}" -type f -name '*.py' -not -path '*/.venv/*' | while read -r file; do
        # Check and replace lines starting with specific patterns
        if grep -q '^from pydantic ' "$file" && grep -v '# keep' "$file" | grep -q '^from pydantic '; then
            sed -i'' -e '/^from pydantic .*# keep/!s/^from pydantic /from langroid.pydantic_v1 /' "$file"
            echo "Replaced 'from pydantic ' in $file"
        fi
        if grep -q '^from pydantic\.v1 ' "$file" && grep -v '# keep' "$file" | grep -q '^from pydantic\.v1 '; then
            sed -i'' -e '/^from pydantic\.v1 .*# keep/!s/^from pydantic\.v1 /from langroid.pydantic_v1 /' "$file"
            echo "Replaced 'from pydantic.v1 ' in $file"
        fi
        if grep -q '^import pydantic' "$file" && grep -v '# keep' "$file" | grep -q '^import pydantic'; then
            sed -i'' -e '/^import pydantic.*# keep/!s/^import pydantic/import langroid.pydantic_v1/' "$file"
            echo "Replaced 'import pydantic' in $file"
        fi
    done
}

# Call the function to perform the replacements and logging
replace_and_log
</file>

<file path=".blackignore">
./examples/urlqa/chat-clear.py
</file>

<file path=".coveragerc">
[run]
source = langroid
omit =
    langroid/prompts/*
    langroid/language_models/utils.py
    langroid/parsing/para_sentence_split.py


[html]
directory = coverage_html_report
</file>

<file path=".env-template">
OPENAI_API_KEY=your-key-here-without-quotes
GITHUB_ACCESS_TOKEN=your-personal-access-token-no-quotes
CACHE_TYPE=redis # or momento
REDIS_PASSWORD=your-redis-password-no-quotes
REDIS_HOST=your-redis-hostname-no-quotes
REDIS_PORT=your-redis-port-no-quotes
MOMENTO_AUTH_TOKEN=your-momento-auth-token-no-quotes
QDRANT_API_KEY=your-key
QDRANT_API_URL=https://your.url.here:6333 # note port number must be included
AZURE_OPENAI_API_KEY=your-azure-openai-key-here-without-quotes
AZURE_OPENAI_API_BASE=https://endpoint.openai.azure.com/
AZURE_OPENAI_API_VERSION=2023-05-15
AZURE_OPENAI_DEPLOYMENT_NAME=deployment-name-of-your-model
AZURE_OPENAI_MODEL_NAME=gpt-35-turbo-16k # change according to your setup, remove this comment
AZURE_OPENAI_MODEL_VERSION=1106-Preview # is needed if the model name is `gpt-4`
NEO4J_USERNAME=typically neo4j
NEO4J_PASSWORD=your-neo4j-password
NEO4J_URI=uri-to-access-neo4j-database
NEO4J_DATABASE=typically neo4j
EXA_API_KEY=your-exa-search-key
LANGDB_API_KEY=your-langdb-api-key
LANGDB_PROJECT_ID=your-langdb-project-id
</file>

<file path=".gitignore">
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

.logs/
**/logs/
**/*.log
.idea/
.qdrant/
.DS_Store

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
**/*.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Chainlit
.chainlit/

.vscode

# Emacs
*~
\#*\#
.\#*
sessions/
commands/
.claude/

# Temporary files
to-be-deleted.md
test_debug.py
test_minimal.py
test_debug_full.py
test_agent_difference.py
test_isolated.py
</file>

<file path="bump_version.sh">
#!/bin/sh
cz bump --increment $1
git commit pyproject.toml -m "Bump version"
cz version -p | cut -d' ' -f2
</file>

<file path="chainlit.md">
# Welcome to Langroid 👋

![Langroid](public/langroid-card.png)

---
When it is your turn to enter a message, you can do one of two things:
- write `c` to tell the agent to continue.
    - This is provided as a safeguard against infinite loops, or to prevent a large
      amount of text from being sent to the LLM (which can be costly + slow).
      If you simply want to continue with normal operation, just enter `c`.
- write a response, question or feedback to the agent, depending on context.
</file>

<file path="CLAUDE.md">
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Commands

### Development
- Install core dependencies: `pip install -e .`
- Install dev dependencies: `pip install -e ".[dev]"`
- Install specific feature groups:
  - Document chat features: `pip install -e ".[doc-chat]"`
  - Database features: `pip install -e ".[db]"`
  - HuggingFace embeddings: `pip install -e ".[hf-embeddings]"`
  - All features: `pip install -e ".[all]"`
- Run linting and type checking: `make check`
- Format code: `make lint`

### Testing
- Run all tests: `pytest tests/`
- Run specific test: `pytest tests/main/test_file.py::test_function`
- Run tests with coverage: `pytest --cov=langroid tests/`
- Run only main tests: `make tests` (uses `pytest tests/main`)

### Linting and Type Checking
- Lint code: `make check` (runs black, ruff check, mypy)
- Format only: `make lint` (runs black and ruff fix)
- Type check only: `make type-check`
- Always use `make check` to run lints + mypy before trying to commit changes

### Version and Release Management
- Bump version: `./bump_version.sh [patch|minor|major]`
- Or use make commands:
  - `make all-patch` - Bump patch version, build, push, release
  - `make all-minor` - Bump minor version, build, push, release
  - `make all-major` - Bump major version, build, push, release

## Architecture

Langroid is a framework for building LLM-powered agents that can use tools and collaborate with each other.

### Core Components:

1. **Agents** (`langroid/agent/`):
   - `chat_agent.py` - Base ChatAgent that can converse and use tools
   - `task.py` - Handles execution flow for agents
   - `special/` - Domain-specific agents (doc chat, table chat, SQL chat, etc.)
   - `openai_assistant.py` - Integration with OpenAI Assistant API

2. **Tools** (`langroid/agent/tools/`):
   - Tool system for agents to interact with external systems
   - `tool_message.py` - Protocol for tool messages
   - Various search tools (Google, DuckDuckGo, Tavily, Exa, etc.)

3. **Language Models** (`langroid/language_models/`):
   - Abstract interfaces for different LLM providers
   - Implementations for OpenAI, Azure, local models, etc.
   - Support for hundreds of LLMs via LiteLLM

4. **Vector Stores** (`langroid/vector_store/`):
   - Abstract interface and implementations for different vector databases
   - Includes support for Qdrant, Chroma, LanceDB, Pinecone, PGVector, Weaviate

5. **Document Processing** (`langroid/parsing/`):
   - Parse and process documents from various formats
   - Chunk text for embedding and retrieval
   - Support for PDF, DOCX, images, and more

6. **Embedding Models** (`langroid/embedding_models/`):
   - Abstract interface for embedding generation
   - Support for OpenAI, HuggingFace, and custom embeddings

### Key Multi-Agent Patterns:

- **Task Delegation**: Agents can delegate tasks to other agents through hierarchical task structures
- **Message Passing**: Agents communicate by transforming and passing messages
- **Collaboration**: Multiple agents can work together on complex tasks

### Key Security Features:

- The `full_eval` flag in both `TableChatAgentConfig` and `VectorStoreConfig` controls code injection protection
- Defaults to `False` for security, set to `True` only in trusted environments

## Documentation

- Main documentation is in the `docs/` directory
- Examples in the `examples/` directory demonstrate usage patterns
- Quick start examples available in `examples/quick-start/`

## MCP (Model Context Protocol) Tools Integration

Langroid provides comprehensive support for MCP tools through the `langroid.agent.tools.mcp` module. Here are the key patterns and approaches:

### MCP Tool Creation Methods

#### 1. Using the `@mcp_tool` Decorator (Module Level)
```python
from langroid.agent.tools.mcp import mcp_tool
from fastmcp.client.transports import StdioTransport

transport = StdioTransport(command="...", args=[...])

@mcp_tool(transport, "tool_name")
class MyTool(lr.ToolMessage):
    async def handle_async(self):
        result = await self.call_tool_async()
        # custom processing
        return result
```

**Important**: The decorator creates the transport connection at module import time, so it must be used at module level (not inside async functions).

#### 2. Using `get_tool_async` (Inside Async Functions)
```python
from langroid.agent.tools.mcp.fastmcp_client import get_tool_async

async def main():
    transport = StdioTransport(command="...", args=[...])
    BaseTool = await get_tool_async(transport, "tool_name")
    
    class MyTool(BaseTool):
        async def handle_async(self):
            result = await self.call_tool_async()
            # custom processing
            return result
```

**Use this approach when**:
- Creating tools inside async functions
- Need to avoid event loop conflicts
- Want to delay transport creation until runtime

### Transport Types and Event Loop Considerations

- **StdioTransport**: Creates subprocess immediately, can cause "event loop closed" errors if created at module level in certain contexts
- **SSETransport**: HTTP-based, generally safer for module-level creation
- **Best Practice**: Create transports inside async functions when possible, use `asyncio.run()` wrapper for Fire CLI integration

### Tool Message Request Field and Agent Handlers

When you get an MCP tool named "my_tool", Langroid automatically:

1. **Sets the `request` field**: The dynamically created ToolMessage subclass has `request = "my_tool"`
2. **Enables custom agent handlers**: Agents can define these methods:
   - `my_tool()` - synchronous handler
   - `my_tool_async()` - async handler

The agent's message routing system automatically calls these handlers when the tool is used.

### Custom `handle_async` Method Override

Both decorator and non-decorator approaches support overriding `handle_async`:

```python
class MyTool(BaseTool):  # or use @mcp_tool decorator
    async def handle_async(self):
        # Get raw result from MCP server
        result = await self.call_tool_async()
        
        # Option 1: Return processed result to LLM (continues conversation)
        return f"<ProcessedResult>{result}</ProcessedResult>"
        
        # Option 2: Return ResultTool to terminate task
        return MyResultTool(answer=result)
```

### Common Async Issues and Solutions

**Problem**: "RuntimeError: asyncio.run() cannot be called from a running event loop"
**Solution**: Use `get_tool_async` instead of `@mcp_tool` decorator when already in async context

**Problem**: "RuntimeError: Event loop is closed"
**Solution**: 
- Move transport creation inside async functions
- Use `asyncio.run()` wrapper for Fire CLI integration:
```python
if __name__ == "__main__":
    import asyncio
    def run_main(**kwargs):
        asyncio.run(main(**kwargs))
    Fire(run_main)
```

### MCP Tool Integration Examples

See `examples/mcp/` for working examples:
- `gitmcp.py` - HTTP-based SSE transport
- `pyodide_code_executor.py` - Subprocess-based stdio transport with proper async handling

## Testing and Tool Message Patterns

### MockLM for Testing Tool Generation
- Use `MockLM` with `response_dict` to simulate LLM responses that include tool messages
- Set `tools=[ToolClass]` or `enable_message=[ToolClass]` on the agent to enable tool handling
- The `try_get_tool_messages()` method can extract tool messages from LLM responses with `all_tools=True`

### Task Termination Control
- `TaskConfig` has `done_if_tool` parameter to terminate tasks when any tool is generated
- `Task.done()` method checks `result.agent_response` for tool content when this flag is set
- Useful for workflows where tool generation signals task completion

### Testing Tool-Based Task Flows
```python
# Example: Test task termination on tool generation
config = TaskConfig(done_if_tool=True)
task = Task(agent, config=config)
response_dict = {"content": '{"request": "my_tool", "param": "value"}'}
```

## Multi-Agent System Development

### Important Patterns and Best Practices

#### 1. Pydantic Imports
**ALWAYS import Pydantic classes from `langroid.pydantic_v1`**, not from `pydantic` directly:
```python
# CORRECT
from langroid.pydantic_v1 import Field, BaseModel

# WRONG - will cause issues
from pydantic import Field, BaseModel
```

#### 2. Tool Name References in System Messages
When referencing tool names in f-strings within system messages, use the `.name()` method:
```python
system_message: str = f"""
Use {MyTool.name()} to perform the action.
"""
```
This works at module level in configs, but be aware that complex initialization at module level can sometimes cause issues.

#### 3. Agent Configuration with LLM
Always specify the LLM configuration explicitly in agent configs:
```python
class MyAgentConfig(lr.ChatAgentConfig):
    name: str = "MyAgent"
    llm: lm.OpenAIGPTConfig = lm.OpenAIGPTConfig(
        chat_model="gpt-4",  # or "gpt-4.1" etc.
    )
    system_message: str = "..."
```

#### 4. Tool Organization in Multi-Agent Systems
When tools delegate to agents:
- Define agent configs and agents BEFORE the tools that use them
- Tools can directly instantiate agents in their `handle()` methods:
```python
class MyTool(lr.ToolMessage):
    def handle(self) -> str:
        agent = MyAgent(MyAgentConfig())
        task = lr.Task(agent, interactive=False)
        result = task.run(prompt)
        return result.content
```

#### 5. Task Termination with Done Sequences
Use `done_sequences` for precise task termination control:
```python
# For a task that should complete after: Tool -> Agent handles -> LLM responds
task = lr.Task(
    agent,
    interactive=False,
    config=lr.TaskConfig(done_sequences=["T,A,L"]),
)
```

Common patterns:
- `"T,A"` - Tool used and handled by agent
- `"T,A,L"` - Tool used, handled, then LLM responds
- `"T[specific_tool],A"` - Specific tool used and handled

See `docs/notes/task-termination.md` for comprehensive documentation.

#### 6. Handling Non-Tool LLM Responses
Use `handle_llm_no_tool` in agent configs to handle cases where the LLM forgets to use a tool:
```python
class MyAgentConfig(lr.ChatAgentConfig):
    handle_llm_no_tool: str = "You FORGOT to use one of your TOOLs!"
```

#### 7. Agent Method Parameters
Note that `ChatAgentConfig` does not have a `use_tools` parameter. Instead, enable tools on the agent after creation:
```python
agent = MyAgent(config)
agent.enable_message([Tool1, Tool2, Tool3])  # Pass list of tool classes
```

## Commit and Pull Request Guidelines

- Never include "co-authored by Claude Code" or "created by Claude" in commit messages or pull request descriptions

## Codecov Badge Fix (June 2025)

- Fixed broken Codecov badge in README by removing the token parameter from the URL
- Changed from `https://codecov.io/gh/langroid/langroid/branch/main/graph/badge.svg?token=H94BX5F0TE` to `https://codecov.io/gh/langroid/langroid/graph/badge.svg`
- Tokens are not needed for public repositories and can cause GitHub rendering issues
</file>

<file path="CODE_OF_CONDUCT.md">
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
  overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or
  advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
  address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
pchalasani@gmail.com.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series
of actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior,  harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
</file>

<file path="CONTRIBUTING.md">
# Contributing to Langroid


Thank you for your interest in contributing to Langroid!
We want to fundamentally change how LLM applications are built, 
using Langroid's principled multi-agent framework. 
Together, let us build the future of LLM-apps!
We welcome contributions from everyone.

Below you will find guidelines and suggestions for contributing.
We explicitly designed Langroid with a transparent, flexible architecture to 
make it easier to build LLM-powered applications, as well as 
to make it easier to contribute to Langroid itself.
Feel free to join us on [Discord](https://discord.gg/ZU36McDgDs) 
for any questions or discussions.

# How can I Contribute?

There are many ways to contribute to Langroid. Here are some areas where you can help:

- Bug Reports
- Code Fixes
- Feature Requests
- Feature Implementations
- Documentation
- Testing
- UI/UX Improvements
- Translations
- Outreach

You are welcome to take on un-assigned open [issues](https://github.com/langroid/langroid/issues).

## Implementation Ideas

> **⚠️ Warning: The list of contribution ideas is not updated frequently
> and may be out of date.**  
> Please see the github issues for more up-to-date possibilities.


**INTEGRATIONS**

- Vector databases, e.g.:
    - [x] Qdrant
    - [x] Chroma
    - [x] LanceDB
    - [x] Pinecone 
    - [x] PostgresML (pgvector)
    - [x] Weaviate
    - [ ] Milvus 
    - [ ] Marqo 
    
- Other LLM APIs, e.g.: 
  - [ ] Anthropic 
  - [ ] Google 
  - [ ] Cohere
  
- Data Sources: 
  - [x] SQL DBs, 
  - [x] Neo4j knowledge-graph
  - [x] ArangoDB knowledge-graph
  - [ ] NoSQL DBs
- Query languages: GraphQL, ...


**SPECIALIZED AGENTS**

- [x] `SQLChatAgent`, analogous to `DocChatAgent`: adds ability to chat with SQL databases
- [x] `TableChatAgent`: adds ability to chat with a tabular dataset in a file. 
   This can derive from `RetrieverAgent`

**CORE LANGROID**

- [ ] **Long-running, loosely coupled agents, communicating over message queues**: Currently all agents run within a session,
  launched from a single script. Extend this so agents can run in different
  processes, machines, or environments (including the cloud), and communicate via message queues.
- [ ] **Improve observability:** we currently log all agent interactions into structured
  and unstructured forms. Add features on top, to improve inspection and
  diagnosis of issues.
- [ ] Implement a way to **backtrack** 1 step in a multi-agent task. 
For instance during a long multi-agent conversation, if we receive a bad response from the LLM,
when the user gets a chance to respond, they may insert a special code (e.g. `b`) so that 
the previous step is re-done and the LLM gets another chance to respond.
- [x] **Integrate LLM APIs:** There are a couple of libs that simulate OpenAI-like interface for other models: https://github.com/BerriAI/litellm
    and https://github.com/philschmid/easyllm. It would be useful to have Langroid work with these APIs.
- [ ] Implement Agents that communicate via REST APIs: Currently, all agents within 
the multi-agent system are created in a single script. 
We can remove this limitation, and add the ability to have agents running and 
listening to an end-point (e.g. a flask server). For example the LLM may 
generate a function-call or Langroid-tool-message, which the agent’s 
tool-handling method interprets and makes a corresponding request to an API endpoint. 
This request can be handled by an agent listening to requests at this endpoint, 
and the tool-handling method gets the result and returns it as the result of the handling method. 
This is roughly the mechanism behind OpenAI plugins, e.g. https://github.com/openai/chatgpt-retrieval-plugin

**DEMOS, POC, Use-cases**

- [ ] **Text labeling/classification:** Specifically do what this repo does: https://github.com/refuel-ai/autolabel, 
  but using Langroid instead of Langchain (which that repo uses).
- [ ] Data Analyst Demo: A multi-agent system that automates a data analysis workflow, e.g. 
feature-exploration, visualization, ML model training.
- [ ] Document classification based on rules in an unstructured “policy” document. 
    This is an actual use-case from a large US bank. The task is to classify 
    documents into categories “Public” or “Sensitive”. The classification must be 
    informed by a “policy” document which has various criteria. 
    Normally, someone would have to read the policy doc, and apply that to 
    classify the documents, and maybe go back and forth and look up the policy repeatedly. 
    This would be a perfect use-case for Langroid’s multi-agent system. 
    One agent would read the policy, perhaps extract the info into some structured form. 
    Another agent would apply the various criteria from the policy to the document in question, 
    and (possibly with other helper agents) classify the document, along with a detailed justification.

- [ ] Document classification and tagging: Given a collection of already labeled/tagged docs, 
which have been ingested into a vecdb (to allow semantic search), 
when given a new document to label/tag, we retrieve the most similar docs 
from multiple categories/tags from the vecdb and present these (with the labels/tags) 
as few-shot examples to the LLM, and have the LLM classify/tag the retrieved document.

- [ ] Implement the CAMEL multi-agent debate system : https://lablab.ai/t/camel-tutorial-building-communicative-agents-for-large-scale-language-model-exploration

- [ ] Implement Stanford’s Simulacra paper with Langroid.
Generative Agents: Interactive Simulacra of Human Behavior https://arxiv.org/abs/2304.03442

- [ ] Implement CMU's paper with Langroid.
Emergent autonomous scientific research capabilities of large language models https://arxiv.org/pdf/2304.05332.pdf

---

# Contribution Guidelines

## Set up dev env

We use [`uv`](https://docs.astral.sh/uv/getting-started/installation/)
to manage dependencies, and `python 3.11` for development.

First install `uv`, then create virtual env and install dependencies:

```bash
# clone this repo and cd into repo root
git clone ...
cd <repo_root>
# create a virtual env under project root, .venv directory
uv venv --python 3.11

# activate the virtual env
. .venv/bin/activate


# use uv to install dependencies (these go into .venv dir)
uv sync --dev 
```

Important note about dependencies management:
> As of version 0.33.0, we are starting to include the `uv.lock` file as part of 
> the repo. This ensures that all contributors are using the same versions of 
> dependencies. If you add a new dependency, `uv add` will automatically update 
> the `uv.lock` file. This will also update the `pyproject.toml` file.

To add packages, use `uv add <package-name>`. This will automatically
find the latest compatible version of the package and add it to
`pyproject.toml`. _Do not manually edit `pyproject.toml` to add packages._

## Set up environment variables (API keys, etc)

Copy the `.env-template` file to a new file `.env` and
insert secrets such as API keys, etc:
- OpenAI API key, Anthropic API key, etc.
- [Optional] GitHub Personal Access Token (needed by PyGithub to analyze git repos;
  token-based API calls are less rate-limited).
- [Optional] Cache Configs
  - Redis: Password, Host, Port <br>
- Qdrant API key for the vector database.

```bash
cp .env-template .env
# now edit the .env file, insert your secrets as above
``` 

Currently only OpenAI models are supported. 
You are welcome to submit a PR to support other API-based or local models. 

## Run tests
To verify your env is correctly setup, run all tests using `make tests`.

## IMPORTANT: Please include tests, docs and possibly examples.

For any new features, please include:
- Tests in the `tests` directory (first check if there is a suitable test file to add to).
  _If fixing a bug, please add a regression test, i.e., 
   one which would have failed without your fix_
- A note in `docs/notes` folder, e.g. `docs/notes/weaviate.md` that is a
  (relatively) self-contained guide to using the feature, including any instructions
  on how to set up the environment or keys if needed. 
  See the [weaviate](https://langroid.github.io/langroid/notes/weaviate/) note as an example. Make sure you link to this note in the `mkdocs.yml` file under the `nav` section.
- Where possible and meaningful, add a simple example in the `examples` directory.

## Generate docs

Generate docs: `make docs`, then go to the IP address shown at the end, like
`http://127.0.0.1:8000/`.
Note this runs a docs server in the background.
To stop it, run `make nodocs`. Also, running `make docs` next time will kill
any previously running `mkdocs` server.


## Coding guidelines

In this Python repository, we prioritize code readability and maintainability.
To ensure this, please adhere to the following guidelines when contributing:

1. **Type-Annotate Code:** Add type annotations to function signatures and
   variables to make the code more self-explanatory and to help catch potential
   issues early. For example, `def greet(name: str) -> str:`. We use [`mypy`](https://mypy.readthedocs.io/en/stable/) for
   type-checking, so please ensure your code passes `mypy` checks. 

2. **Google-Style Docstrings:** Use Google-style docstrings to clearly describe
   the purpose, arguments, and return values of functions. For example:

   ```python
   def greet(name: str) -> str:
       """Generate a greeting message.

       Args:
           name (str): The name of the person to greet.

       Returns:
           str: The greeting message.
       """
       return f"Hello, {name}!"
   ```

3. **PEP8-Compliant 80-Char Max per Line:** Follow the PEP8 style guide and keep
   lines to a maximum of 80 characters. This improves readability and ensures
   consistency across the codebase.

If you are using an LLM to write code for you, adding these
instructions will usually get you code compliant with the above:
```
use type-annotations, google-style docstrings, and pep8 compliant max 80 
     chars per line.
```     


By following these practices, we can create a clean, consistent, and
easy-to-understand codebase for all contributors. Thank you for your
cooperation!

## Submitting a PR

To check for issues locally, run `make check`, it runs linters `black`, `ruff`,
and type-checker `mypy`. It also installs a pre-commit hook, 
so that commits are blocked if there are style/type issues. The linting attempts to
auto-fix issues, and warns about those it can't fix.
(There is a separate `make lint` you could do, but that is already part of `make check`).
The `make check` command also looks through the codebase to see if there are any
direct imports from pydantic, and replaces them with importing from `langroid.pydantic_v1`
(this is needed to enable dual-compatibility with Pydantic v1 and v2).

So, typically when submitting a PR, you would do this sequence:
- run `make tests` or `pytest -xvs tests/main/my-specific-test.py` 
  - if needed, add `-nc` ("no cache") to prevent using cached LLM API call responses
  - the `-xvs` option means "exit on first failure, verbose, show output"
- fix things so tests pass, then proceed to lint/style/type checks below.
- `make check` to see what issues there are (typically lints and mypy)
- manually fix any lint or type issues
- `make check` again to see what issues remain
- repeat if needed, until all clean.

When done with these, commit and push to github and submit the PR. If this
is an ongoing PR, just push to github again and the PR will be updated.

It is strongly recommended to use the `gh` command-line utility when working with git.
Read more [here](docs/development/github-cli.md).
</file>

<file path="Dockerfile">
FROM --platform=$TARGETPLATFORM python:3.11

# Set environment variables to non-interactive (this prevents some prompts)
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=en_US.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=en_US.UTF-8

# Install necessary tools, zsh, and set up locale
RUN apt-get update && \
    apt-get install --no-install-recommends -y zsh wget git curl locales \
    libfreetype6-dev \
    libjpeg-dev \
    libopenjp2-7-dev \
    libssl-dev && \
    sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
    locale-gen && \
    # Cleanup apt cache
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Clone the langroid repository
RUN git clone https://github.com/langroid/langroid.git

# Set the working directory in the container
WORKDIR /langroid
RUN mv .env-template .env

RUN mkdir -p /root/.cache/uv

# workaround for pymupdf build error?
ENV MAKEFLAGS="-j1"
ENV PYTHONPYCACHEPREFIX="/tmp/pycache"
ENV DEBIAN_FRONTEND=noninteractive \
     LANG=en_US.UTF-8

# detect arch to customize pymupdf version
ARG TARGETPLATFORM
ARG TARGETARCH

# install uv then langroid
# Install uv and use it with cache mount
RUN --mount=type=cache,target=/root/.cache/uv,id=uv_cache \
    curl -LsSf https://astral.sh/uv/install.sh | sh && \
    export PATH="/root/.local/bin:$PATH" && \
    uv venv && \
    . .venv/bin/activate && \
    pip install --upgrade pip && \
    if [ "$TARGETARCH" = "arm64" ]; then \
         uv pip install --no-cache-dir "pymupdf==1.24.14"; \
     else \
         uv pip install --no-cache-dir "pymupdf>=1.25.3"; \
     fi && \
    uv pip install --no-cache-dir .

# Install oh-my-zsh and set up zsh configurations
RUN sh -c "$(wget https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh -O -)" || true && \
    sed -i -e 's/plugins=(git)/plugins=(git python)/' /root/.zshrc

CMD ["zsh"]
</file>

<file path="LICENSE">
MIT License

Copyright (c) 2023 langroid

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
</file>

<file path="Makefile">
.PHONY: setup check lint tests docs nodocs loc

SHELL := /bin/bash

.PHONY: setup update

setup: ## Setup the git pre-commit hooks
	uv run pre-commit install

update: ## Update the git pre-commit hooks
	uv run pre-commit autoupdate

.PHONY: type-check
type-check:
	@uv run pre-commit install
	@uv run pre-commit autoupdate
	@uv run pre-commit run --all-files
	@echo "Running black..."
	@uv run black --check .
	@echo "Running ruff check (without fix)..."
	@uv run ruff check .
	@echo "Running mypy...";
	@uv run mypy -p langroid
	@echo "All checks passed!"

.PHONY: lint
lint:
	uv run black .
	uv run ruff check . --fix
	@echo "Auto-fixing issues in examples folder..."
	@uv run ruff check examples/ --fix-only --no-force-exclude

.PHONY: stubs
stubs:
	@echo "Generating Python stubs for the langroid package..."
	@uv run stubgen -p langroid -o stubs
	@echo "Stubs generated in the 'stubs' directory"

.PHONY: tests
tests:
	pytest tests/main --basetemp=/tmp/pytest


docs:
	@# Kill any existing 'mkdocs serve' processes.
	@pkill -f "mkdocs serve" 2>/dev/null || true
	@# Build the documentation.
	mkdocs build
	@# Serve the documentation in the background.
	mkdocs serve &
	@echo "Documentation is being served in the background."
	@echo "You can access the documentation at http://127.0.0.1:8000/"

nodocs:
	@# Kill any existing 'mkdocs serve' processes.
	@pkill -f "mkdocs serve" 2>/dev/null || echo "No 'mkdocs serve' process found."
	@echo "Stopped serving documentation."


loc:
	@echo "Lines in git-tracked files python files:"
	@git ls-files | grep '\.py$$' | xargs cat | grep -v '^\s*$$' | wc -l

.PHONY: repomix repomix-no-tests repomix-all

repomix: ## Generate llms.txt and llms-compressed.txt (includes tests)
	@echo "Generating llms.txt (with tests)..."
	@git ls-files | repomix --stdin
	@echo "Generating llms-compressed.txt..."
	@git ls-files | repomix --stdin --compress -o llms-compressed.txt
	@echo "Generated llms.txt and llms-compressed.txt"

repomix-no-tests: ## Generate llms-no-tests.txt (excludes tests)
	@echo "Generating llms-no-tests.txt (without tests)..."
	@git ls-files | grep -v "^tests/" | repomix --stdin -o llms-no-tests.txt
	@echo "Generating llms-no-tests-compressed.txt..."
	@git ls-files | grep -v "^tests/" | repomix --stdin --compress -o llms-no-tests-compressed.txt
	@echo "Generated llms-no-tests.txt and llms-no-tests-compressed.txt"

repomix-no-tests-no-examples: ## Generate llms-no-tests-no-examples.txt (excludes tests and examples)
	@echo "Generating llms-no-tests-no-examples.txt (without tests and examples)..."
	@git ls-files | grep -v -E "^(tests|examples)/" | repomix --stdin -o llms-no-tests-no-examples.txt
	@echo "Generating llms-no-tests-no-examples-compressed.txt..."
	@git ls-files | grep -v -E "^(tests|examples)/" | repomix --stdin --compress -o llms-no-tests-no-examples-compressed.txt
	@echo "Generated llms-no-tests-no-examples.txt and llms-no-tests-no-examples-compressed.txt"

repomix-all: repomix repomix-no-tests repomix-no-tests-no-examples ## Generate all repomix variants

.PHONY: check
check: lint type-check repomix-all

.PHONY: revert-tag
revert-tag:
	@LATEST_TAG=$$(git describe --tags --abbrev=0) && \
	echo "Deleting tag: $$LATEST_TAG" && \
	git tag -d $$LATEST_TAG

.PHONY: revert-bump
revert-bump:
	@if git log -1 --pretty=%B | grep -q "bump"; then \
		git reset --hard HEAD~1; \
		echo "Reverted last commit (bump commit)"; \
	else \
		echo "Last commit was not a bump commit"; \
	fi

.PHONY: revert
revert: revert-bump revert-tag
	
.PHONY: bump-patch
bump-patch:
	@cz bump --increment PATCH

.PHONY: bump-minor
bump-minor:
	@cz bump --increment MINOR 

.PHONY: bump-major
bump-major:
	@cz bump --increment MAJOR 

.PHONY: build
build:
	@uv build

.PHONY: push
push:
	@git push origin main
	@git push origin --tags

.PHONY: clean
clean:
	-rm -rf dist/*

.PHONY: release
release:
	@VERSION=$$(cz version -p | cut -d' ' -f2) && gh release create $${VERSION} dist/*

.PHONY: bump-rc
bump-rc:
	@cz bump --prerelease rc

.PHONY: bump-beta
bump-beta:
	@cz bump --prerelease beta

.PHONY: bump-alpha
bump-alpha:
	@cz bump --prerelease alpha

.PHONY: all-patch
all-patch: bump-patch clean build push release

.PHONY: all-minor
all-minor: bump-minor clean build push release

.PHONY: all-major
all-major: bump-major clean build push release

.PHONY: all-rc
all-rc: bump-rc clean build push release

.PHONY: all-beta
all-beta: bump-beta clean build push release

.PHONY: all-alpha
all-alpha: bump-alpha clean build push release

.PHONY: pre-release-branch
pre-release-branch: ## Create and push pre-release from current branch
	@CURRENT_BRANCH=$$(git rev-parse --abbrev-ref HEAD) && \
	if [ "$$CURRENT_BRANCH" = "main" ]; then \
		echo "Error: Cannot create pre-release from main branch"; \
		exit 1; \
	fi && \
	PRERELEASE_TYPE=$${PRERELEASE_TYPE:-rc} && \
	cz bump --prerelease "$$PRERELEASE_TYPE" && \
	VERSION=$$(cz version -p | cut -d' ' -f2) && \
	echo "Creating pre-release $$VERSION from branch $$CURRENT_BRANCH" && \
	git push origin "$$CURRENT_BRANCH" --tags && \
	gh release create "$$VERSION" dist/* --target "$$CURRENT_BRANCH" --prerelease --title "Pre-release $$VERSION" --notes "Experimental pre-release from $$CURRENT_BRANCH"

.PHONY: pre-release-rc
pre-release-rc: ## Create release candidate from current branch
	@PRERELEASE_TYPE=rc make pre-release-branch

.PHONY: pre-release-beta
pre-release-beta: ## Create beta release from current branch
	@PRERELEASE_TYPE=beta make pre-release-branch

.PHONY: pre-release-alpha
pre-release-alpha: ## Create alpha release from current branch
	@PRERELEASE_TYPE=alpha make pre-release-branch

.PHONY: pre-release-push
pre-release-push: ## Push current branch and tags (for pre-releases)
	@CURRENT_BRANCH=$$(git rev-parse --abbrev-ref HEAD) && \
	if [ "$$CURRENT_BRANCH" = "main" ]; then \
		echo "Error: Cannot push pre-release from main branch"; \
		exit 1; \
	fi && \
	git push origin "$$CURRENT_BRANCH" --tags

.PHONY: pre-release-release
pre-release-release: ## Create GitHub pre-release (requires VERSION env var)
	@CURRENT_BRANCH=$$(git rev-parse --abbrev-ref HEAD) && \
	if [ "$$CURRENT_BRANCH" = "main" ]; then \
		echo "Error: Cannot create pre-release from main branch"; \
		exit 1; \
	fi && \
	VERSION=$$(cz version -p | cut -d' ' -f2) && \
	echo "Creating pre-release $$VERSION from branch $$CURRENT_BRANCH" && \
	gh release create "$$VERSION" dist/* --target "$$CURRENT_BRANCH" --prerelease --title "Pre-release $$VERSION" --notes "Experimental pre-release from $$CURRENT_BRANCH"

.PHONY: bump-rc-patch
bump-rc-patch: ## Bump to release candidate patch
	@cz bump --increment PATCH --prerelease rc

.PHONY: bump-rc-minor
bump-rc-minor: ## Bump to release candidate minor
	@cz bump --increment MINOR --prerelease rc

.PHONY: bump-rc-major
bump-rc-major: ## Bump to release candidate major
	@cz bump --increment MAJOR --prerelease rc

.PHONY: bump-beta-patch
bump-beta-patch: ## Bump to beta patch
	@cz bump --increment PATCH --prerelease beta

.PHONY: bump-beta-minor
bump-beta-minor: ## Bump to beta minor
	@cz bump --increment MINOR --prerelease beta

.PHONY: bump-alpha-patch
bump-alpha-patch: ## Bump to alpha patch
	@cz bump --increment PATCH --prerelease alpha

.PHONY: bump-alpha-minor
bump-alpha-minor: ## Bump to alpha minor
	@cz bump --increment MINOR --prerelease alpha

.PHONY: pre-release-rc-patch
pre-release-rc-patch: bump-rc-patch clean build pre-release-push pre-release-release

.PHONY: pre-release-rc-minor
pre-release-rc-minor: bump-rc-minor clean build pre-release-push pre-release-release

.PHONY: pre-release-rc-major
pre-release-rc-major: bump-rc-major clean build pre-release-push pre-release-release

.PHONY: pre-release-beta-patch
pre-release-beta-patch: bump-beta-patch clean build pre-release-push pre-release-release

.PHONY: pre-release-beta-minor
pre-release-beta-minor: bump-beta-minor clean build pre-release-push pre-release-release

.PHONY: pre-release-alpha-patch
pre-release-alpha-patch: bump-alpha-patch clean build pre-release-push pre-release-release

.PHONY: pre-release-alpha-minor
pre-release-alpha-minor: bump-alpha-minor clean build pre-release-push pre-release-release

.PHONY: publish
publish:
	uv publish
</file>

<file path="SECURITY.md">
# Security Policy

## ⚠️ Warning
**Always sanitize user input.**

Langroid executes Python code generated by Large Language Models (LLMs) (e.g., through `TableChatAgent` and `LanceDocChatAgent`). While this provides powerful data analysis capabilities, it can lead to unintended consequences if exposed unsafely. Malicious users may exploit LLM responses to execute harmful code, potentially resulting in sensitive data exposure, denial-of-service, or complete system compromise.

If your LLM application accepts untrusted input, implement input sanitization and sandboxing to mitigate these risks.

## Supported Versions

Security updates are supported on Langroid version >= 0.18.x

## Reporting a Vulnerability

If you discover a security vulnerability in this repository, **please report it privately**. Security issues should **not** be reported using GitHub Issues or any other public forum.

### How to Report Privately

To report a security vulnerability privately:

1. Go to the repository's **[Security Advisories](https://github.com/langroid/langroid/security/advisories)** section.
2. Click on **"Report a vulnerability"**.
3. Provide the necessary details about the vulnerability.

Your report will remain confidential, and we will respond as quickly as possible (usually within 48 hours) to evaluate the issue and work on a fix. We greatly appreciate your responsible disclosure.

Please **do not** report vulnerabilities through GitHub Issues, discussions, or other public channels as this could expose the issue to a wider audience before it is resolved.

### Security Fix Timeline

Once a security vulnerability is reported, we will work to:
- Acknowledge the report within 48 hours.
- Investigate and confirm the issue.
- Develop a patch or mitigation strategy.
- Publish the fix and disclose the advisory publicly after the resolution.
</file>

<file path="setup.cfg">
[flake8]
exclude = .*,.*/.*,.*/*,.*/*.*
max-line-length = 88
ignore = W291, W293, E501, E203, W503
</file>

<file path="docs/examples/guide.md">
# Guide to examples in `langroid-examples` repo

!!! warning "Outdated"
    This guide is from Feb 2024; there have been numerous additional examples
    since then. We recommend you visit the `examples` folder in the core `langroid`
    repo for the most up-to-date examples. These examples are periodically copied
    over to the `examples` folder in the `langroid-examples` repo.

The [`langroid-examples`](https://github.com/langroid/langroid-examples) repo
contains several examples of using
the [Langroid](https://github.com/langroid/langroid) agent-oriented programming 
framework for LLM applications.
Below is a guide to the examples. First please ensure you follow the
installation instructions in the `langroid-examples` repo README.

**At minimum, an OpenAI API key with GPT-4 access is required.** As currently set
up, many of the examples will _not_ work with a weaker model. Weaker models may
require more detailed or different prompting, and possibly a more iterative
approach with multiple agents to verify and retry; this is on our roadmap.

All the example scripts are meant to be run on the command line.
In each script there is a description and sometimes instructions on how to run
the script.

NOTE: When you run any script, it pauses for “human” input at every step. Depending
on the context, you can either hit enter to continue, or, if a question/response is
expected from the human, enter your question or response and then hit enter.

### Basic Examples
- [`/examples/basic/chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/basic/chat.py) This is a basic chat application.

    - Illustrates Agent task loop.

- [`/examples/basic/autocorrect.py`](https://github.com/langroid/langroid-examples/blob/main/examples/basic/autocorrect.py) Chat with autocorrect: type fast and carelessly/lazily and 
the LLM will try its best to interpret what you want, and offer choices when confused.

    - Illustrates Agent task loop.

- [`/examples/basic/chat-search.py`](https://github.com/langroid/langroid-examples/blob/main/examples/basic/chat-search.py)  This uses a `GoogleSearchTool` function-call/tool to answer questions using a google web search if needed.
  Try asking questions about facts known after Sep 2021 (GPT4 training cutoff),
  like  `when was llama2 released`

    - Illustrates Agent + Tools/function-calling + web-search

- [`/examples/basic/chat-search-seltz.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-search-seltz.py) Similar to the above, but uses `SeltzSearchTool` for web search powered by [Seltz](https://seltz.ai/). Requires `SELTZ_API_KEY` and `pip install langroid[seltz]`. See [Seltz Search Tool docs](../notes/seltz_search.md) for setup details.

    - Illustrates Agent + Tools/function-calling + web-search via Seltz

- [`/examples/basic/chat-tree.py`](https://github.com/langroid/langroid-examples/blob/main/examples/basic/chat-tree.py) is a toy example of tree-structured multi-agent
  computation, see a detailed writeup [here.](https://langroid.github.io/langroid/examples/agent-tree/)
  
    - Illustrates multi-agent task collaboration, task delegation.

### Document-chat examples, or RAG (Retrieval Augmented Generation)

- [`/examples/docqa/chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat.py) is a document-chat application. Point it to a local file,
  directory, or web URL, and ask questions
    - Illustrates basic RAG
- [`/examples/docqa/chat-search.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat-search.py): ask about anything and it will try to answer
  based on docs indexed in vector-db, otherwise it will do a Google search, and
  index the results in the vec-db for this and later answers.
    - Illustrates RAG + Function-calling/tools
- [`/examples/docqa/chat_multi.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi.py): a 2-agent system that summarizes
  a large document in 5 bullet points: the first agent generates questions for
  the retrieval agent, and is done when it gathers 5 key points.
    - Illustrates 2-agent collaboration + RAG to summarize a document
- [`/examples/docqa/chat_multi_extract.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py): extracts structured info from a
  lease document: the main agent asks questions of a retrieval agent.
    - Illustrates 2-agent collaboration, RAG, Function-calling/tools, Structured Information Extraction.

### Data-chat examples (tabular, SQL)

- [`/examples/data-qa/table_chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/data-qa/table_chat.py): point to a URL or local csv file and ask
  questions. The agent generates pandas code that is run within langroid.
    - Illustrates function-calling/tools and code-generation
- [`/examples/data-qa/sql-chat/sql_chat.py`](https://github.com/langroid/langroid-examples/blob/main/examples/data-qa/sql-chat/sql_chat.py): chat with a SQL db: ask questions in
  English, and it will generate SQL code to answer them.
  See [tutorial here](https://langroid.github.io/langroid/tutorials/postgresql-agent/)
    - Illustrates function-calling/tools and code-generation
</file>

<file path="docs/notes/azure-openai-models.md">
# Azure OpenAI Models

To use OpenAI models deployed on Azure, first ensure a few environment variables
are defined (either in your `.env` file or in your environment):

- `AZURE_OPENAI_API_KEY`, from the value of `API_KEY`
- `AZURE_OPENAI_API_BASE` from the value of `ENDPOINT`, typically looks like `https://your_resource.openai.azure.com`.
- For `AZURE_OPENAI_API_VERSION`, you can use the default value in `.env-template`; the latest version can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/whats-new#azure-openai-chat-completion-general-availability-ga)
- `AZURE_OPENAI_DEPLOYMENT_NAME` is an OPTIONAL deployment name, which may be
  defined by the user during the model setup.
- `AZURE_OPENAI_CHAT_MODEL`: Azure OpenAI allows only specific model names when you select the model for your deployment; you need to use exactly the model name that was selected. For example, GPT-3.5 (should be `gpt-35-turbo-16k` or `gpt-35-turbo`) or GPT-4 (should be `gpt-4-32k` or `gpt-4`).
- `AZURE_OPENAI_MODEL_NAME` (Deprecated, use `AZURE_OPENAI_CHAT_MODEL` instead).

This page [Microsoft Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line&pivots=programming-language-python#environment-variables) 
provides more information on how to obtain these values.

To use an Azure-deployed model in Langroid, you can use the `AzureConfig` class:

```python
import langroid.language_models as lm
import langroid as lr

llm_config = lm.AzureConfig(
    chat_model="gpt-4o"
    # the other settings can be provided explicitly here, 
    # or are obtained from the environment
)
llm = lm.AzureGPT(config=llm_config)

response = llm.chat(
  messages=[
    lm.LLMMessage(role=lm.Role.SYSTEM, content="You are a helpful assistant."),
    lm.LLMMessage(role=lm.Role.USER, content="3+4=?"),
  ]
)

agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        llm=llm_config,
        system_message="You are a helpful assistant.",
    )
)

response = agent.llm_response("is 4 odd?")
print(response.content)  # "No, 4 is an even number."
response = agent.llm_response("what about 2?")  # follow-up question
```

## Using Azure OpenAI API v1 with Standard OpenAI Clients

Azure's October 2025 API update allows using standard OpenAI clients instead of
Azure-specific ones. However, Azure deployment names often differ from actual
model identifiers, which can cause issues with model capability detection.

If your deployment name differs from the actual model name, use `chat_model_orig`
to specify the actual model for proper capability detection:

```python
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
    chat_model="my-gpt4o-deployment",     # Your Azure deployment name
    chat_model_orig="gpt-4o",             # Actual model name for capability detection
    api_base="https://your-resource.openai.azure.com/",
)
```

This ensures Langroid correctly identifies model capabilities (context length,
supported features, etc.) even when the deployment name doesn't match the
underlying model.
</file>

<file path="docs/notes/gemini.md">
# Gemini LLMs & Embeddings via OpenAI client (without LiteLLM)

As of Langroid v0.21.0 you can use Langroid with Gemini LLMs directly
via the OpenAI client, without using adapter libraries like LiteLLM.

See details [here](https://langroid.github.io/langroid/tutorials/non-openai-llms/)
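
For example, here is a minimal sketch of chatting with a Gemini model via the
OpenAI client (assuming `GEMINI_API_KEY` is set in your environment):

```python
import langroid.language_models as lm

# the "gemini/" prefix routes the request to the Gemini OpenAI-compatible endpoint
llm = lm.OpenAIGPT(lm.OpenAIGPTConfig(chat_model="gemini/gemini-2.0-flash"))
response = llm.chat("What is the capital of France?")
print(response.message)
```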

You can also use Google AI Studio embeddings or Gemini embeddings directly,
which use the `google-generativeai` client under the hood.

```python

import langroid as lr
from langroid.agent.special import DocChatAgent, DocChatAgentConfig
from langroid.embedding_models import GeminiEmbeddingsConfig

# Configure Gemini embeddings
embed_cfg = GeminiEmbeddingsConfig(
    model_type="gemini",
    model_name="models/text-embedding-004",
    dims=768,
)

# Configure the DocChatAgent
config = DocChatAgentConfig(
    llm=lr.language_models.OpenAIGPTConfig(
        chat_model="gemini/" + lr.language_models.GeminiModel.GEMINI_1_5_FLASH_8B,
    ),
    vecdb=lr.vector_store.QdrantDBConfig(
        collection_name="quick_start_chat_agent_docs",
        replace_collection=True,
        embedding=embed_cfg,
    ),
    parsing=lr.parsing.parser.ParsingConfig(
        separators=["\n\n"],
        splitter=lr.parsing.parser.Splitter.SIMPLE,
    ),
    n_similar_chunks=2,
    n_relevant_chunks=2,
)

# Create the agent
agent = DocChatAgent(config)
```

## Vertex AI Support

Google Vertex AI uses project-specific URLs for its
[OpenAI compatibility layer](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/call-gemini-using-openai-library),
which differs from the fixed URL used by the standard Google AI (Gemini) API.
To use Gemini models through Vertex AI, set the endpoint via the
`GEMINI_API_BASE` environment variable or the `api_base` parameter in
`OpenAIGPTConfig`.

!!! note
    The `OPENAI_API_BASE` environment variable (commonly used for local
    proxies) is **not** applied to Gemini models. Use `GEMINI_API_BASE`
    or an explicit `api_base` in the config instead.

### Setup

1. Set up authentication. Vertex AI typically uses Google Cloud credentials
   rather than a simple API key. You can generate a short-lived access token:

    ```bash
    export GEMINI_API_KEY=$(gcloud auth print-access-token)
    ```

2. Set your Vertex AI endpoint URL, which includes your GCP project ID
   and region:

    ```bash
    export GEMINI_API_BASE=https://{REGION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{REGION}/endpoints/openapi
    ```

### Usage

**Option 1: Environment variable (recommended for Vertex AI)**

```bash
export GEMINI_API_KEY=$(gcloud auth print-access-token)
export GEMINI_API_BASE=https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-gcp-project/locations/us-central1/endpoints/openapi
```

```python
import langroid.language_models as lm

# GEMINI_API_BASE is picked up automatically
config = lm.OpenAIGPTConfig(chat_model="gemini/gemini-2.0-flash")
llm = lm.OpenAIGPT(config)
response = llm.chat("Hello from Vertex AI!")
```

**Option 2: Explicit `api_base` in config**

```python
import langroid.language_models as lm

config = lm.OpenAIGPTConfig(
    chat_model="gemini/gemini-2.0-flash",
    api_base=(
        "https://us-central1-aiplatform.googleapis.com/v1beta1"
        "/projects/my-gcp-project/locations/us-central1/endpoints/openapi"
    ),
)
llm = lm.OpenAIGPT(config)
response = llm.chat("Hello from Vertex AI!")
```

When neither `GEMINI_API_BASE` nor an explicit `api_base` is set, Langroid
falls back to the default Google AI (Gemini) endpoint
(`https://generativelanguage.googleapis.com/v1beta/openai`).
</file>

<file path="docs/notes/handle-llm-no-tool.md">
# Handling a non-tool LLM message

A common scenario is to define a `ChatAgent`, enable it to use some tools
(i.e. `ToolMessages`s), wrap it in a Task, and call `task.run()`, e.g. 

```python
import langroid as lr

class MyTool(lr.ToolMessage):
    ...

config = lr.ChatAgentConfig(...)
agent = lr.ChatAgent(config)
agent.enable_message(MyTool)
task = lr.Task(agent, interactive=False)
task.run("Hello")
```

Consider what happens when you invoke `task.run()`. When the agent's `llm_response` 
returns a valid tool-call, the sequence of steps looks like this:

- `llm_response` -> tool $T$
- `agent_response` handles $T$ -> returns results $R$
- `llm_response` responds to $R$ -> returns msg $M$
- and so on

If the LLM's response M contains a valid tool, then this cycle continues
with another tool-handling round. However, if the LLM's response M does _not_ contain
a tool-call, it is unclear whether:

- (1) the LLM "forgot" to generate a tool (or generated it wrongly, hence it was
   not recognized by Langroid as a tool), or 
- (2) the LLM's response M is an "answer" meant to be shown to the user 
    to continue the conversation, or
- (3) the LLM's response M is intended to be a "final" response, ending the task. 

Internally, when the `ChatAgent`'s `agent_response` method sees a message that does not
contain a tool, it invokes the `handle_message_fallback` method, which by default
does nothing (returns `None`). However, you can override this method by deriving
from `ChatAgent`, as described in this [FAQ](https://langroid.github.io/langroid/FAQ/#how-can-i-handle-an-llm-forgetting-to-generate-a-toolmessage). As in that FAQ, 
in this fallback method, you would
typically have code that checks whether the message is a `ChatDocument`
and whether it came from the LLM, and if so, you would have the method return 
an appropriate message or tool (e.g. a reminder to the LLM, or an orchestration tool
such as [`AgentDoneTool`][langroid.agent.tools.orchestration.AgentDoneTool]).

To simplify the developer experience, as of version 0.39.2 Langroid also provides an
easier way to specify what this fallback method should return, via the
`ChatAgentConfig.handle_llm_no_tool` parameter, for example:
```python
config = lr.ChatAgentConfig(
    # ... other params
    handle_llm_no_tool="done", # terminate task if LLM sends non-tool msg
)
```
The `handle_llm_no_tool` parameter can have the following possible values:

- A special value from the [`NonToolAction`][langroid.mytypes.NonToolAction] Enum, e.g.:
    - `"user"` or `NonToolAction.USER` - this is interpreted by langroid to return 
     `ForwardTool(agent="user")`, meaning the message is passed to the user to await
     their next input.
    - `"done"` or `NonToolAction.DONE` - this is interpreted by langroid to return 
     `AgentDoneTool(content=msg.content, tools=msg.tool_messages)`, 
     meaning the task is ended, and any content and tools in the current message will
     appear in the returned `ChatDocument`.
- A callable, specifically a function that takes a `ChatDocument` and returns any value
  (see the sketch after this list).
  This can be useful when you want the fallback action to return a value
  based on the current message, e.g.
  `lambda msg: AgentDoneTool(content=msg.content)`, or it could be a more
  elaborate function, or a prompt that contains the content of the current message.
- Any `ToolMessage` (typically an [Orchestration](https://github.com/langroid/langroid/blob/main/langroid/agent/tools/orchestration.py) tool like 
  `AgentDoneTool` or `ResultTool`)
- Any string, meant to be handled by the LLM. 
  Typically this would be a reminder to the LLM, something like:
```python
"""Your intent is not clear -- 
- if you forgot to use a Tool such as `ask_tool` or `search_tool`, try again.
- or if you intended to return your final answer, use the Tool named `done_tool`,
  with `content` set to your answer.
"""
```
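
As a concrete illustration of the callable option, here is a minimal sketch
(assuming the agent is otherwise configured as usual):

```python
import langroid as lr
from langroid.agent.tools.orchestration import AgentDoneTool

config = lr.ChatAgentConfig(
    # ... other params
    # end the task, returning the LLM's non-tool message as the task result
    handle_llm_no_tool=lambda msg: AgentDoneTool(content=msg.content),
)
```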

A simple example is in the [`chat-search.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-search.py)
script, and in the `test_handle_llm_no_tool` test in
[`test_tool_messages.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_tool_messages.py).

## Important: Specialized agents and `handle_llm_no_tool`

!!! warning "Specialized agents have their own fallback logic"

    Several built-in Langroid agents — such as `TableChatAgent`,
    `SQLChatAgent`, `Neo4jChatAgent`, `ArangoChatAgent`,
    `QueryPlannerAgent`, and `CriticAgent` — override the
    `handle_message_fallback` method with their own specialized,
    **state-dependent** fallback logic. For example, `TableChatAgent`
    checks whether it has already sent an expression and reminds
    the LLM to use the `pandas_eval` tool, while `QueryPlannerAgent`
    tracks how many reminders it has sent and stops after a limit.

    **Setting `handle_llm_no_tool` on these specialized agents has
    no effect** — the specialized `handle_message_fallback` override
    takes precedence, and the config parameter is silently ignored.
    These two mechanisms are intentionally separate:
    `handle_llm_no_tool` is a simple declarative config knob for the
    base `ChatAgent`, while specialized agents use
    `handle_message_fallback` for context-aware fallback behavior
    that cannot be captured by a single config value.

If you are subclassing a specialized agent and want to customize
the fallback behavior, **override `handle_message_fallback`** in
your own subclass rather than setting `handle_llm_no_tool`.
You can call `super()` selectively if you want the parent's
specialized logic in some cases:

```python
from langroid.agent.special.table_chat_agent import (
    TableChatAgent,
    TableChatAgentConfig,
)
from langroid.agent.chat_document import ChatDocument
from langroid.mytypes import Entity


class MyTableAgent(TableChatAgent):
    def handle_message_fallback(
        self, msg: str | ChatDocument
    ) -> str | ChatDocument | None:
        if (
            isinstance(msg, ChatDocument)
            and msg.metadata.sender == Entity.LLM
        ):
            # Your custom fallback logic here
            return "Please use a tool to answer the question."
        # Or delegate to the parent's specialized logic:
        # return super().handle_message_fallback(msg)
        return None
```
</file>

<file path="docs/notes/llama-cpp-embeddings.md">
# Local embeddings provision via llama.cpp server

As of Langroid v0.30.0, you can use llama.cpp as a provider of embeddings
to any of Langroid's vector stores, allowing access to a wide variety of
GGUF-compatible embedding models, e.g.
[nomic-ai's Embed Text V1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF).

## Supported Models

llama.cpp can generate embeddings from:

**Dedicated embedding models (RECOMMENDED):**

- [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF)
  (768 dims)
- [nomic-embed-text-v2-moe](https://huggingface.co/nomic-ai/nomic-embed-text-v2-moe-GGUF)
- [nomic-embed-code](https://huggingface.co/nomic-ai/nomic-embed-code-GGUF)
- Other GGUF embedding models

**Regular LLMs (also supported):**

- gpt-oss-20b, gpt-oss-120b
- Llama models
- Other language models

Note: Dedicated embedding models are recommended for best performance in
retrieval and semantic search tasks.

## Configuration

When defining a VecDB, you can provide an instance of
`LlamaCppServerEmbeddingsConfig` to the VecDB config to instantiate
the llama.cpp embeddings server handler.

To configure the `LlamaCppServerEmbeddingsConfig`, there are several
parameters that should be adjusted:

```python
from langroid.embedding_models.models import LlamaCppServerEmbeddingsConfig
from langroid.vector_store.qdrantdb import QdrantDBConfig

embed_cfg = LlamaCppServerEmbeddingsConfig(
    api_base="http://localhost:8080",  # IP + Port
    dims=768,  # Match the dimensions of your embedding model
    context_length=2048,  # Match the config of the model
    batch_size=2048,  # Safest to ensure this matches context_length
)

vecdb_config = QdrantDBConfig(
    collection_name="my-collection",
    embedding=embed_cfg,
    storage_path=".qdrant/",
)
```

## Running llama-server

The llama.cpp server must be started with the `--embeddings` flag to enable
embedding generation.

### For dedicated embedding models (RECOMMENDED):

```bash
./llama-server -ngl 100 -c 2048 \
  -m ~/nomic-embed-text-v1.5.Q8_0.gguf \
  --host localhost --port 8080 \
  --embeddings -b 2048 -ub 2048
```

### For LLM-based embeddings (e.g., gpt-oss):

```bash
./llama-server -ngl 99 \
  -m ~/.cache/llama.cpp/gpt-oss-20b.gguf \
  --host localhost --port 8080 \
  --embeddings
```

## Response Format Compatibility

Langroid automatically handles multiple llama.cpp response formats:

- Native `/embedding`: `{"embedding": [floats]}`
- OpenAI `/v1/embeddings`: `{"data": [{"embedding": [floats]}]}`
- Array formats: `[{"embedding": [floats]}]`
- Nested formats: `{"embedding": [[floats]]}`

You don't need to worry about which endpoint or format your llama.cpp server
uses - Langroid will automatically detect and handle the response correctly.

## Example Usage

An example setup can be found inside
[examples/docqa/chat.py](https://github.com/langroid/langroid/blob/main/examples/docqa/chat.py).

For a complete example using local embeddings with llama.cpp:

```python
from langroid.agent.special.doc_chat_agent import (
    DocChatAgent,
    DocChatAgentConfig,
)
from langroid.embedding_models.models import LlamaCppServerEmbeddingsConfig
from langroid.language_models.openai_gpt import OpenAIGPTConfig
from langroid.parsing.parser import ParsingConfig
from langroid.vector_store.qdrantdb import QdrantDBConfig

# Configure local embeddings via llama.cpp
embed_cfg = LlamaCppServerEmbeddingsConfig(
    api_base="http://localhost:8080",
    dims=768,  # nomic-embed-text-v1.5 dimensions
    context_length=8192,
    batch_size=1024,
)

# Configure vector store with local embeddings
vecdb_config = QdrantDBConfig(
    collection_name="doc-chat-local",
    embedding=embed_cfg,
    storage_path=".qdrant/",
)

# Create DocChatAgent
config = DocChatAgentConfig(
    vecdb=vecdb_config,
    llm=OpenAIGPTConfig(
        chat_model="gpt-4o",  # or use local LLM
    ),
)

agent = DocChatAgent(config)
```

## Troubleshooting

**Error: "Failed to connect to embedding provider"**

- Ensure llama-server is running with the `--embeddings` flag
- Check that the `api_base` URL is correct
- Verify the server is accessible from your machine

**Error: "Unsupported embedding response format"**

- This error includes the first 500 characters of the response to help debug
- Check your llama-server logs for any errors
- Ensure you're using a compatible llama.cpp version

**Embeddings seem low quality:**

- Use a dedicated embedding model instead of an LLM
- Ensure the `dims` parameter matches your model's output dimensions
- Try different GGUF quantization levels (Q8_0 generally works well)

## Additional Resources

- [llama.cpp GitHub](https://github.com/ggml-org/llama.cpp)
- [llama.cpp server documentation](https://github.com/ggml-org/llama.cpp/blob/master/examples/server/README.md)
- [nomic-embed models on Hugging Face](https://huggingface.co/nomic-ai)
- [Issue #919 - Implementation details](https://github.com/langroid/langroid/blob/main/issues/issue-919-llamacpp-embeddings.md)
</file>

<file path="docs/notes/message-routing.md">
# Message Routing in Multi-Agent Systems

This document covers how messages are routed between agents in Langroid's
multi-agent systems.

## Recommended Approach: Orchestration Tools

The recommended way to route messages between agents is using **orchestration
tools**. These provide explicit, type-safe routing that is easier to debug and
reason about.

### Available Orchestration Tools

Langroid provides several tools in `langroid.agent.tools.orchestration`:

- **`SendTool`** - Send a message to a specific agent by name
- **`DoneTool`** - Signal task completion with a result
- **`PassTool`** - Pass control to another agent
- **`DonePassTool`** - Combine done and pass behaviors
- **`AgentDoneTool`** - Signal completion from a specific agent

Example:

```python
from langroid.agent.tools.orchestration import SendTool

# Enable the tool on your agent
agent.enable_message(SendTool)

# LLM can then use the tool to route messages:
# {"request": "send_message", "to": "AnalysisAgent", "content": "Please analyze this"}
```

**Benefits of tool-based routing:**

- Explicit and predictable behavior
- Type-safe with validation
- Easier to debug (tool calls are logged)
- Works consistently across all LLM providers

## Text-Based Routing (Alternative)

Langroid also supports text-based routing patterns, where the LLM can embed
routing information directly in its response text. This is controlled by the
`recognize_recipient_in_content` setting.

**Note:** While convenient, text-based routing is less explicit than tool-based
routing and may lead to accidental routing if the LLM's response happens to
match the patterns.

### `ChatAgentConfig.recognize_recipient_in_content`

Controls whether recipient routing patterns in LLM response text are parsed.

```python
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig

# Default: recipient patterns are parsed
agent = ChatAgent(ChatAgentConfig(
    recognize_recipient_in_content=True
))

# Disable: patterns treated as plain text
agent = ChatAgent(ChatAgentConfig(
    recognize_recipient_in_content=False
))
```

**Recognized patterns:**

1. **TO-bracket format**: `TO[AgentName]: message content`
2. **JSON format**: `{"recipient": "AgentName", "content": "message"}`

**When `True` (default):**

- Patterns are parsed and recipient is extracted to `ChatDocument.metadata.recipient`
- The pattern prefix/wrapper is stripped from the message content
- Enables LLM-driven routing in multi-agent systems

**When `False`:**

- Patterns are preserved as literal text in the message content
- `metadata.recipient` remains empty
- Useful when you want explicit tool-based routing only
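
As a rough illustration of the difference (using the same hypothetical
`AnalysisAgent` as in the earlier example):

```python
# Suppose the LLM's raw response text is:
#   TO[AnalysisAgent]: please analyze this
#
# With recognize_recipient_in_content=True (the default), roughly:
#   response.metadata.recipient == "AnalysisAgent"
#   response.content == "please analyze this"
#
# With recognize_recipient_in_content=False:
#   response.metadata.recipient == ""  # left empty
#   response.content == "TO[AnalysisAgent]: please analyze this"
```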

### OpenAI Assistant Support

The `recognize_recipient_in_content` setting is also honored by `OpenAIAssistant`:

```python
from langroid.agent.openai_assistant import OpenAIAssistant, OpenAIAssistantConfig

assistant = OpenAIAssistant(OpenAIAssistantConfig(
    name="MyAssistant",
    recognize_recipient_in_content=False,
))
```

## Related: String Signals for Routing

The `TaskConfig.recognize_string_signals` setting controls parsing of signals
like `DONE`, `PASS`, and `DONE_PASS`. While `DONE` is primarily about task
termination, `PASS` is a routing signal that passes control to another agent.

See [Task Termination - Text-Based Termination Signals](task-termination.md#text-based-termination-signals)
for details on `recognize_string_signals`.

## Disabling All Text-Based Routing

To completely disable text-based routing and rely solely on orchestration tools,
set both flags to `False`:

```python
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.agent.task import Task, TaskConfig

agent = ChatAgent(ChatAgentConfig(
    name="MyAgent",
    recognize_recipient_in_content=False,  # No TO[...] or JSON recipient parsing
))

task = Task(
    agent,
    config=TaskConfig(
        recognize_string_signals=False,  # No DONE/PASS parsing
    ),
)
```

This configuration ensures:

- LLM responses are treated as literal text
- No accidental routing based on text patterns
- All routing must be explicit via orchestration tools
</file>

<file path="docs/notes/openai-http-client.md">
# OpenAI HTTP Client Configuration

When using OpenAI models through Langroid in corporate environments or behind proxies, you may encounter SSL certificate verification errors. Langroid provides three flexible options to configure the HTTP client used for OpenAI API calls.

## Configuration Options

### 1. Simple SSL Verification Bypass

The quickest solution for development or trusted environments:

```python
import langroid.language_models as lm

config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_verify_ssl=False  # Disables SSL certificate verification
)

llm = lm.OpenAIGPT(config)
```

!!! warning "Security Notice"
    Disabling SSL verification makes your connection vulnerable to man-in-the-middle attacks. Only use this in trusted environments.

### 2. HTTP Client Configuration Dictionary

For common scenarios like proxies or custom certificates, use a configuration dictionary:

```python
import langroid.language_models as lm

config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_config={
        "verify": False,  # Or path to CA bundle: "/path/to/ca-bundle.pem"
        "proxy": "http://proxy.company.com:8080",
        "timeout": 30.0,
        "headers": {
            "User-Agent": "MyApp/1.0"
        }
    }
)

llm = lm.OpenAIGPT(config)
```

**Benefits**: This approach enables client caching, improving performance when creating multiple agents.

### 3. Custom HTTP Client Factory

For advanced scenarios requiring dynamic behavior or custom authentication:

```python
import langroid.language_models as lm
from httpx import Client

def create_custom_client():
    """Factory function to create a custom HTTP client."""
    client = Client(
        verify="/path/to/corporate-ca-bundle.pem",
        proxies={
            "http": "http://proxy.corp.com:8080",
            "https": "http://proxy.corp.com:8080"
        },
        timeout=30.0
    )

    # Add custom event hooks for logging
    def log_request(request):
        print(f"API Request: {request.method} {request.url}")

    client.event_hooks = {"request": [log_request]}

    return client

config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_factory=create_custom_client
)

llm = lm.OpenAIGPT(config)
```

If you are using `async` methods, return a tuple of `(Client, AsyncClient)` from your factory:

```python
from httpx import AsyncClient, Client

def create_custom_client():
    """Factory function to create a custom sync and async HTTP clients."""
    client_args = {
        "verify": "/path/to/corporate-ca-bundle.pem",
        "proxy": "http://proxy.corp.com:8080",
        "timeout": 30.0,
    }
    client = Client(**client_args)
    async_client = AsyncClient(**client_args)

    return client, async_client
```

**Note**: Custom factories bypass client caching. Each `OpenAIGPT` instance creates a new client.

## Priority Order

When multiple options are specified, they are applied in this order:
1. `http_client_factory` (highest priority)
2. `http_client_config`
3. `http_verify_ssl` (lowest priority)
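
For example, here is a sketch of what happens when two options are specified
together, based on the priority order above:

```python
import langroid.language_models as lm

# http_client_config has higher priority than http_verify_ssl,
# so the client config is what actually takes effect here.
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_verify_ssl=True,                  # lower priority; effectively ignored here
    http_client_config={"verify": False},  # higher priority; this one applies
)
llm = lm.OpenAIGPT(config)
```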

## Common Use Cases

### Corporate Proxy with Custom CA Certificate

```python
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_config={
        "verify": "/path/to/corporate-ca-bundle.pem",
        "proxies": {
            "http": "http://proxy.corp.com:8080",
            "https": "https://proxy.corp.com:8443"
        }
    }
)
```

### Debugging API Calls

```python
def debug_client_factory():
    from httpx import Client

    client = Client(verify=False)

    def log_response(response):
        print(f"Status: {response.status_code}")
        print(f"Headers: {response.headers}")

    client.event_hooks = {
        "response": [log_response]
    }

    return client

config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    http_client_factory=debug_client_factory
)
```

### Local Development with Self-Signed Certificates

```python
# For local OpenAI-compatible APIs
config = lm.OpenAIGPTConfig(
    chat_model="gpt-4",
    api_base="https://localhost:8443/v1",
    http_verify_ssl=False
)
```


## Best Practices

1. **Use the simplest option that meets your needs**:
   - Development/testing: `http_verify_ssl=False`
   - Corporate environments: `http_client_config` with proper CA bundle
   - Complex requirements: `http_client_factory`

2. **Prefer configuration over factories for better performance** - configured clients are cached and reused

3. **Always use proper CA certificates in production** instead of disabling SSL verification

4. **Test your configuration** with a simple API call before deploying:
   ```python
   llm = lm.OpenAIGPT(config)
   response = llm.chat("Hello")
   print(response.content)
   ```

## Troubleshooting

### SSL Certificate Errors
```
ssl.SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED]
```
**Solution**: Use one of the three configuration options above.


### Proxy Connection Issues
- Verify proxy URL format: `http://proxy:port` or `https://proxy:port`
- Check if proxy requires authentication
- Ensure proxy allows connections to `api.openai.com`

## See Also

- [OpenAI API Reference](https://platform.openai.com/docs/api-reference) - Official OpenAI documentation
</file>

<file path="docs/notes/reasoning-content.md">
# Stream and capture reasoning content in addition to final answer, from Reasoning LLMs

As of v0.35.0, when using certain Reasoning LLM APIs (e.g. `deepseek/deepseek-reasoner`):

- You can see both the reasoning (dim green) and final answer (bright green) text in the streamed output.
- When directly calling the LLM (without using an Agent), the `LLMResponse` object will now contain a `reasoning` field,
  in addition to the earlier `message` field.
- When using `ChatAgent.llm_response`, extract the reasoning text from the `ChatDocument` object's `reasoning` field
  (in addition to extracting the final answer as usual from the `content` field).

Below is a simple example, also in this [script](https://github.com/langroid/langroid/blob/main/examples/reasoning/agent-reasoning.py):

Some notes: 

- To get reasoning trace from Deepseek-R1 via OpenRouter, you must include
the `extra_body` parameter with `include_reasoning` as shown below.
- When using the OpenAI `o3-mini` model, you can set the `reasoning_effort` parameter
  to "high", "medium" or "low" to control the reasoning effort.
- As of Feb 9, 2025, OpenAI reasoning models (o1, o1-mini, o3-mini) 
  do *not* expose reasoning trace in the API response.
  
```python
import langroid as lr
import langroid.language_models as lm

llm_config = lm.OpenAIGPTConfig(
  chat_model="deepseek/deepseek-reasoner",
  # inapplicable params are automatically removed by Langroid
  params=lm.OpenAICallParams(
    reasoning_effort="low",  # only supported by o3-mini
    # below lets you get reasoning when using openrouter/deepseek/deepseek-r1
    extra_body=dict(include_reasoning=True),
  ),
)

# (1) Direct LLM interaction
llm = lm.OpenAIGPT(llm_config)

response = llm.chat("Is 9.9 bigger than 9.11?")

# extract reasoning
print(response.reasoning)
# extract answer
print(response.message)

# (2) Using an agent
agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        llm=llm_config,
        system_message="Solve the math problem given by the user",
    )
)

response = agent.llm_response(
    """
    10 years ago, Jack's dad was 5 times as old as Jack.
    Today, Jack's dad is 40 years older than Jack.
    How old is Jack today?
    """
)

# extract reasoning
print(response.reasoning)
# extract answer
print(response.content)
```

## Displaying Reasoning in UI Callbacks

When using Langroid with UI frameworks like Chainlit, the reasoning content from LLM
responses is automatically passed to the callback methods. This allows you to display
the chain-of-thought reasoning in your UI.

The following callback methods receive a `reasoning` parameter:

- `show_llm_response(content, tools_content, is_tool, cached, language, reasoning)` -
  For non-streaming LLM responses
- `finish_llm_stream(content, tools_content, is_tool, reasoning)` -
  For streaming LLM responses

### Chainlit Integration

When using `ChainlitAgentCallbacks` or `ChainlitTaskCallbacks`, reasoning content is
automatically displayed as a nested message under the main LLM response. The reasoning
appears with a "💭 Reasoning" label in the author field.

### Custom Callback Implementation

If you're implementing custom callbacks, you can access the reasoning parameter to
display it however you prefer:

```python
from langroid.agent.base import Agent

def my_show_llm_response(
    content: str,
    tools_content: str = "",
    is_tool: bool = False,
    cached: bool = False,
    language: str | None = None,
    reasoning: str = "",
) -> None:
    # Display the main response
    print(f"Response: {content}")

    # Display reasoning if available
    if reasoning:
        print(f"Reasoning: {reasoning}")

# Attach to an agent
agent = Agent(config)
agent.callbacks.show_llm_response = my_show_llm_response
```
</file>

<file path="docs/notes/seltz_search.md">
---

# **Using Seltz Search with Langroid**

---

## **1. Set Up Seltz**

1. **Access Seltz Platform**
   Go to [Seltz](https://seltz.ai/).

2. **Sign Up or Log In**
   Create an account or log in if you already have one.

3. **Get Your API Key**
   - Navigate to your dashboard
   - Copy your API key

4. **Set Environment Variable**
   Add the following variable to your `.env` file:
   ```env
   SELTZ_API_KEY=<your_api_key>
   ```

---

## **2. Install**

```bash
pip install langroid[seltz]
# or
uv pip install langroid[seltz]
```

---

## **3. Use Seltz Search with Langroid**

```python
import langroid as lr
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.agent.tools.seltz_search_tool import SeltzSearchTool

# Configure the ChatAgent
config = ChatAgentConfig(
    name="search-agent",
    llm=lr.language_models.OpenAIGPTConfig(
        chat_model=lr.language_models.OpenAIChatModel.GPT4o
    ),
)

# Create the agent and enable the Seltz search tool
agent = ChatAgent(config)
agent.enable_message(SeltzSearchTool)
```

---

## **4. Perform Web Searches**

Use the agent to perform web searches using Seltz.

```python
# Simple search query
response = agent.llm_response(
    "What are the latest developments in quantum computing?"
)
print(response)
```

---

## **5. Direct Tool Usage**

You can also use the tool directly without an agent:

```python
from langroid.agent.tools.seltz_search_tool import SeltzSearchTool

# Create a search request
search_request = SeltzSearchTool(
    query="Latest breakthroughs in fusion energy",
    num_results=3,
)

# Get search results
results = search_request.handle()
print(results)
```

---

## **6. Full Example**

See the complete working example at
[`examples/basic/chat-search-seltz.py`](https://github.com/langroid/langroid/blob/main/examples/basic/chat-search-seltz.py).

Run it with:
```bash
python3 examples/basic/chat-search-seltz.py
```

---
</file>

<file path="docs/notes/task-termination.md">
# Task Termination in Langroid

## Why Task Termination Matters

When building agent-based systems, one of the most critical yet challenging aspects is determining when a task should complete. Unlike traditional programs with clear exit points, agent conversations can meander, loop, or continue indefinitely. Getting termination wrong leads to two equally problematic scenarios:

**Terminating too early** means missing crucial information or cutting off an agent mid-process. Imagine an agent that searches for information, finds it, but terminates before it can process or summarize the results. The task completes "successfully" but fails to deliver value.

**Terminating too late** wastes computational resources, frustrates users, and can lead to repetitive loops where agents keep responding without making progress. We've all experienced chatbots that won't stop talking or systems that keep asking "Is there anything else?" long after the conversation should have ended. Even worse, agents can fall into infinite loops—repeatedly exchanging the same messages, calling the same tools, or cycling through states without making progress. These loops not only waste resources but can rack up significant costs when using paid LLM APIs.

The challenge is that the "right" termination point depends entirely on context. A customer service task might complete after resolving an issue and confirming satisfaction. A research task might need to gather multiple sources, synthesize them, and present findings. A calculation task should end after computing and presenting the result. Each scenario requires different termination logic.

Traditionally, developers would subclass `Task` and override the `done()` method with custom logic. While flexible, this approach scattered termination logic across multiple subclasses, making systems harder to understand and maintain. It also meant that common patterns—like "complete after tool use" or "stop when the user says goodbye"—had to be reimplemented repeatedly.

This guide introduces Langroid's declarative approach to task termination, culminating in the powerful `done_sequences` feature. Instead of writing imperative code, you can now describe *what* patterns should trigger completion, and Langroid handles the *how*. This makes your agent systems more predictable, maintainable, and easier to reason about.

## Table of Contents
- [Overview](#overview)
- [Basic Termination Methods](#basic-termination-methods)
- [Done Sequences: Event-Based Termination](#done-sequences-event-based-termination)
  - [Concept](#concept)
  - [DSL Syntax (Recommended)](#dsl-syntax-recommended)
  - [Full Object Syntax](#full-object-syntax)
  - [Event Types](#event-types)
  - [Examples](#examples)
- [Implementation Details](#implementation-details)
- [Best Practices](#best-practices)
- [Reference](#reference)
- [Text-Based Termination Signals](#text-based-termination-signals)

## Overview

In Langroid, a `Task` wraps an `Agent` and manages the conversation flow. Controlling when a task terminates is crucial for building reliable agent systems. Langroid provides several methods for task termination, from simple flags to sophisticated event sequence matching.

## Basic Termination Methods

### 1. Turn Limits
```python
# Task runs for exactly 5 turns
result = task.run("Start conversation", turns=5)
```

### 2. Single Round Mode
```python
# Task completes after one exchange
config = TaskConfig(single_round=True)
task = Task(agent, config=config)
```

### 3. Done If Tool
```python
# Task completes when any tool is generated
config = TaskConfig(done_if_tool=True)
task = Task(agent, config=config)
```

### 4. Done If Response/No Response
```python
# Task completes based on response from specific entities
config = TaskConfig(
    done_if_response=[Entity.LLM],      # Done if LLM responds
    done_if_no_response=[Entity.USER]   # Done if USER doesn't respond
)
```

### 5. String Signals
```python
# Task completes when special strings like "DONE" are detected
# (enabled by default with recognize_string_signals=True)
```

See [Text-Based Termination Signals](#text-based-termination-signals)
for detailed documentation on controlling text-based signal behavior.

### 6. Orchestration Tools
```python
# Using DoneTool, FinalResultTool, etc.
from langroid.agent.tools.orchestration import DoneTool
agent.enable_message(DoneTool)
```

## Done Sequences: Event-Based Termination

### Concept

The `done_sequences` feature allows you to specify sequences of events that trigger task completion. This provides fine-grained control over task termination based on conversation patterns.

**Key Features:**

- Specify multiple termination sequences
- Use convenient DSL syntax or full object syntax
- Strict consecutive matching (no skipping events)
- Efficient implementation using message parent pointers

### DSL Syntax (Recommended)

The DSL (Domain Specific Language) provides a concise way to specify sequences:

```python
from langroid.agent.task import Task, TaskConfig

config = TaskConfig(
    done_sequences=[
        "T, A",                    # Tool followed by agent response
        "T[calculator], A",        # Specific calculator tool by name
        "T[CalculatorTool], A",    # Specific tool by class reference (NEW!)
        "L, T, A, L",              # LLM, tool, agent, LLM sequence
        "C[quit|exit|bye]",        # Content matching regex
        "U, L, A",                 # User, LLM, agent sequence
    ]
)
task = Task(agent, config=config)
```

#### DSL Pattern Reference

| Pattern | Description | Event Type |
|---------|-------------|------------|
| `T` | Any tool | `TOOL` |
| `T[name]` | Specific tool by name | `SPECIFIC_TOOL` |
| `T[ToolClass]` | Specific tool by class (NEW!) | `SPECIFIC_TOOL` |
| `A` | Agent response | `AGENT_RESPONSE` |
| `L` | LLM response | `LLM_RESPONSE` |
| `U` | User response | `USER_RESPONSE` |
| `N` | No response | `NO_RESPONSE` |
| `C[pattern]` | Content matching regex | `CONTENT_MATCH` |

**Examples:**

- `"T, A"` - Any tool followed by agent handling
- `"T[search], A, T[calculator], A"` - Search tool, then calculator tool
- `"T[CalculatorTool], A"` - Specific tool class followed by agent handling (NEW!)
- `"L, C[complete|done|finished]"` - LLM response containing completion words
- `"TOOL, AGENT"` - Full words also supported

### Full Object Syntax

For more control, use the full object syntax:

```python
from langroid.agent.task import (
    Task, TaskConfig, DoneSequence, AgentEvent, EventType
)

config = TaskConfig(
    done_sequences=[
        DoneSequence(
            name="tool_handled",
            events=[
                AgentEvent(event_type=EventType.TOOL),
                AgentEvent(event_type=EventType.AGENT_RESPONSE),
            ]
        ),
        DoneSequence(
            name="specific_tool_pattern",
            events=[
                AgentEvent(
                    event_type=EventType.SPECIFIC_TOOL,
                    tool_name="calculator",
                    # Can also use tool_class for type-safe references (NEW!):
                    # tool_class=CalculatorTool
                ),
                AgentEvent(event_type=EventType.AGENT_RESPONSE),
            ]
        ),
    ]
)
```

### Event Types

The following event types are available:

| EventType | Description | Additional Parameters |
|-----------|-------------|----------------------|
| `TOOL` | Any tool message generated | - |
| `SPECIFIC_TOOL` | Specific tool by name or class | `tool_name`, `tool_class` (NEW!) |
| `LLM_RESPONSE` | LLM generates a response | - |
| `AGENT_RESPONSE` | Agent responds (e.g., handles tool) | - |
| `USER_RESPONSE` | User provides input | - |
| `CONTENT_MATCH` | Response matches regex pattern | `content_pattern` |
| `NO_RESPONSE` | No valid response from entity | - |

### Examples

#### Example 1: Tool Completion
Task completes after any tool is used and handled:

```python
config = TaskConfig(done_sequences=["T, A"])
```

This is equivalent to `done_if_tool=True` but happens after the agent handles the tool.
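
As a concrete illustration, here is a minimal sketch (the tool name and fields are
illustrative) where the task ends once the tool has been generated by the LLM and
handled by the agent:

```python
import langroid as lr

class SquareTool(lr.ToolMessage):
    request: str = "square_tool"
    purpose: str = "To compute the square of a <number>"
    number: int

    def handle(self) -> str:
        return str(self.number ** 2)

agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        system_message="Use the `square_tool` to answer the user's question."
    )
)
agent.enable_message(SquareTool)

task = lr.Task(
    agent,
    interactive=False,
    config=lr.TaskConfig(done_sequences=["T, A"]),
)
result = task.run("What is the square of 7?")  # ends after the tool is handled
```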

#### Example 2: Multi-Step Process
Task completes after a specific conversation pattern:

```python
config = TaskConfig(
    done_sequences=["L, T[calculator], A, L"]
)
# Completes after: LLM response → calculator tool → agent handles → LLM summary
```

#### Example 3: Multiple Exit Conditions
Different ways to complete the task:

```python
config = TaskConfig(
    done_sequences=[
        "C[quit|exit|bye]",           # User says quit
        "T[calculator], A",           # Calculator used
        "T[search], A, T[search], A", # Two searches performed
    ]
)
```

#### Example 4: Tool Class References (NEW!)
Use actual tool classes instead of string names for type safety:

```python
from langroid.agent.tool_message import ToolMessage

class CalculatorTool(ToolMessage):
    request: str = "calculator"
    # ... tool implementation

class SearchTool(ToolMessage):
    request: str = "search"
    # ... tool implementation

# Enable tools on the agent
agent.enable_message([CalculatorTool, SearchTool])

# Use tool classes in done sequences
config = TaskConfig(
    done_sequences=[
        "T[CalculatorTool], A",  # Using class name
        "T[SearchTool], A, T[CalculatorTool], A",  # Multiple tools
    ]
)
```

**Benefits of tool class references:**
- **Type-safe**: IDE can validate tool class names
- **Refactoring-friendly**: Renaming tool classes automatically updates references
- **No string typos**: Compiler/linter catches invalid class names
- **Better IDE support**: Autocomplete and go-to-definition work

#### Example 5: Mixed Syntax
Combine DSL strings and full objects:

```python
config = TaskConfig(
    done_sequences=[
        "T, A",  # Simple DSL
        "T[CalculatorTool], A",  # Tool class reference (NEW!)
        DoneSequence(  # Full control
            name="complex_check",
            events=[
                AgentEvent(
                    event_type=EventType.SPECIFIC_TOOL,
                    tool_name="database_query",
                    tool_class=DatabaseQueryTool,  # Can use class directly (NEW!)
                    responder="DatabaseAgent"
                ),
                AgentEvent(event_type=EventType.AGENT_RESPONSE),
            ]
        ),
    ]
)
```

## Implementation Details

### How Done Sequences Work

Done sequences operate at the **task level** and are based on the **sequence of valid responses** generated during a task's execution. When a task runs, it maintains a `response_sequence` that tracks each message (ChatDocument) as it's processed.

**Key points:**
- Done sequences are checked only within a single task's scope
- They track the temporal order of responses within that task
- The response sequence is built incrementally as the task processes each step
- Only messages that represent valid responses are added to the sequence

### Response Sequence Building
The task builds its response sequence during execution:

```python
# In task.run(), after each step:
if self.pending_message is not None:
    if (not self.response_sequence or 
        self.pending_message.id() != self.response_sequence[-1].id()):
        self.response_sequence.append(self.pending_message)
```

### Message Chain Retrieval
Done sequences are checked against the response sequence:

```python
def _get_message_chain(self, msg: ChatDocument, max_depth: Optional[int] = None):
    """Get the chain of messages from response sequence"""
    if max_depth is None:
        max_depth = 50  # default
        if self._parsed_done_sequences:
            max_depth = max(len(seq.events) for seq in self._parsed_done_sequences)
    
    # Simply return the last max_depth elements from response_sequence
    return self.response_sequence[-max_depth:]
```

**Note:** The response sequence used for done sequences is separate from the parent-child pointer system. Parent pointers track causal relationships and lineage across agent boundaries (important for debugging and understanding delegation patterns), while response sequences track temporal order within a single task for termination checking.
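
Conceptually, checking a done sequence amounts to comparing the tail of the response
sequence, one message per event, against the sequence's events. A simplified sketch
(illustrative, not the actual `Task` code):

```python
# Illustrative sketch only -- the real logic lives in
# Task._matches_sequence_with_current() and Task._classify_event().
def matches_sequence(chain: list, events: list, matches_event) -> bool:
    """chain: tail of response_sequence; events: a DoneSequence's events."""
    if len(chain) < len(events):
        return False
    tail = chain[-len(events):]
    # strict, consecutive matching: each message must match its event
    return all(matches_event(msg, ev) for msg, ev in zip(tail, events))
```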

### Strict Matching
Events must occur consecutively without intervening messages:

```python
# This sequence: [TOOL, AGENT_RESPONSE]
# Matches: USER → LLM(tool) → AGENT
# Does NOT match: USER → LLM(tool) → USER → AGENT
```

### Performance

- Efficient O(n) traversal where n is sequence length
- No full history scan needed
- Early termination on first matching sequence

## Best Practices

1. **Use DSL for Simple Cases**
   ```python
   # Good: Clear and concise
   done_sequences=["T, A"]
   
   # Avoid: Verbose for simple patterns
   done_sequences=[DoneSequence(events=[...])]
   ```

2. **Name Your Sequences**
   ```python
   DoneSequence(
       name="calculation_complete",  # Helps with debugging
       events=[...]
   )
   ```

3. **Order Matters**
   - Put more specific sequences first
   - General patterns at the end

4. **Test Your Sequences** (see the fuller sketch after this list)
   ```python
   # Use MockLM for testing
   agent = ChatAgent(
       ChatAgentConfig(
           llm=MockLMConfig(response_fn=lambda x: "test response")
       )
   )
   ```

5. **Combine with Other Methods**
   ```python
   config = TaskConfig(
       done_if_tool=True,      # Quick exit on any tool
       done_sequences=["L, L, L"],  # Or after 3 LLM responses
       max_turns=10,           # Hard limit
   )
   ```
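
Expanding on practice #4, here is a fuller test sketch that uses a mock LLM so no API
calls are made (assuming `MockLMConfig` is imported from
`langroid.language_models.mock_lm`; adjust the import to your version):

```python
import langroid as lr
from langroid.language_models.mock_lm import MockLMConfig

# Agent whose "LLM" always returns a fixed string -- no API calls.
agent = lr.ChatAgent(
    lr.ChatAgentConfig(llm=MockLMConfig(response_fn=lambda x: "test response"))
)

# Done after a single LLM response.
config = lr.TaskConfig(done_sequences=["L"])
task = lr.Task(agent, interactive=False, config=config)

result = task.run("hello")
assert result is not None and "test response" in result.content
```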

## Reference

### Code Examples
- **Basic example**: [`examples/basic/done_sequences_example.py`](../../examples/basic/done_sequences_example.py)
- **Test cases**: [`tests/main/test_done_sequences.py`](../../tests/main/test_done_sequences.py) (includes tool class tests)
- **DSL tests**: [`tests/main/test_done_sequences_dsl.py`](../../tests/main/test_done_sequences_dsl.py)
- **Parser tests**: [`tests/main/test_done_sequence_parser.py`](../../tests/main/test_done_sequence_parser.py)

### Core Classes
- `TaskConfig` - Configuration including `done_sequences`
- `DoneSequence` - Container for event sequences
- `AgentEvent` - Individual event in a sequence
- `EventType` - Enumeration of event types

### Parser Module
- `langroid.agent.done_sequence_parser` - DSL parsing functionality

### Task Methods
- `Task.done()` - Main method that checks sequences
- `Task._matches_sequence_with_current()` - Sequence matching logic
- `Task._classify_event()` - Event classification
- `Task._get_message_chain()` - Message traversal

## Migration Guide

If you're currently overriding `Task.done()`:

```python
# Before: Custom done() method
class MyTask(Task):
    def done(self, result=None, r=None):
        if some_complex_logic(result):
            return (True, StatusCode.DONE)
        return super().done(result, r)

# After: Use done_sequences
config = TaskConfig(
    done_sequences=["T[my_tool], A, L"]  # Express as sequence
)
task = Task(agent, config=config)  # No subclassing needed
```

**NEW: Using Tool Classes Instead of Strings**

If you have tool classes defined, you can now reference them directly:

```python
# Before: Using string names (still works)
config = TaskConfig(
    done_sequences=["T[calculator], A"]  # String name
)

# After: Using tool class references (recommended)
config = TaskConfig(
    done_sequences=["T[CalculatorTool], A"]  # Class name
)
```

This provides better type safety and makes refactoring easier.

## Troubleshooting

**Sequence not matching?**

- Check that events are truly consecutive (no intervening messages)
- Use logging to see the actual message chain
- Verify tool names match exactly

**Type errors with DSL?**

- Ensure you're using strings for DSL patterns
- Check that tool names in `T[name]` don't contain special characters

**Performance concerns?**

- Sequences only traverse as deep as needed
- Consider shorter sequences for better performance
- Use specific tool names to avoid unnecessary checks

## Text-Based Termination Signals

### `TaskConfig.recognize_string_signals`

Controls whether the task recognizes text-based orchestration signals like `DONE`,
`PASS`, `DONE_PASS`, etc.

```python
from langroid.agent.task import Task, TaskConfig

# Default: signals are recognized
task = Task(agent, config=TaskConfig(recognize_string_signals=True))

# Disable: signals treated as plain text
task = Task(agent, config=TaskConfig(recognize_string_signals=False))
```

**When `True` (default):**

- `DONE` in a response signals task completion
- `PASS` signals passing control to another agent
- `DONE_PASS` combines both behaviors

**When `False`:**

- These strings are treated as literal text
- Useful when LLM responses might accidentally contain these keywords
- Task termination must use other mechanisms (tools, `done_sequences`, etc.)

Note that `PASS` also relates to message routing between agents. For more details
on text-based routing and the related `recognize_recipient_in_content` setting,
see [Message Routing](message-routing.md).

## Summary

The `done_sequences` feature provides a powerful, declarative way to control task
termination based on conversation patterns. The DSL syntax makes common cases
simple while the full object syntax provides complete control when needed. This
approach eliminates the need to subclass `Task` and override `done()` for most
use cases, leading to cleaner, more maintainable code.
</file>

<file path="docs/tutorials/langroid-tour.md">
# A quick tour of Langroid

This is a quick tour of some Langroid features. For a more detailed guide,
see the [Getting Started guide](https://langroid.github.io/langroid/quick-start/).
There are many more features besides the ones shown here. To explore langroid more,
see the sections of the main [docs](https://langroid.github.io/langroid/),
and a 
[Colab notebook](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb) 
you can try yourself.  


## Chat directly with LLM

Imports:

```python
import langroid as lr
import langroid.language_models as lm
```


Set up the LLM; note how you can specify the chat model -- if omitted, defaults
to OpenAI `GPT4o`. See the guide to using Langroid with 
[local/open LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/),
and with [non-OpenAI LLMs](https://langroid.github.io/langroid/tutorials/non-openai-llms/).
    
```python
llm_config = lm.OpenAIGPTConfig( 
   chat_model="gpt-5-mini"
)
llm = lm.OpenAIGPT(llm_config)
```

Chat with bare LLM -- no chat accumulation, i.e. follow-up responses will *not*
be aware of prior conversation history (you need an Agent for that, see below).

```python
llm.chat("1 2 4 7 11 ?")
# ==> answers 16, with some explanation
```

## Agent

Make a [`ChatAgent`][langroid.agent.chat_agent.ChatAgent],
and chat with it; it now accumulates conversation history:

```python
agent = lr.ChatAgent(lr.ChatAgentConfig(llm=llm_config))
agent.llm_response("Find the next number: 1 2 4 7 11 ?")
# => responds 16
agent.llm_response("and then?)
# => answers 22
```

## Task

Make a [`Task`][langroid.agent.task.Task] and create a chat loop with the user:

```python
task = lr.Task(agent, interactive=True)
task.run()
```

## Tools/Functions/Structured outputs

Define a [`ToolMessage`][langroid.agent.tool_message.ToolMessage] 
using Pydantic (v1) -- this gets transpiled into system-message instructions
to the LLM, so you never have to deal with writing a JSON schema.
(Besides JSON-based tools, Langroid also supports 
[XML-based tools](https://langroid.github.io/langroid/notes/xml-tools/), which 
are far more reliable when having the LLM return code in a structured output.)


```python
from pydantic import BaseModel

class CityTemperature(BaseModel):
    city: str
    temp: float

class WeatherTool(lr.ToolMessage):
    request: str = "weather_tool" #(1)!
    purpose: str = "To extract <city_temp> info from text" #(2)!

    city_temp: CityTemperature

    # tool handler
    def handle(self) -> CityTemperature:
        return self.city_temp
```

1. When this tool is enabled for an agent, a method named `weather_tool` gets auto-inserted in the agent class, 
   with body being the `handle` method -- this method handles the LLM's generation 
   of this tool.
2. The value of the `purpose` field is used to populate the system message to the LLM,
   along with the Tool's schema derived from its Pydantic-based definition.

Enable the Agent to use the `ToolMessage`, and set a system message describing the 
agent's task:

```python
agent.enable_message(WeatherTool)
agent.config.system_message = """
 Your job is to extract city and temperature info from user input
 and return it using the `weather_tool`.
"""
```

Create specialized task that returns a `CityTemperature` object:

```python
# configure task to terminate after (a) LLM emits a tool, (b) tool is handled by Agent
task_config = lr.TaskConfig(done_sequences=["T,A"])

# create a task that returns a CityTemperature object
task = lr.Task(agent, interactive=False, config=task_config)[CityTemperature]

# run task, with built-in tool-handling loop
data = task.run("It is 45 degrees F in Boston")

assert data.city == "Boston"
assert int(data.temp) == 45
```

## Chat with a document (RAG)

Create a [`DocChatAgent`][langroid.agent.special.doc_chat_agent.DocChatAgent].

```python
doc_agent_config = lr.agent.special.DocChatAgentConfig(llm=llm_config)
doc_agent = lr.agent.special.DocChatAgent(doc_agent_config)
```

Ingest the contents of a web page into the agent 
(this involves chunking, indexing into a vector-database, etc.):

```python
doc_agent.ingest_doc_paths("https://en.wikipedia.org/wiki/Ludwig_van_Beethoven")
```

Ask a question:

```python
result = doc_agent.llm_response("When did Beethoven move from Bonn to Vienna?")
```

You should see the streamed response with citations like this:

![langroid-tour-beethoven.png](langroid-tour-beethoven.png)

## Two-agent interaction

Set up a teacher agent:

```python
from langroid.agent.tools.orchestration import DoneTool

teacher = lr.ChatAgent(
    lr.ChatAgentConfig(
        llm=llm_config,
        system_message=f"""
        Ask a numbers-based question, and your student will answer.
        You can then provide feedback or hints to the student to help them
        arrive at the right answer. Once you receive the right answer,
        use the `{DoneTool.name()}` tool to end the session.
        """
    )
)

teacher.enable_message(DoneTool)
teacher_task = lr.Task(teacher, interactive=False)

```

Set up a student agent:

```python
student = lr.ChatAgent(
    lr.ChatAgentConfig(
        llm=llm_config,
        system_message=f"""
        You will receive a numbers-related question. Answer to the best of
        your ability. If your answer is wrong, you will receive feedback or hints,
        and you can revise your answer, and repeat this process until you get 
        the right answer.
        """
    )
)

student_task = lr.Task(student, interactive=False, single_round=True)
```

Make the `student_task` a subtask of the `teacher_task`:

```python
teacher_task.add_sub_task(student_task)
```

Run the teacher task:

```python
teacher_task.run()
```

You should then see this type of interaction:

![langroid-tour-teacher.png](langroid-tour-teacher.png)
</file>

<file path="issues/20251010-concurrent-rag-status.md">
# Concurrent DocChat RAG – Current Status (2025-10-10)

## Summary
- Sequential DocChat queries work against both cloud and local (Docker) Qdrant backends.
- Concurrent DocChat via `run_batch_tasks` now returns full answers instead of `DO-NOT-KNOW`; `examples/docqa/rag-concurrent.py --local-embeddings --use-builtin-batch` shows 900 char responses in concurrent mode.
- Key fixes in place:
  - `EmbeddingModel.clone()` + `VectorStore.clone()` ensure each clone gets an independent embedding model and leaves `replace_collection=False`.
  - `ChatAgent.clone()` delegates to `_clone_extra_state`, with `DocChatAgent` copying `chunked_docs` and related caches.
  - `DocChatAgent.get_relevant_extracts` now falls back to in-memory `chunked_docs` when the vector store collection is missing/empty, preventing premature `DO-NOT-KNOW`.
  - Regression test `tests/main/test_concurrent_doc_chat_qdrant.py` passes on the fix branch (uses real Qdrant + SentenceTransformer embeddings + MockLM) and fails on main after we drop the backing collection to force the fallback path.

## Findings
1. **Guardrail gap** – The original `get_relevant_extracts` short-circuited whenever Qdrant reported `points_count=0`, even if `chunked_docs` were populated. Clones hit this path because a fresh client often reports zero points immediately after ingest. The fallback resolves this by using the cached chunks whenever the vector store hasn’t caught up yet.
2. **Regression coverage** – The updated pytest harness no longer monkeypatches retrieval. It exercises the full `run_batch_tasks` flow against local Qdrant, with a `MockLM` to avoid external API calls. On main it fails at the `clone.chunked_docs` assertion, confirming the test’s regression behavior.
3. **Example validation** – Running the concurrent example with `--use-builtin-batch` and `--local-embeddings` now yields overlapping worker logs and long-form answers; deleting the collection post-ingest reproduces the legacy failure on main but passes with the new fallback.

## Outstanding Items
- Ensure CI spins up Qdrant before running `tests/main/test_concurrent_doc_chat_qdrant.py` (workflow already starts the container; keep an eye on readiness timing).
- Monitor for any cases where both vector store and `chunked_docs` are empty (e.g., ingest skipped). The new fallback will still produce `DO-NOT-KNOW` in that scenario, which is expected.
- Verify cloud Qdrant regression: run the concurrent example against a remote collection to ensure the fallback doesn’t mask real empty collections.

## Next Steps
1. Add a short CI check (or doc note) to confirm Qdrant health before pytest kicks off.
2. Evaluate whether we should log a debug message when the fallback path is used—helpful for diagnosing future data-sync delays.
3. Consider extending regression coverage to include the cloud Qdrant path once a stable test fixture exists.

## Fix Timeline (2025-10-08 → 2025-10-11)
- **Async blocking in DocChatAgent** (see `issues/20251010-concurrent-rag.md`): `llm_response_async` waited on synchronous retrieval, so `asyncio.gather` serialized every task. We wrapped `answer_from_docs` with `asyncio.to_thread`, letting concurrent tasks progress while the main event loop stays free.
- **Clone safety & retrieval fallback** (see `issues/20251010-concurrent-rag-codex.md`): cloned agents were reusing embedding models and losing access to cached chunks when Qdrant reported zero points. We taught embedding/vector-store configs to clone themselves and had `get_relevant_extracts` fall back to in-memory `chunked_docs`, restoring parallel runs with local embeddings.
- **Cross-encoder race condition** (see `issues/20251011-cross-encoder-race-bug.md`): simultaneous reranker calls tried to move a shared `CrossEncoder` between devices, triggering the PyTorch “meta tensor” error. A per-model cache plus locking (defaulting to CPU, override via `cross_encoder_device`) now keeps concurrent reranks deterministic across CPU, CUDA, and MPS.
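
A minimal sketch of the shape of the first fix (illustrative only; the real change is in `DocChatAgent.llm_response_async`):

```python
import asyncio

class DocChatAgentSketch:
    """Illustrative only: shows the shape of the fix, not the real class."""

    def answer_from_docs(self, query: str) -> str:
        ...  # synchronous retrieval + reranking + LLM answer

    async def llm_response_async(self, query: str) -> str:
        # Run the blocking pipeline in a worker thread so asyncio.gather
        # can overlap many such calls instead of serializing them.
        return await asyncio.to_thread(self.answer_from_docs, query)
```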
</file>

<file path="issues/20251011-cross-encoder-race-bug.md">
# Cross-Encoder Reranker Race Condition Plan

## Summary

Concurrent DocChatAgent tasks that enable `cross_encoder_reranking_model`
raise a PyTorch `NotImplementedError` ("Cannot copy out of meta tensor; no data!")
intermittently. The failure originates inside `CrossEncoder.predict()` when the
underlying Hugging Face model is moved between devices while still in the meta
state. Multiple threads instantiating and using the same cross encoder at once
trigger this race.

## Current Reproduction Status

- `tests/main/test_concurrent_rag_simple.py` fails intermittently on `main` and
  on the working branch when run several times in a row (10–20 iterations).
- Failures occur only when `cross_encoder_reranking_model` is set and multiple
  tasks run concurrently; sequential runs pass.

## Root Cause Hypothesis

1. Each DocChatAgent clone instantiates its own `CrossEncoder` inside
   `rerank_with_cross_encoder()`.
2. SentenceTransformers lazily initializes the underlying HF model on the first
   call to `.predict()`. During initialization, `model.to(device)` tries to copy
   tensors out of the “meta” device.
3. When multiple clones call `.predict()` at the same time, they each try to
   load/transfer shared parameters simultaneously, and one thread encounters the
   `meta` tensor copy race, causing the `NotImplementedError`.

## Investigation Tasks

1. **Confirm shared-state behavior**
   - Inspect `CrossEncoder.predict` to verify it performs `self.model.to(...)`
     on each call, making it unsafe to invoke from multiple threads without
     coordination.
   - Capture concurrent stack traces/logs during failure to confirm multiple
     threads enter the to() conversion simultaneously.

2. **Reproduce in isolation**
   - Write a minimal script that spawns several threads; each thread loads the
     same cross-encoder model and immediately calls `.predict()` to reproduce
     the meta-tensor race outside Langroid. This will clarify whether the bug
     is entirely in PyTorch/HF or also in Langroid’s usage.

3. **Benchmark loading cost**
   - Measure time to instantiate `CrossEncoder` and to run `.predict()` so we
     understand the overhead when caching the model vs. reloading on demand.

## Proposed Fix

Implement a per-model cache with synchronization so each process holds one
`CrossEncoder` instance per model name:

1. **Global cache**
   - Introduce a module-level helper (e.g., `_get_cross_encoder(model_name)`) in
     `doc_chat_agent.py` that stores models in a dictionary keyed by
     `model_name`.
   - Guard cache creation with a global `threading.Lock` to avoid double
     instantiation.

2. **Per-model execution lock**
   - Associate each cached model with a reentrant `Lock`. Before calling
     `predict`, acquire the lock to serialize access. This prevents concurrent
     `.predict()` calls from moving the model between devices at the same time.

3. **Stable device assignment**
   - Force the cached model onto a specific device once (likely CPU unless
     configured otherwise). Skip repeated `model.to()` calls inside the lock so
     subsequent predictions reuse the initialized weights without touching the
     meta tensors.

4. **Agent changes**
   - Update `DocChatAgent.rerank_with_cross_encoder` to fetch the cached
     `(model, lock)` pair and run prediction inside the per-model lock.
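
A minimal sketch of this cache-plus-lock helper (names and structure are illustrative; the real helper lives in `doc_chat_agent.py`):

```python
import threading
from typing import Dict, Tuple

from sentence_transformers import CrossEncoder

_CROSS_ENCODERS: Dict[str, Tuple[CrossEncoder, threading.Lock]] = {}
_CACHE_LOCK = threading.Lock()

def _get_cross_encoder(model_name: str, device: str = "cpu"):
    # Create each model at most once per process, on a fixed device.
    with _CACHE_LOCK:
        if model_name not in _CROSS_ENCODERS:
            model = CrossEncoder(model_name, device=device)
            _CROSS_ENCODERS[model_name] = (model, threading.Lock())
        return _CROSS_ENCODERS[model_name]

def rerank(model_name: str, query: str, passages: list[str]) -> list[float]:
    model, lock = _get_cross_encoder(model_name)
    # Serialize predict() so concurrent threads never move the model between
    # devices mid-initialization (the "meta tensor" race).
    with lock:
        return list(model.predict([(query, p) for p in passages]))
```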

## Validation Plan

1. Run `tests/main/test_concurrent_rag_simple.py` in a loop (e.g., 20 times) to
   ensure the race no longer triggers.
2. Execute the sequential control test and a small subset of the wider suite to
   confirm no regressions.
3. Optionally stress-test with more concurrent tasks and different
   `cross_encoder_reranking_model` values to ensure the cache handles multiple
   models correctly.

## Follow-Up Considerations

- Document the shared-model behavior near the config option so users know the
  reranker is serialized per model.
- Evaluate batching requests through the shared cross encoder in future work to
  regain some concurrency while avoiding race conditions.

## Progress Log

- **2025-10-11:** Implemented thread-safe cross-encoder cache in `DocChatAgent` to reuse a single model instance per name and serialize `.predict()` calls. Adjusted reranker to disable the default progress bar for batch runs.
- **2025-10-11:** Validated the fix by running `uv run pytest tests/main/test_concurrent_rag_simple.py -k cross_encoder -x` once and then in a 10× loop; all iterations passed without reproducing the meta-tensor error.
- **2025-10-11:** Defaulted cached cross encoders to CPU but added `cross_encoder_device` override on `DocChatAgentConfig` so users with GPUs can opt in while keeping library-safe defaults.
- **2025-10-11:** Added `--cross-encoder-device` pytest option (with optional `TEST_CROSS_ENCODER_DEVICE` env fallback) so the concurrency test can be run against CPU, CUDA, or MPS paths without code edits.
</file>

<file path="issues/20251011-pr-926-description.md">
# PR 926 Summary and Notes

## Pull Request Description

- fixed concurrent reranking by adding a shared cross-encoder cache (auto CUDA/MPS/CPU, optional override) and documenting the setup
- broadened `DocChatAgent` to accept any `LLMConfig`, cleaned up vector-store embedding cloning, and kept the concurrency demo relying on the default VecDB with opt-in flags for cross encoder/local embeddings
- expanded regression coverage (`tests/main/test_concurrent_rag_simple.py`) and updated docs for cross-encoder usage and device toggles

**Validation**
- `uv run pytest tests/main/test_concurrent_rag_simple.py -k cross_encoder -x`
- `uv run pytest -xvs tests/main/test_vector_stores.py::test_doc_chat_batch_with_vecdb_cloning`
- `uv run ruff check .`

## Cross-Encoder vs Embedding Model Handling

`DocChatAgent` relies on two model types when it runs multiple concurrent tasks:

1. **Embedding model** (part of the vector store) used for similarity retrieval. When clones shared the same embedding model instance, local SentenceTransformer-based models could clash. We now clone the embedding model per agent clone (lightweight enough to duplicate) so each clone gets a clean instance.
2. **Cross-encoder reranker** used to score passages jointly with the query. Duplication is expensive, so we cache a single instance per `(model, device)` and serialize `predict` calls behind a lock. This keeps GPU/CPU usage efficient while eliminating the "meta tensor" race.

In short: embeddings are cloned per clone for isolation; the cross encoder is shared but guarded for thread-safe access.
</file>

<file path="issues/20251107-fix-mcp-dectorator.md">
Title: Fix @mcp_tool pattern for fastmcp>=2.13 / mcp>=1.21

Date: 2025-11-07

Summary

The `@mcp_tool` decorator in Langroid currently accepts a concrete
`ClientTransport` (e.g., `StdioTransport`) created at module import time and
uses it to (a) open a short-lived connection to read the tool schema and (b)
later open a new connection when the tool is actually invoked. This pattern
works with older fastmcp/mcp, but with fastmcp≥2.13.0.2 and mcp≥1.21.0 the
transport instance becomes single-use after the first connection closes,
leading to `anyio.ClosedResourceError` when we try to reuse it.

Key files reviewed

- examples/mcp/claude-code-mcp-single.py
- langroid/agent/tools/mcp/decorators.py
- langroid/agent/tools/mcp/fastmcp_client.py

What happens at decorator time vs tool invocation time

Decorator time (module import):

- The decorator `@mcp_tool(server, tool_name)` runs immediately when the module
  is imported.
- `decorators.py` calls `get_tool(server, tool_name)` (sync wrapper) which
  `asyncio.run`s `get_tool_async`.
- `fastmcp_client.get_tool_async` does `async with FastMCPClient(server)`, which
  constructs an inner `fastmcp.client.Client(server)` and opens a session to the
  MCP server to fetch the tool definition (schema, description, etc.).
- A dynamic `ToolMessage` subclass is created with fields from the tool’s
  input schema. The class is annotated with `_client_config` that includes the
  original `server` argument so it can open a connection again later when the
  tool is invoked.
- The temporary client context is exited, closing the underlying session and
  transport.

Tool invocation time (at runtime in the agent):

- The tool’s `handle_async` calls the generated `call_tool_async`.
- `call_tool_async` reconstructs a new `FastMCPClient(**_client_config)` and
  opens a fresh connection to call `session.call_tool(...)`.

Why ClosedResourceError appears with newer fastmcp/mcp

- In our examples we pass a concrete `ClientTransport` instance to the
  decorator, e.g., a module-level `StdioTransport(...)`.
- At decorator time, we make one connection using that instance and then close
  it when exiting the client context.
- Later at tool invocation time, the generated tool tries to reuse the very same
  `ClientTransport` instance to open a second connection. With
  fastmcp≥2.13.0.2/mcp≥1.21.0 the transport object is effectively single-use
  and owns AnyIO channels/process handles that are closed when the first client
  context exits. Reusing it causes the session’s write side to be closed during
  `session.initialize()`, which surfaces as `anyio.ClosedResourceError` while
  sending the initial JSON-RPC request.
- Older versions (fastmcp==2.3.4, mcp==1.9.0) tolerated reusing the same
  transport instance, as the transport behaved more like a stateless “spec” or
  was internally recreated per connection. That leniency is gone in the newer
  stack, where transports manage lifecycle-bound resources tied to a single
  session.

Conclusion: passing a live, already-used `ClientTransport` instance through the
decorator leads to reusing a closed transport when the tool is actually
invoked, which triggers `ClosedResourceError` during session initialization.

Recommended fixes (choose one)

1) Pass a transport factory (or a server spec), not an instance

Create a zero-arg callable that returns a fresh transport each time. This keeps
the decorator pattern but ensures a brand-new transport is used for every
connection.

Example change to example file:

```python
from fastmcp.client.transports import StdioTransport
from langroid.agent.tools.mcp import mcp_tool

def transport_factory():
    return StdioTransport(command="claude", args=["mcp", "serve"], env={})

@mcp_tool(transport_factory, "Grep")
class GrepTool(lr.ToolMessage):
    async def handle_async(self):
        result = await self.call_tool_async()
        return f"<GrepResult>\n{result}\n</GrepResult>"
```

This works because each call path (`get_tool_async` at decorator time and
`call_tool_async` at runtime) gets a fresh transport by calling the factory.

2) Defer tool creation to runtime (avoid decorator entirely)

For scripts already running inside an event loop or when you want to avoid all
import-time side effects, use the async helper instead of the decorator:

```python
from fastmcp.client.transports import StdioTransport
from langroid.agent.tools.mcp.fastmcp_client import get_tool_async

async def main():
    BaseGrepTool = await get_tool_async(
        lambda: StdioTransport(command="claude", args=["mcp", "serve"], env={}),
        "Grep",
    )

    class GrepTool(BaseGrepTool):
        async def handle_async(self):
            result = await self.call_tool_async()
            return f"<GrepResult>\n{result}\n</GrepResult>"
```

3) Library-level hardening in Langroid (recommended)

Make Langroid resilient regardless of how callers pass `server` by allowing a
factory and by cloning transports when a live instance is provided.

Proposed changes (illustrative, not yet applied):

In `langroid/agent/tools/mcp/fastmcp_client.py`:

```python
from typing import Callable, Union
import inspect
from fastmcp.client.transports import ClientTransport

# Accept either a spec or a zero-arg factory returning a spec
ServerSpec = Union[str, FastMCP[Any], AnyUrl, ClientTransport, Callable[[], Union[str, FastMCP[Any], AnyUrl, ClientTransport]]]

class FastMCPClient:
    def __init__(self, server: ServerSpec, ...):
        self.server = server

    async def __aenter__(self) -> "FastMCPClient":
        server_spec = self.server() if callable(self.server) else self.server
        self._cm = Client(server_spec, ...)
        self.client = await self._cm.__aenter__()
        return self

    async def get_tool_async(self, tool_name: str) -> Type[ToolMessage]:
        ...
        def _as_factory(srv: ServerSpec):
            if callable(srv):
                return srv
            if isinstance(srv, ClientTransport):
                cls = srv.__class__
                sig = inspect.signature(cls)
                # build kwargs from attribute names that match ctor params
                kwargs = {
                    n: getattr(srv, n)
                    for n, p in sig.parameters.items()
                    if n != "self" and hasattr(srv, n)
                }
                return lambda: cls(**kwargs)
            return lambda: srv  # strings/URLs/FastMCP pass-through

        client_config = {
            "server": _as_factory(self.server),  # always a factory now
            ...
        }

        async def call_tool_async(itself: ToolMessage) -> Any:
            cfg = getattr(itself.__class__, "_client_config")
            server_factory = cfg["server"]
            async with FastMCPClient(server_factory, ...) as client:
                return await client.call_mcp_tool(itself.request, payload)
```

With this change:

- Callers may pass a transport instance, a factory, a URL, or a string. We
  always store a factory on the generated class, ensuring a fresh transport for
  each connection.
- `__aenter__` transparently supports receiving a factory and calling it.

Why this addresses the error

- The failure arises from reusing a closed `ClientTransport`. By switching to a
  factory-or-spec approach, every connection uses a brand-new transport
  instance, so the AnyIO channels and subprocess handles are valid during
  `session.initialize()` and the handshake completes normally.

Notes on behavior changes between versions

- The newer fastmcp/mcp stack ties the transport’s resources to the client
  context more strictly (e.g., AnyIO memory channels/process lifetime tied to
  the session). Reusing a transport object after the session is closed now fails
  early in `initialize()` with a closed writer, surfacing as
  `anyio.ClosedResourceError`.
- Older versions were more permissive about reusing the same instance, which is
  why the import-time decorator usage “accidentally” worked.

Action items

- Update examples to pass a factory to `@mcp_tool` (Option 1), or switch those
  examples to `get_tool_async` at runtime (Option 2).
- Optionally harden Langroid per Option 3 so user code keeps working even when
  a transport instance is passed.

Appendix: example patch to the failing example

```diff
--- a/examples/mcp/claude-code-mcp-single.py
+++ b/examples/mcp/claude-code-mcp-single.py
@@
-transport = StdioTransport(
-    command="claude",
-    args=["mcp", "serve"],
-    env={},
-)
+def transport_factory():
+    return StdioTransport(
+        command="claude",
+        args=["mcp", "serve"],
+        env={},
+    )

@@
-@mcp_tool(transport, "Grep")
+@mcp_tool(transport_factory, "Grep")
 class GrepTool(lr.ToolMessage):
     async def handle_async(self):
         # call the actual tool
         result: str = await self.call_tool_async()
```
</file>

<file path="issues/20251123-new-model-support-gpt51-gemini30.md">
# New Model Support: GPT-5.1 and Gemini 3.0

## Objective

Add support for newly released models to Langroid's `model_info.py`:

- GPT-5.1 variants (gpt-5.1, gpt-5.1-chat, gpt-5.1-codex, gpt-5.1-codex-mini)
- Gemini 3.0 variants (to be determined from models.dev)

## Background

New models have been released by OpenAI and Google that need to be added to
Langroid's model registry. This ensures users can leverage these models with
proper cost tracking, context length limits, and feature support.

## Information Sources

- Model specs (context length, costs): https://models.dev/
- OpenAI feature support: https://platform.openai.com/docs/api-reference/chat
- Assumption: GPT-5.1 features similar to GPT-5

## GPT-5.1 Model Information

Based on models.dev data (as of Nov 2025):

### 1. gpt-5.1
- **Context Length**: 272,000 tokens
- **Max Output**: 128,000 tokens
- **Input Cost**: $1.25 per 1M tokens
- **Output Cost**: $10.00 per 1M tokens
- **Cache Read Cost**: $0.13 per 1M tokens
- **Notes**: Released 2024-09, Azure variant

### 2. gpt-5.1-chat
- **Context Length**: 128,000 tokens
- **Max Output**: 16,384 tokens
- **Input Cost**: $1.25 per 1M tokens
- **Output Cost**: $10.00 per 1M tokens
- **Cache Read Cost**: $0.13 per 1M tokens
- **Notes**: Released 2024-09, Azure variant

### 3. gpt-5.1-codex
- **Context Length**: 400,000 tokens
- **Max Output**: 128,000 tokens
- **Input Cost**: $1.25 per 1M tokens
- **Output Cost**: $10.00 per 1M tokens
- **Cache Read Cost**: $0.13 per 1M tokens
- **Notes**: Released 2024-09, Azure variant, code-optimized

### 4. gpt-5.1-codex-mini
- **Context Length**: 400,000 tokens
- **Max Output**: 128,000 tokens
- **Input Cost**: $0.25 per 1M tokens
- **Output Cost**: $2.00 per 1M tokens
- **Cache Read Cost**: $0.03 per 1M tokens
- **Notes**: Released 2024-09, Azure variant, code-optimized, cheaper

## GPT-5.1 Feature Support

Based on similarity to GPT-5 (to be confirmed from OpenAI API reference):

- **has_tools**: `False` (reasoning models typically don't support tools)
- **has_structured_output**: `True` (likely similar to GPT-5)
- **allows_streaming**: `True` (default)
- **allows_system_message**: `True` (default)
- **unsupported_params**: `["temperature"]` (likely similar to GPT-5)
- **rename_params**: `{"max_tokens": "max_completion_tokens"}` (likely)
- **Special parameters**: May support `reasoning_effort` (to be confirmed)

## Gemini 3.0 Model Information

**TO BE DETERMINED**: Need to fetch from models.dev

Expected variants based on previous patterns:
- gemini-3.0-pro
- gemini-3.0-flash
- gemini-3.0-flash-lite

Information needed for each:
- Context length
- Max output tokens
- Input/output costs
- Cached input costs
- Feature support flags

## Implementation Tasks

### 1. Add Enum Entries

In `langroid/language_models/model_info.py`:

**OpenAIChatModel enum** (add after existing GPT-5 models):
```python
class OpenAIChatModel(ModelName):
    # ... existing models ...
    GPT5_1 = "gpt-5.1"
    GPT5_1_CHAT = "gpt-5.1-chat"
    GPT5_1_CODEX = "gpt-5.1-codex"
    GPT5_1_CODEX_MINI = "gpt-5.1-codex-mini"
```

**GeminiModel enum** (add after existing Gemini 2.5 models):
```python
class GeminiModel(ModelName):
    # ... existing models ...
    GEMINI_3_0_PRO = "gemini-3.0-pro"  # if exists
    GEMINI_3_0_FLASH = "gemini-3.0-flash"  # if exists
    GEMINI_3_0_FLASH_LITE = "gemini-3.0-flash-lite"  # if exists
```

### 2. Add MODEL_INFO Entries

Add comprehensive `ModelInfo` entries for each new model with:
- Provider (OpenAI or Google)
- Context length
- Max output tokens
- Costs (input, output, cached)
- Feature flags
- API parameter quirks
- Description
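
For orientation only, a hypothetical entry might look roughly like the sketch below;
the exact `ModelInfo` field names must be copied from the existing GPT-5 entries in
`model_info.py`, not from this sketch.

```python
# Hypothetical sketch -- field names should be checked against the existing
# GPT-5 entries in langroid/language_models/model_info.py.
MODEL_INFO[OpenAIChatModel.GPT5_1.value] = ModelInfo(
    name=OpenAIChatModel.GPT5_1.value,
    provider=ModelProvider.OPENAI,
    context_length=272_000,
    max_output_tokens=128_000,
    input_cost_per_million=1.25,
    cached_cost_per_million=0.13,
    output_cost_per_million=10.00,
    has_tools=False,
    has_structured_output=True,
    unsupported_params=["temperature"],
    rename_params={"max_tokens": "max_completion_tokens"},
    description="GPT-5.1",
)
```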

### 3. Update OpenAI_API_ParamInfo (if needed)

If GPT-5.1 supports `reasoning_effort` or other special parameters, add to
the appropriate parameter lists.

### 4. Verification

After implementation:
- Run `make check` to ensure linting and type checking pass
- Verify model names are accessible via the enums
- Verify costs and limits are correctly set
- Check that feature flags match OpenAI API capabilities

## Questions/Clarifications Needed

1. **Gemini 3.0**: Does this model exist yet? If so, what are the exact variant
   names and specs?

2. **GPT-5.1 Feature Support**: Should we confirm all feature flags from the
   OpenAI API reference, or is assuming similarity to GPT-5 acceptable?

3. **Special Parameters**: Do GPT-5.1 models support `reasoning_effort` or
   other special parameters?

4. **Provider**: The models.dev data shows these as "Azure" variants - should
   they still use `ModelProvider.OPENAI`?

## Files to Modify

- `langroid/language_models/model_info.py`
  - Add enum entries for new models
  - Add MODEL_INFO dictionary entries
  - Update OpenAI_API_ParamInfo if needed

## Testing

No specific unit tests are required for individual model definitions (per user
guidance). The implementation focuses on:
- Correct model name registration
- Accurate API cost tracking
- Proper context length limits
- Correct feature support flags

## References

- models.dev: https://models.dev/
- OpenAI Chat API: https://platform.openai.com/docs/api-reference/chat
- Existing GPT-5 implementation: `langroid/language_models/model_info.py:323-364`
- Existing Gemini 2.5 implementation: Similar location in same file
</file>

<file path="issues/issue-919-llamacpp-embeddings.md">
# Issue #919: llama.cpp Embeddings Support

## Background

User reported issues using llama.cpp server for local embeddings with Langroid. The error occurred when using `LlamaCppServerEmbeddingsConfig`:

```
TypeError: list indices must be integers or slices, not str
```

This happened at line 466 in `langroid/embedding_models/models.py`:
```python
embeddings = response.json()["embedding"]
```

## Investigation Summary

### Can llama.cpp Generate Embeddings?

**YES!** llama.cpp supports embeddings in two ways:

1. **Dedicated embedding models** (RECOMMENDED):
   - nomic-embed-text-v1.5 (768 dims)
   - nomic-embed-text-v2-moe
   - nomic-embed-code
   - Other GGUF embedding models

2. **Regular LLMs** (works but not optimal):
   - gpt-oss-20b, gpt-oss-120b
   - Llama models
   - By extracting internal representations

### How to Enable

Start llama-server with the `--embeddings` flag:

```bash
./llama-server -ngl 100 -c 2048 \
  -m ~/nomic-embed-text-v1.5.Q8_0.gguf \
  --host localhost --port 8080 \
  --embeddings -b 2048 -ub 2048
```

## llama.cpp Embedding Endpoints

llama.cpp provides multiple embedding endpoints with different response formats:

### 1. Native `/embedding` endpoint

**Request:**
```json
{
  "content": "your text here"
}
```

**Response:**
```json
{
  "embedding": [0.1, 0.2, 0.3, ...]
}
```

### 2. OpenAI-compatible `/v1/embeddings` endpoint

**Request:**
```json
{
  "input": "your text here",
  "model": "model-name"
}
```

**Response:**
```json
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "embedding": [0.1, 0.2, 0.3, ...],
      "index": 0
    }
  ],
  "model": "model-name",
  "usage": {
    "prompt_tokens": 5,
    "total_tokens": 5
  }
}
```

## The Problem

The original Langroid code expected only format #1 (native):
```python
embeddings = response.json()["embedding"]
```

However, llama.cpp can return **different formats** depending on:
- Endpoint used (`/embedding` vs `/v1/embeddings`)
- Server version/configuration
- Batch mode settings

The error indicated that `response.json()` returned a **list**, not a **dict**, suggesting llama.cpp returned an array format.

## Discovered Response Formats

Through investigation, we identified **5 possible response formats**:

1. **Native format**: `{"embedding": [floats]}`
2. **Array format**: `[{"embedding": [floats]}]`
3. **Double-nested**: `[{"embedding": [[floats]]}]`
4. **OpenAI-compatible**: `{"data": [{"embedding": [floats]}]}`
5. **Dict-nested**: `{"embedding": [[floats]]}`

## Our Solution

### Implementation

Added a robust `_extract_embedding()` method in `langroid/embedding_models/models.py` (lines 483-544) that:

1. Tries each format in order
2. Validates the extracted embedding is a list of floats
3. Provides clear error messages if format is unrecognized

```python
def _extract_embedding(
    self, response_json: dict[str, Any] | list[Any]
) -> List[int | float]:
    """
    Extract embedding vector from llama.cpp response.

    Handles multiple response formats:
    1. Native /embedding: {"embedding": [floats]}
    2. Array format: [{"embedding": [floats]}]
    3. Double-nested: [{"embedding": [[floats]]}]
    4. OpenAI /v1/embeddings: {"data": [{"embedding": [floats]}]}
    5. Nested in dict: {"embedding": [[floats]]}

    Args:
        response_json: The JSON response from llama.cpp server

    Returns:
        List of floats representing the embedding vector

    Raises:
        ValueError: If response format is not recognized
    """
    # Implementation handles all 5 formats...
```
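
For reference, a simplified sketch of the kind of format detection involved (not the exact library code) could look like this:

```python
from typing import List, Union

def extract_embedding_sketch(resp: Union[dict, list]) -> List[float]:
    """Simplified illustration of handling the formats listed above."""
    # Array formats: [{"embedding": [...]}] or [{"embedding": [[...]]}]
    if isinstance(resp, list):
        resp = resp[0]
    # OpenAI-compatible: {"data": [{"embedding": [...]}]}
    if "data" in resp:
        emb = resp["data"][0]["embedding"]
    else:
        emb = resp["embedding"]
    # Unwrap double-nested / dict-nested: [[floats]] -> [floats]
    if emb and isinstance(emb[0], list):
        emb = emb[0]
    if not all(isinstance(x, (int, float)) for x in emb):
        raise ValueError("Unrecognized embedding response format")
    return emb
```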

### Modified `generate_embedding()` method

Changed from:
```python
embeddings = response.json()["embedding"]
```

To:
```python
embeddings = self._extract_embedding(response.json())
```

## Testing

Created comprehensive unit tests in `tests/extras/test_llamacpp_embedding_formats.py`:

- ✅ test_native_format
- ✅ test_array_format
- ✅ test_double_nested_array_format
- ✅ test_openai_compatible_format
- ✅ test_nested_in_dict_format
- ✅ test_invalid_format_raises_error
- ✅ test_generate_embedding_with_native_format (mocked)
- ✅ test_generate_embedding_with_array_format (mocked)
- ✅ test_generate_embedding_with_openai_format (mocked)
- ✅ test_generate_embedding_http_error

**All tests pass** ✅
**Linting and type checking pass** ✅

## Comparison with PR #920

### PR #920 Approach

Changed:
```python
embeddings = response.json()["embedding"]
```

To:
```python
embeddings = response.json()[0]["embedding"][0]
```

### Issues with PR #920

1. **Too specific**: Only handles ONE format: `[{"embedding": [[floats]]}]`
2. **Logic error**: The double `[0]` indexing would extract a single float, not the full embedding vector
3. **Would fail validation**: The existing validation expects a list of floats
4. **No tests**: No unit tests provided
5. **No documentation**: No explanation of what format is expected

### Our Solution Advantages

1. **Handles 5 different formats** automatically
2. **Backwards compatible**: Works with existing deployments
3. **Well-tested**: 10 unit tests covering all scenarios
4. **Well-documented**: Clear docstring explaining all formats
5. **Robust error messages**: Helps users debug configuration issues

## Usage Example

### Configuration

```python
from langroid.embedding_models.models import LlamaCppServerEmbeddingsConfig
from langroid.vector_store.qdrantdb import QdrantDBConfig

embed_cfg = LlamaCppServerEmbeddingsConfig(
    api_base="http://localhost:8080",  # Your llama.cpp server
    dims=768,  # Match your embedding model dimensions
    context_length=2048,
    batch_size=2048,
)

vecdb_config = QdrantDBConfig(
    collection_name="my-docs",
    embedding=embed_cfg,
    storage_path=".qdrant/",
)
```

### Running llama-server

```bash
# For dedicated embedding model (RECOMMENDED)
./llama-server -ngl 100 -c 2048 \
  -m ~/nomic-embed-text-v1.5.Q8_0.gguf \
  --embeddings -b 2048 -ub 2048 \
  --host localhost --port 8080

# For LLM-based embeddings (gpt-oss example)
./llama-server -ngl 99 \
  -m ~/.cache/llama.cpp/gpt-oss-20b.gguf \
  --embeddings \
  --host localhost --port 8080
```

## Recommendations

### For Users

1. **Use dedicated embedding models** like nomic-embed-text-v1.5 for best results
2. **Match dimensions** in config to your embedding model
3. **Use the `--embeddings` flag** when starting llama-server
4. **Check server logs** if you encounter issues

### For Langroid

1. ✅ **Implemented**: Robust format detection in `_extract_embedding()`
2. ✅ **Tested**: Comprehensive unit tests
3. ✅ **Documented**: Clear docstrings and examples
4. **Consider**: Adding example in `examples/docqa/` using local embeddings
5. **Consider**: Adding to documentation/tutorials

## Files Modified

- `langroid/embedding_models/models.py` - Added `_extract_embedding()` method
- `tests/extras/test_llamacpp_embedding_formats.py` - New comprehensive test suite

## References

- Issue #919: https://github.com/langroid/langroid/issues/919
- PR #920: https://github.com/langroid/langroid/pull/920
- llama.cpp discussion #7712: https://github.com/ggml-org/llama.cpp/discussions/7712
- nomic-embed models: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF
- Langroid docs: `docs/notes/llama-cpp-embeddings.md`

## Conclusion

**Issue #919 is now resolved** with a robust, well-tested solution that handles all known llama.cpp embedding response formats. Users can now use local embeddings with llama.cpp without worrying about response format variations.

**PR #920 is not needed** as our solution is more comprehensive and handles all cases, not just one specific format.
</file>

<file path="langroid/agent/special/relevance_extractor_agent.py">
"""
Agent to retrieve relevant segments from a body of text,
that are relevant to a query.

"""
⋮----
console = Console()
logger = logging.getLogger(__name__)
⋮----
class RelevanceExtractorAgentConfig(ChatAgentConfig)
⋮----
llm: LLMConfig | None = OpenAIGPTConfig()
segment_length: int = 1  # number of sentences per segment
query: str = ""  # query for relevance extraction
handle_llm_no_tool: str = """
system_message: str = """
⋮----
class RelevanceExtractorAgent(ChatAgent)
⋮----
"""
    Agent for extracting segments from text, that are relevant to a given query.
    """
⋮----
def __init__(self, config: RelevanceExtractorAgentConfig)
⋮----
"""Compose a prompt asking to extract relevant segments from a passage.
        Steps:
        - number the segments in the passage
        - compose prompt
        - send to LLM
        """
⋮----
message_str = message.content if isinstance(message, ChatDocument) else message
# number the segments in the passage
⋮----
# compose prompt
prompt = f"""
# send to LLM
response = super().llm_response(prompt)
⋮----
"""
        Compose a prompt asking to extract relevant segments from a passage.
        Steps:
        - number the segments in the passage
        - compose prompt
        - send to LLM
        The LLM is expected to generate a structured msg according to the
        SegmentExtractTool schema, i.e. it should contain a `segment_list` field
        whose value is a list of segment numbers or ranges, like "10,12,14-17".
        """
⋮----
response = await super().llm_response_async(prompt)
⋮----
def extract_segments(self, msg: SegmentExtractTool) -> str
⋮----
"""Method to handle a segmentExtractTool message from LLM"""
spec = msg.segment_list
⋮----
# assume this has numbered segments
⋮----
extracts = extract_numbered_segments(self.numbered_passage, spec)
⋮----
# this response ends the task by saying DONE
</file>

<file path="langroid/agent/tools/mcp/decorators.py">
"""Decorator: declare a ToolMessage class bound to a FastMCP tool.

    Usage:
        @mcp_tool("/path/to/server.py", "get_weather")
        class WeatherTool:
            def pretty(self) -> str:
                return f"Temp is {self.temperature}"

    The `server` may be a string/URL/FastMCP/ClientTransport, or a zero-arg
    callable returning one of those, e.g. `lambda: StdioTransport(...)`. Using a
    factory ensures a fresh transport per connection under fastmcp>=2.13.
    """
⋮----
def decorator(user_cls: Type[ToolMessage]) -> Type[ToolMessage]
⋮----
# build the “real” ToolMessage subclass for this server/tool
RealTool: Type[ToolMessage] = get_tool(server, tool_name)
⋮----
# copy user‐defined methods / attributes onto RealTool
⋮----
# preserve the user’s original name if you like:
</file>

<file path="langroid/agent/tools/seltz_search_tool.py">
"""
A tool to trigger a Seltz search for a given query and return the top results.
Since the tool is stateless (i.e. does not need
access to agent state), it can be enabled for any agent, without having to define a
special method inside the agent: `agent.enable_message(SeltzSearchTool)`

NOTE: To use this tool, you need to:

* set the SELTZ_API_KEY environment variable in
your `.env` file, e.g. `SELTZ_API_KEY=your_api_key_here`

* install langroid with the `seltz` extra, e.g.
`pip install langroid[seltz]` or `uv pip install langroid[seltz]`
or `poetry add langroid[seltz]` or `uv add langroid[seltz]`
(it installs the `seltz` package from pypi).

For more information, please refer to: https://seltz.ai/
"""
⋮----
class SeltzSearchTool(ToolMessage)
⋮----
request: str = "seltz_search"
purpose: str = """
query: str
num_results: int
⋮----
def handle(self) -> str
⋮----
"""
        Conducts a search using the Seltz API based on the provided query
        and number of results by triggering a seltz_search.

        Returns:
            str: A formatted string containing the titles, links, and
                summaries of each search result, separated by two newlines.
        """
⋮----
search_results = seltz_search(self.query, self.num_results)
# return Title, Link, Summary of each result, separated by two newlines
results_str = "\n\n".join(str(result) for result in search_results)
⋮----
@classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]
</file>

<file path="langroid/agent/base.py">
ORCHESTRATION_STRINGS = [DONE, PASS, PASS_TO, SEND_TO]
console = Console(quiet=settings.quiet)
⋮----
logger = logging.getLogger(__name__)
⋮----
T = TypeVar("T")
⋮----
class SearchForTools(Enum)
⋮----
CONTENT = 1  # from message content
FUNCTIONS = 2  # from OpenAI function calls
TOOLS = 3  # from OpenAI tool calls
⋮----
class AgentConfig(BaseSettings)
⋮----
"""
    General config settings for an LLM agent. This is nested, combining configs of
    various components.
    """
⋮----
name: str = "LLM-Agent"
debug: bool = False
vecdb: Optional[VectorStoreConfig] = None
llm: Optional[LLMConfig] = OpenAIGPTConfig()
parsing: Optional[ParsingConfig] = ParsingConfig()
prompts: Optional[PromptsConfig] = PromptsConfig()
show_stats: bool = True  # show token usage/cost stats?
hide_agent_response: bool = False  # hide agent response?
add_to_registry: bool = True  # register agent in ObjectRegistry?
respond_tools_only: bool = False  # respond only to tool messages (not plain text)?
# allow multiple tool messages in a single response?
allow_multiple_tools: bool = True
human_prompt: str = (
⋮----
@field_validator("name")
@classmethod
    def check_name_alphanum(cls, v: str) -> str
⋮----
def noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None
⋮----
async def async_noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None
⋮----
async def async_lambda_noop_fn() -> Callable[..., Coroutine[Any, Any, None]]
⋮----
class Agent(ABC)
⋮----
"""
    An Agent is an abstraction that typically (but not necessarily)
    encapsulates an LLM.
    """
⋮----
id: str = Field(default_factory=lambda: ObjectRegistry.new_id())
# OpenAI tool-calls awaiting response; update when a tool result with Role.TOOL
# is added to self.message_history
oai_tool_calls: List[OpenAIToolCall] = []
# Index of ALL tool calls generated by the agent
oai_tool_id2call: Dict[str, OpenAIToolCall] = {}
⋮----
def __init__(self, config: AgentConfig = AgentConfig())
⋮----
self.id = ObjectRegistry.new_id()  # Initialize agent ID
self.lock = asyncio.Lock()  # for async access to update self.llm.usage_cost
self.dialog: List[Tuple[str, str]] = []  # seq of LLM (prompt, response) tuples
⋮----
self.llm_tools_known: Set[str] = set()  # all known tools, handled/used or not
# Indicates which tool-names are allowed to be inferred when
# the LLM "forgets" to include the request field in its tool-call.
⋮----
None  # If None, we allow all
⋮----
self.interactive: bool = True  # may be modified by Task wrapper
⋮----
# token_encoding_model is used to obtain the tokenizer,
# so in case it's an OpenAI model, we ensure that the tokenizer
# corresponding to the model is used.
⋮----
def init_state(self) -> None
⋮----
"""Initialize all state vars. Called by Task.run() if restart is True"""
⋮----
@staticmethod
    def from_id(id: str) -> "Agent"
⋮----
@staticmethod
    def delete_id(id: str) -> None
⋮----
"""
        Sequence of (entity, response_method) pairs. This sequence is used
            in a `Task` to respond to the current pending message.
            See `Task.step()` for details.
        Returns:
            Sequence of (entity, response_method) pairs.
        """
⋮----
"""
        Async version of `entity_responders`. See there for details.
        """
⋮----
@property
    def indent(self) -> str
⋮----
"""Indentation to print before any responses from the agent's entities."""
⋮----
@indent.setter
    def indent(self, value: str) -> None
⋮----
def update_dialog(self, prompt: str, output: str) -> None
⋮----
def get_dialog(self) -> List[Tuple[str, str]]
⋮----
def clear_dialog(self) -> None
⋮----
"""
        Analyze parameters of a handler method to determine their types.

        Returns:
            Tuple of (has_annotations, agent_param_name, chat_doc_param_name)
            - has_annotations: True if useful type annotations were found
            - agent_param_name: Name of the agent parameter if found
            - chat_doc_param_name: Name of the chat_doc parameter if found
        """
sig = inspect.signature(handler_method)
params = list(sig.parameters.values())
# Remove the first 'self' parameter
params = params[1:]
# Don't use name
# [p for p in params if p.name != "self"]
⋮----
agent_param = None
chat_doc_param = None
has_annotations = False
⋮----
# First try type annotations
⋮----
ann_str = str(param.annotation)
# Check for Agent-like types
⋮----
agent_param = param.name
has_annotations = True
# Check for ChatDocument-like types
⋮----
chat_doc_param = param.name
⋮----
# Fallback to parameter names
⋮----
"""
        Create a wrapper function for a handler method based on its signature.

        Args:
            message_class: The ToolMessage class
            handler_method: The handle/handle_async method
            is_async: Whether this is for an async handler

        Returns:
            Appropriate wrapper function
        """
⋮----
# params = [p for p in params if p.name != "self"]
⋮----
# Build wrapper based on found parameters
⋮----
async def wrapper(obj: Any) -> Any
⋮----
def wrapper(obj: Any) -> Any
⋮----
# Both parameters present - build wrapper respecting their order
param_names = [p.name for p in params]
⋮----
# agent is first parameter
⋮----
async def wrapper(obj: Any, chat_doc: Any) -> Any
⋮----
def wrapper(obj: Any, chat_doc: Any) -> Any
⋮----
# chat_doc is first parameter
⋮----
# Only agent parameter
⋮----
# Only chat_doc parameter
⋮----
async def wrapper(obj: Any, chat_doc: Any) -> Any
⋮----
def wrapper(obj: Any, chat_doc: Any) -> Any
⋮----
# No recognized parameters - backward compatibility
# Assume single parameter is chat_doc (legacy behavior)
⋮----
# Multiple unrecognized parameters - best guess
⋮----
"""
        If `message_class` is None, return a list of all known tool names.
        Otherwise, first add the tool name corresponding to the message class
        (which is the value of the `request` field of the message class),
        to the `self.llm_tools_map` dict, and then return a list
        containing this tool name.

        Args:
            message_class (Optional[Type[ToolMessage]]): The message class whose tool
                name is to be returned; Optional, default is None.
                If None, return a list of all known tool names.

        Returns:
            List[str]: List of tool names: either just the tool name corresponding
                to the message class, or all known tool names
                (when `message_class` is None).

        """
⋮----
tool = message_class.default_value("request")
⋮----
"""
        If the tool has a handler method explicitly defined, use it;
        otherwise use the tool name as the handler.
        """
⋮----
handler = getattr(message_class, "_handler", tool)
⋮----
handler = tool
⋮----
"""
            If the message class has a `handle` method,
            and the agent does NOT have a tool handler method,
            then we create a method for the agent whose name
            is the value of `handler`, and whose body is the `handle` method.
            This removes a separate step of having to define this method
            for the agent, and also keeps the tool definition AND handling
            in one place, i.e. in the message class.
            See `tests/main/test_stateless_tool_messages.py` for an example.
            """
wrapper = self._create_handler_wrapper(
⋮----
has_chat_doc_arg = (
⋮----
def response_wrapper_with_chat_doc(obj: Any, chat_doc: Any) -> Any
⋮----
def response_wrapper_no_chat_doc(obj: Any) -> Any
⋮----
# When a ToolMessage has a `handle_message_fallback` method,
# we inject it into the agent as a method, overriding the default
# `handle_message_fallback` method (which does nothing).
# It's possible multiple tool messages have a `handle_message_fallback`,
# in which case, the last one inserted will be used.
def fallback_wrapper(msg: Any) -> Any
⋮----
async_handler_name = f"{handler}_async"
⋮----
@no_type_check
                async def handler(obj, chat_doc)
⋮----
@no_type_check
                async def handler(obj)
⋮----
"""
        Enable an agent to RESPOND (i.e. handle) a "tool" message of a specific type
            from the LLM. Also "registers" (i.e. adds) the `message_class` to the
            `self.llm_tools_map` dict.

        Args:
            message_class (Optional[Type[ToolMessage]]): The message class to enable;
                Optional; if None, all known message classes are enabled for handling.

        """
⋮----
"""
        Disable a message class from being handled by this Agent.

        Args:
            message_class (Optional[Type[ToolMessage]]): The message class to disable.
                If None, all message classes are disabled.
        """
⋮----
def sample_multi_round_dialog(self) -> str
⋮----
"""
        Generate a sample multi-round dialog based on enabled message classes.
        Returns:
            str: The sample dialog string.
        """
enabled_classes: List[Type[ToolMessage]] = list(self.llm_tools_map.values())
# use at most 2 sample conversations, no need to be exhaustive;
sample_convo = [
⋮----
msg_cls().usage_examples(random=True)  # type: ignore
⋮----
"""Template for agent_response."""
⋮----
"""
        Render the response from the agent, typically from tool-handling.
        Args:
            results: results from tool-handling, which may be a string,
                a dict of tool results, or a ChatDocument.
        """
⋮----
results_str = results
⋮----
results_str = results.content
⋮----
results_str = json.dumps(results, indent=2)
⋮----
"""
        Convert results to final response.
        """
⋮----
maybe_json = len(extract_top_level_json(results_str)) > 0
⋮----
# Preserve trail of tool_ids for OpenAI Assistant fn-calls
⋮----
sender_name = self.config.name
⋮----
# if result was from handling an LLM `function_call`,
# set sender_name to name of the function_call
sender_name = msg.function_call.name
⋮----
# preserve trail of tool_ids for OpenAI Assistant fn-calls
⋮----
"""
        Async version of `agent_response`. See there for details.
        """
⋮----
results = await self.handle_message_async(msg)
⋮----
"""
        Response from the "agent itself", typically (but not only)
        used to handle LLM's "tool message" or `function_call`
        (e.g. OpenAI `function_call`).
        Args:
            msg (str|ChatDocument): the input to respond to: if msg is a string,
                and it contains a valid JSON-structured "tool message", or
                if msg is a ChatDocument, and it contains a `function_call`.
        Returns:
            Optional[ChatDocument]: the response, packaged as a ChatDocument

        """
⋮----
results = self.handle_message(msg)
⋮----
"""
        Process results from a response, based on whether
        they are results of OpenAI tool-calls from THIS agent, so that
        we can construct an appropriate LLMMessage that contains tool results.

        Args:
            results (str): A possible string result from handling tool(s)
            id2result (OrderedDict[str,str]|None): A dict of OpenAI tool id -> result,
                if there are multiple tool results.
            tool_calls (List[OpenAIToolCall]|None): List of OpenAI tool-calls that the
                results are a response to.

        Returns:
            - str: The response string
            - Dict[str,str]|None: A dict of OpenAI tool id -> result, if there are
                multiple tool results.
            - str|None: tool_id if there was a single tool result

        """
id2result_ = copy.deepcopy(id2result) if id2result is not None else None
results_str = ""
oai_tool_id = None
⋮----
# in this case ignore id2result
⋮----
# We only have one result, so in case there is a
# "pending" OpenAI tool-call, we expect no more than 1 such.
⋮----
# We record the tool_id of the tool-call that
# the result is a response to, so that ChatDocument.to_LLMMessage
# can properly set the `tool_call_id` field of the LLMMessage.
oai_tool_id = self.oai_tool_calls[0].id
elif id2result is not None and id2result_ is not None:  # appease mypy
⋮----
# if the number of pending tool calls equals the number of results,
# then ignore the ids in id2result, and use the results in order,
# which is preserved since id2result is an OrderedDict.
⋮----
id2result_ = OrderedDict(
⋮----
# This must be an OpenAI tool id -> result map;
# However some ids may not correspond to the tool-calls in the list of
# pending tool-calls (self.oai_tool_calls).
# Such results are concatenated into a simple string, to store in the
# ChatDocument.content, and the rest
# (i.e. those that DO correspond to tools in self.oai_tool_calls)
# are stored as a dict in ChatDocument.oai_tool_id2result.
⋮----
# OAI tools from THIS agent, awaiting response
pending_tool_ids = [tc.id for tc in self.oai_tool_calls]
# tool_calls that the results are a response to
# (but these may have been sent from another agent, hence may not be in
# self.oai_tool_calls)
parent_tool_id2name = {
⋮----
# (id, result) for result NOT corresponding to self.oai_tool_calls,
# i.e. these are results of EXTERNAL tool-calls from another agent.
external_tool_id_results = []
⋮----
results_str = external_tool_id_results[0][1]
⋮----
results_str = "\n\n".join(
⋮----
id2result_ = None
⋮----
results_str = list(id2result_.values())[0]
oai_tool_id = list(id2result_.keys())[0]
⋮----
"""Template for response from entity `e`."""
⋮----
"""Template for user_response."""
⋮----
def user_can_respond(self, msg: Optional[str | ChatDocument] = None) -> bool
⋮----
"""
        Whether the user can respond to a message.

        Args:
            msg (str|ChatDocument): the string to respond to.

        Returns:

        """
# When msg explicitly addressed to user, this means an actual human response
# is being sought.
need_human_response = (
⋮----
"""
        Convert user_msg to final response.
        """
⋮----
user_msg = (
user_msg = user_msg.strip()
⋮----
tool_ids = []
⋮----
tool_ids = msg.metadata.tool_ids
⋮----
# only return non-None result if user_msg not empty
⋮----
user_msg = user_msg.replace("SYSTEM", "").strip()
source = Entity.SYSTEM
sender = Entity.SYSTEM
⋮----
source = Entity.USER
sender = Entity.USER
⋮----
# preserve trail of tool_ids for OpenAI Assistant fn-calls
⋮----
"""
        Async version of `user_response`. See there for details.
        """
⋮----
user_msg = self.default_human_response
⋮----
user_msg = await self.callbacks.get_user_response_async(prompt="")
⋮----
user_msg = self.callbacks.get_user_response(prompt="")
⋮----
user_msg = Prompt.ask(
⋮----
"""
        Get user response to current message. Could allow (human) user to intervene
        with an actual answer, or quit using "q" or "x"

        Args:
            msg (str|ChatDocument): the string to respond to.

        Returns:
            User response, packaged as a ChatDocument

        """
⋮----
# ask user with empty prompt: no need for prompt
# since user has seen the conversation so far.
# But non-empty prompt can be useful when Agent
# uses a tool that requires user input, or in other scenarios.
⋮----
@no_type_check
    def llm_can_respond(self, message: Optional[str | ChatDocument] = None) -> bool
⋮----
"""
        Whether the LLM can respond to a message.
        Args:
            message (str|ChatDocument): message or ChatDocument object to respond to.

        Returns:

        """
⋮----
# if there is a valid "tool" message (either JSON or via `function_call`)
# then LLM cannot respond to it
⋮----
def can_respond(self, message: Optional[str | ChatDocument] = None) -> bool
⋮----
"""
        Whether the agent can respond to a message.
        Used in Task.py to skip a sub-task when we know it would not respond.
        Args:
            message (str|ChatDocument): message or ChatDocument object to respond to.
        """
tools = self.try_get_tool_messages(message)
⋮----
# The message has tools that are NOT enabled to be handled by this agent,
# which means the agent cannot respond to it.
⋮----
"""Template for llm_response."""
⋮----
"""
        Async version of `llm_response`. See there for details.
        """
⋮----
prompt = message.content
⋮----
prompt = message
⋮----
output_len = self.config.llm.model_max_output_tokens
⋮----
output_len = self.llm.completion_context_length() - self.num_tokens(prompt)
⋮----
response = await self.llm.agenerate(prompt, output_len)
⋮----
# We would have already displayed the msg "live" ONLY if
# streaming was enabled, AND we did not find a cached response.
# If we are here, it means the response has not yet been displayed.
cached = f"[red]{self.indent}(cached)[/red]" if response.cached else ""
⋮----
chat=False,  # i.e. it's a completion model not chat model
⋮----
cdoc = ChatDocument.from_LLMResponse(response, displayed=True)
# Preserve trail of tool_ids for OpenAI Assistant fn-calls
⋮----
"""
        LLM response to a prompt.
        Args:
            message (str|ChatDocument): prompt string, or ChatDocument object

        Returns:
            Response from LLM, packaged as a ChatDocument
        """
⋮----
with ExitStack() as stack:  # for conditionally using rich spinner
⋮----
# show rich spinner only if not streaming!
cm = status("LLM responding to message...")
⋮----
output_len = self.llm.completion_context_length() - self.num_tokens(
⋮----
response = self.llm.generate(prompt, output_len)
⋮----
# we would have already displayed the msg "live" ONLY if
# streaming was enabled, AND we did not find a cached response
⋮----
cached = "[red](cached)[/red]" if response.cached else ""
⋮----
chat=False,  # i.e. it's a completion model not chat model
⋮----
def has_tool_message_attempt(self, msg: str | ChatDocument | None) -> bool
⋮----
"""
        Check whether msg contains a Tool/fn-call attempt (by the LLM).

        CAUTION: This uses self.get_tool_messages(msg) which as a side-effect
        may update msg.tool_messages when msg is a ChatDocument, if there are
        any tools in msg.
        """
⋮----
tools = self.get_tool_messages(msg)
⋮----
# there is a tool/fn-call attempt but had a validation error,
# so we still consider this a tool message "attempt"
⋮----
def _tool_recipient_match(self, tool: ToolMessage) -> bool
⋮----
"""Is tool enabled for handling by this agent and intended for this
        agent to handle (i.e. if there's any explicit `recipient` field exists in
        tool, then it matches this agent's name)?
        """
⋮----
def has_only_unhandled_tools(self, msg: str | ChatDocument) -> bool
⋮----
"""
        Does the msg have at least one tool, and none of the tools in the msg are
        handleable by this agent?
        """
⋮----
tools = self.try_get_tool_messages(msg, all_tools=True)
⋮----
"""
        Get ToolMessages recognized in msg, handle-able by this agent.
        NOTE: as a side-effect, this will update msg.tool_messages
        when msg is a ChatDocument and msg contains tool messages.

        Args:
            msg (str|ChatDocument): the message to extract tools from.
            all_tools (bool):
                - if True, return all tools,
                    i.e. any recognized tool in self.llm_tools_known,
                    whether it is handled by this agent or not;
                - otherwise, return only the tools handled by this agent.

        Returns:
            List[ToolMessage]: list of ToolMessage objects
        """
⋮----
json_tools = self.get_formatted_tool_messages(msg)
⋮----
# We've already found tool_messages,
# (either via OpenAI Fn-call or Langroid-native ToolMessage);
# or they were added by an agent_response.
# note these could be from a forwarded msg from another agent,
# so return ONLY the messages THIS agent is enabled to handle.
⋮----
# We've already identified all_tool_messages in the msg by this same agent;
# so use them to return the corresponding ToolMessage objects
⋮----
tools = self.get_formatted_tool_messages(
⋮----
# filter for actually handle-able tools, and recipient is this agent
my_tools = [t for t in tools if self._tool_recipient_match(t)]
⋮----
# otherwise, we look for `tool_calls` (possibly multiple)
⋮----
tools = self.get_oai_tool_calls_classes(msg)
⋮----
tools = []
my_tools = []
⋮----
# otherwise, we look for a `function_call`
fun_call_cls = self.get_function_call_class(msg)
tools = [fun_call_cls] if fun_call_cls is not None else []
⋮----
"""
        Returns ToolMessage objects (tools) corresponding to
        tool-formatted substrings, if any.
        ASSUMPTION - These tools are either ALL JSON-based, or ALL XML-based
        (i.e. not a mix of both).
        Terminology: a "formatted tool msg" is one which the LLM generates as
            part of its raw string output, rather than within a JSON object
            in the API response (i.e. this method does not extract tools/fns returned
            by OpenAI's tools/fns API or similar APIs).

        Args:
            input_str (str): input string, typically a message sent by an LLM
            from_llm (bool): whether the input was generated by the LLM. If so,
                we track malformed tool calls.

        Returns:
            List[ToolMessage]: list of ToolMessage objects
        """
⋮----
substrings = XMLToolMessage.find_candidates(input_str)
is_json = False
⋮----
substrings = extract_top_level_json(input_str)
is_json = len(substrings) > 0
⋮----
results = [self._get_one_tool_message(j, is_json, from_llm) for j in substrings]
valid_results = [r for r in results if r is not None]
# If any tool is correctly formed we do not set the flag
⋮----
def get_function_call_class(self, msg: ChatDocument) -> Optional[ToolMessage]
⋮----
"""
        From ChatDocument (constructed from an LLM Response), get the `ToolMessage`
        corresponding to the `function_call` if it exists.
        """
⋮----
tool_name = msg.function_call.name
tool_msg = msg.function_call.arguments or {}
⋮----
tool_class = self.llm_tools_map[tool_name]
⋮----
tool = tool_class.model_validate(tool_msg)
⋮----
# Store tool class as an attribute on the exception
ve.tool_class = tool_class  # type: ignore
⋮----
def get_oai_tool_calls_classes(self, msg: ChatDocument) -> List[ToolMessage]
⋮----
"""
        From ChatDocument (constructed from an LLM Response), get
         a list of ToolMessages corresponding to the `tool_calls`, if any.
        """
⋮----
all_errors = True
⋮----
tool_name = tc.function.name
tool_msg = tc.function.arguments or {}
⋮----
all_errors = False
⋮----
# Store tool class as an attribute on the exception
ve.tool_class = tool_class  # type: ignore
⋮----
# When no tool is valid and the message was produced
# by the LLM, set the recovery flag
⋮----
"""
        Handle a validation error raised when parsing a tool message,
            when a legitimate tool name is used, but it has missing/bad fields.
        Args:
            ve (ValidationError): The exception raised
            tool_class (Optional[Type[ToolMessage]]): The tool class that
                failed validation

        Returns:
            str: The error message to send back to the LLM
        """
# First try to get tool class from the exception itself
⋮----
tool_name = ve.tool_class.default_value("request")  # type: ignore
⋮----
tool_name = tool_class.default_value("request")
⋮----
# Fallback: try to extract from error context if available
tool_name = "Unknown Tool"
bad_field_errors = "\n".join(
⋮----
"""
        Return error document if the message contains multiple orchestration tools
        """
# check whether there are multiple orchestration-tools (e.g. DoneTool etc),
# in which case set result to error-string since we don't yet support
# multi-tools with one or more orch tools.
⋮----
ORCHESTRATION_TOOLS = (
⋮----
has_orch = any(isinstance(t, ORCHESTRATION_TOOLS) for t in tools)
⋮----
"""
        Convert results to final response
        """
# extract content from ChatDocument results so we have all str|None
results = [r.content if isinstance(r, ChatDocument) else r for r in results]
⋮----
tool_names = [t.default_value("request") for t in tools]
⋮----
has_ids = all([t.id != "" for t in tools])
⋮----
id2result = OrderedDict(
result_values = list(id2result.values())
⋮----
# Cannot support multi-tool results containing orchestration strings!
# Replace results with err string to force LLM to retry
err_str = "ERROR: Please use ONE tool at a time!"
id2result = OrderedDict((id, err_str) for id in id2result.keys())
⋮----
name_results_list = [
⋮----
# there was a non-None result
⋮----
# if there are multiple OpenAI Tool results, return them as a dict
⋮----
# multi-results: prepend the tool name to each result
str_results = [f"Result from {name}: {r}" for name, r in name_results_list]
final = "\n\n".join(str_results)
⋮----
"""
        Async version of `handle_message`. See there for details.
        """
⋮----
tools = [t for t in tools if self._tool_recipient_match(t)]
⋮----
# correct tool name but bad fields
⋮----
except XMLException as xe:  # from XMLToolMessage parsing
⋮----
# invalid tool name
# We return None since returning "invalid tool name" would
# be considered a valid result in task loop, and would be treated
# as a response to the tool message even though the tool was not intended
# for this agent.
⋮----
fallback_result = self.handle_message_fallback(msg)
⋮----
chat_doc = msg if isinstance(msg, ChatDocument) else None
⋮----
results = self._get_multiple_orch_tool_errs(tools)
⋮----
results = [
# if there's a solitary ChatDocument|str result, return it as is
⋮----
"""
        Handle a "tool" message either a string containing one or more
        valid "tool" JSON substrings,  or a
        ChatDocument containing a `function_call` attribute.
        Handle with the corresponding handler method, and return
        the results as a combined string.

        Args:
            msg (str | ChatDocument): The string or ChatDocument to handle

        Returns:
            The result of the handler method can be:
             - None if no tools successfully handled, or no tools present
             - str if langroid-native JSON tools were handled, and results concatenated,
                 OR there's a SINGLE OpenAI tool-call.
                (We do this so the common scenario of a single tool/fn-call
                 has a simple behavior).
             - Dict[str, str] if multiple OpenAI tool-calls were handled
                 (dict is an id->result map)
             - ChatDocument if a handler returned a ChatDocument, intended to be the
                 final response of the `agent_response` method.
        """
⋮----
results: List[str | ChatDocument | None] = []
⋮----
results = ["ERROR: Use ONE tool at a time!"] * len(tools)
⋮----
results = [self.handle_tool_message(t, chat_doc=chat_doc) for t in tools]
⋮----
@property
    def all_llm_tools_known(self) -> set[str]
⋮----
"""All known tools; this may extend self.llm_tools_known."""
⋮----
def handle_message_fallback(self, msg: str | ChatDocument) -> Any
⋮----
"""
        Fallback method for the case where the msg has no tools that
        can be handled by this agent.
        This method can be overridden by subclasses, e.g.,
        to create a "reminder" message when a tool is expected but the LLM "forgot"
        to generate one.

        Args:
            msg (str | ChatDocument): The input msg to handle
        Returns:
            Any: The result of the handler method
        """
⋮----
"""
        Parse the tool_candidate_str into ANY ToolMessage KNOWN to the agent --
        This includes non-used/handled tools, i.e. any tool in self.all_llm_tools_known.
        The exception to this is below where we try our best to infer the tool
        when the LLM has "forgotten" to include the "request" field in the tool str ---
        in this case we ONLY look at the possible set of HANDLED tools, i.e.
        self.llm_tools_handled.
        """
⋮----
maybe_tool_dict = json.loads(tool_candidate_str)
⋮----
maybe_tool_dict = XMLToolMessage.extract_field_values(
⋮----
# check if the maybe_tool_dict contains a "properties" field
# which further contains the actual tool-call
# (some weak LLMs do this). E.g. gpt-4o sometimes generates this:
# TOOL: {
#     "type": "object",
#     "properties": {
#         "request": "square",
#         "number": 9
#     },
#     "required": [
#         "number",
#         "request"
#     ]
# }
⋮----
properties = maybe_tool_dict.get("properties")
⋮----
maybe_tool_dict = properties
request = maybe_tool_dict.get("request")
⋮----
possible = [self.llm_tools_map[r] for r in self.llm_tools_handled]
⋮----
allowable = self.enabled_requests_for_inference.intersection(
possible = [self.llm_tools_map[r] for r in allowable]
⋮----
default_keys = set(ToolMessage.model_fields.keys())
request_keys = set(maybe_tool_dict.keys())
⋮----
def maybe_parse(tool: type[ToolMessage]) -> Optional[ToolMessage]
⋮----
all_keys = set(tool.model_fields.keys())
non_inherited_keys = all_keys.difference(default_keys)
# If the request has any keys not valid for the tool and
# does not specify some key specific to the type
# (e.g. not just `purpose`), the LLM must explicitly specify `request`
⋮----
candidate_tools = list(
⋮----
# If only one valid candidate exists, we infer
# "request" to be the only possible value
⋮----
message_class = self.llm_tools_map.get(request)
⋮----
message = message_class.model_validate(maybe_tool_dict)
⋮----
ve.tool_class = message_class  # type: ignore
⋮----
"""
        Convert result of a responder (agent_response or llm_response, or task.run()),
        or tool handler, or handle_message_fallback,
        to a ChatDocument, to enable handling by other
        responders/tasks in a task loop possibly involving multiple agents.

        Args:
            msg (Any): The result of a responder or tool handler or task.run()
            orig_tool_name (str): The original tool name that generated the response,
                if any.
            chat_doc (ChatDocument): The original ChatDocument object that `msg`
                is a response to.
            author_entity (Entity): The intended author of the result ChatDocument
        """
⋮----
is_agent_author = author_entity == Entity.AGENT
⋮----
# result is a ToolMessage, so...
result_tool_name = msg.default_value("request")
⋮----
# TODO: do we need to remove the tool message from the chat_doc?
# if (chat_doc is not None and
#     msg in chat_doc.tool_messages):
#    chat_doc.tool_messages.remove(msg)
# if we can handle it, do so
result = self.handle_tool_message(msg, chat_doc=chat_doc)
⋮----
# else wrap it in an agent response and return it so
# orchestrator can find a respondent
⋮----
result = to_string(msg)
⋮----
def from_ChatDocument(self, msg: ChatDocument, output_type: Type[T]) -> Optional[T]
⋮----
"""
        Extract a desired output_type from a ChatDocument object.
        We use this fallback order:
        - if `msg.content_any` exists and matches the output_type, return it
        - if `msg.content` exists and output_type is str return it
        - if output_type is a ToolMessage, return the first tool in `msg.tool_messages`
        - if output_type is a list of ToolMessage,
            return all tools in `msg.tool_messages`
        - search for a tool in `msg.tool_messages` that has a field of output_type,
             and if found, return that field value
        - return None if all the above fail
        """
content = msg.content
⋮----
content_any = msg.content_any
⋮----
list_element_type = get_args(output_type)[0]
⋮----
# list_element_type is a subclass of ToolMessage:
# We output a list of objects derived from list_element_type
⋮----
# output_type is a subclass of ToolMessage:
# return the first tool that has this specific output_type
⋮----
# attempt to get the output_type from the content,
# if it's a primitive type
primitive_value = from_string(content, output_type)  # type: ignore
⋮----
# then search for output_type as a field in a tool
⋮----
value = tool.get_value_of_type(output_type)
⋮----
"""
        Truncate the result string to `max_tokens` tokens.
        """
⋮----
result_str = result.content if isinstance(result, ChatDocument) else result
num_tokens = (
⋮----
truncate_warning = f"""
⋮----
else result[: max_tokens * 4]  # approx truncate
⋮----
else result.content[: max_tokens * 4]  # approx truncate
⋮----
"""
        Async version of `handle_tool_message`. See there for details.
        """
tool_name = tool.default_value("request")
⋮----
handler_name = getattr(tool, "_handler", tool_name)
⋮----
handler_name = tool_name
handler_method = getattr(self, handler_name + "_async", None)
⋮----
maybe_result = await handler_method(tool, chat_doc=chat_doc)
⋮----
maybe_result = await handler_method(tool)
result = self.to_ChatDocument(maybe_result, tool_name, chat_doc)
⋮----
# raise the error here since we are sure it's
# not a pydantic validation error,
# which we check in `handle_message`
⋮----
)  # type: ignore
⋮----
"""
        Respond to a tool request from the LLM, in the form of an ToolMessage object.
        Args:
            tool: ToolMessage object representing the tool request.
            chat_doc: Optional ChatDocument object containing the tool request.
                This is passed to the tool-handler method only if it has a `chat_doc`
                argument.

        Returns:

        """
⋮----
handler_method = getattr(self, handler_name, None)
⋮----
maybe_result = handler_method(tool, chat_doc=chat_doc)
⋮----
maybe_result = handler_method(tool)
⋮----
def num_tokens(self, prompt: str | List[LLMMessage]) -> int
⋮----
"""
        Get LLM response stats as a string

        Args:
            chat_length (int): number of messages in the chat
            tot_cost (float): total cost of the chat so far
            response (LLMResponse): LLMResponse object
        """
⋮----
in_tokens = response.usage.prompt_tokens
out_tokens = response.usage.completion_tokens
llm_response_cost = format(response.usage.cost, ".4f")
cumul_cost = format(tot_cost, ".4f")
⋮----
context_length = self.llm.chat_context_length()
max_out = self.config.llm.model_max_output_tokens
⋮----
llm_model = (
# tot cost across all LLMs, agents
all_cost = format(self.llm.tot_tokens_cost()[1], ".4f")
⋮----
"""
        Updates `response.usage` obj (token usage and cost fields) if needed.
        An update is needed only if:
        - stream is True (i.e. streaming was enabled), and
        - the response was NOT obtained from cache, and
        - the API did NOT provide the usage/cost fields during streaming
          (As of Sep 2024, the OpenAI API started providing these; for other APIs
            this may not necessarily be the case).

        Args:
            response (LLMResponse): LLMResponse object
            prompt (str | List[LLMMessage]): prompt or list of LLMMessage objects
            stream (bool): whether to update the usage in the response object
                if the response is not cached.
            chat (bool): whether this is a chat model or a completion model
            print_response_stats (bool): whether to print the response stats
        """
⋮----
no_usage_info = response.usage is None or response.usage.prompt_tokens == 0
# Note: If response was not streamed, then
# `response.usage` would already have been set by the API,
# so we only need to update in the stream case.
⋮----
# usage, cost = 0 when response is from cache
prompt_tokens = 0
completion_tokens = 0
cost = 0.0
⋮----
prompt_tokens = self.num_tokens(prompt)
completion_tokens = self.num_tokens(response.message)
⋮----
cost = self.compute_token_cost(prompt_tokens, 0, completion_tokens)
⋮----
# update total counters
⋮----
chat_length = 1 if isinstance(prompt, str) else len(prompt)
⋮----
def compute_token_cost(self, prompt: int, cached: int, completion: int) -> float
⋮----
price = cast(LanguageModel, self.llm).chat_cost()
⋮----
"""
        Send a request to another agent, possibly after confirming with the user.
        This is not currently used, since we rely on the task loop and
        `RecipientTool` to address requests to other agents. It is generally best to
        avoid using this method.

        Args:
            agent (Agent): agent to ask
            request (str): request to send
            no_answer (str): expected response when agent does not know the answer
            user_confirm (bool): whether to gate the request with a human confirmation

        Returns:
            str: response from agent
        """
agent_type = type(agent).__name__
⋮----
user_response = Prompt.ask(
⋮----
answer = agent.llm_response(request)
</file>

<file path="langroid/agent/openai_assistant.py">
# setup logger
⋮----
logger = logging.getLogger(__name__)
⋮----
class ToolType(str, Enum)
⋮----
RETRIEVAL = "file_search"
CODE_INTERPRETER = "code_interpreter"
FUNCTION = "function"
⋮----
class AssistantTool(BaseModel)
⋮----
type: ToolType
function: Dict[str, Any] | None = None
⋮----
def dct(self) -> Dict[str, Any]
⋮----
d = super().model_dump()
⋮----
class AssistantToolCall(BaseModel)
⋮----
id: str
⋮----
function: LLMFunctionCall
⋮----
class RunStatus(str, Enum)
⋮----
QUEUED = "queued"
IN_PROGRESS = "in_progress"
COMPLETED = "completed"
REQUIRES_ACTION = "requires_action"
EXPIRED = "expired"
CANCELLING = "cancelling"
CANCELLED = "cancelled"
FAILED = "failed"
TIMEOUT = "timeout"
⋮----
class OpenAIAssistantConfig(ChatAgentConfig)
⋮----
use_cached_assistant: bool = False  # set in script via user dialog
assistant_id: str | None = None
use_tools: bool = False
use_functions_api: bool = True
use_cached_thread: bool = False  # set in script via user dialog
thread_id: str | None = None
# set to True once we can add Assistant msgs in threads
cache_responses: bool = True
timeout: int = 30  # can be different from llm.timeout
llm: OpenAIGPTConfig = OpenAIGPTConfig(chat_model=OpenAIChatModel.GPT4o)
tools: List[AssistantTool] = []
files: List[str] = []
⋮----
class OpenAIAssistant(ChatAgent)
⋮----
"""
    A ChatAgent powered by the OpenAI Assistant API:
    mainly, in `llm_response` method, we avoid maintaining conversation state,
    and instead let the Assistant API do it for us.
    Also handles persistent storage of Assistant and Threads:
    stores their ids (for given user, org) in a cache, and
    reuses them based on config.use_cached_assistant and config.use_cached_thread.

    This class can be used as a drop-in replacement for ChatAgent.
    """
⋮----
def __init__(self, config: OpenAIAssistantConfig)
⋮----
# handles for various entities and methods
⋮----
# which tool_ids are awaiting output submissions
⋮----
def add_assistant_files(self, files: List[str]) -> None
⋮----
"""Add file_ids to assistant"""
⋮----
def add_assistant_tools(self, tools: List[AssistantTool]) -> None
⋮----
"""Add tools to assistant"""
⋮----
all_tool_dicts = [t.dct() for t in self.config.tools]
⋮----
tools=[tool.dct() for tool in self.config.tools],  # type: ignore
⋮----
"""Override ChatAgent's method: extract the function-related args.
        See that method for details. But specifically about the `include_defaults` arg:
        Normally the OpenAI completion API ignores these fields, but the Assistant
        fn-calling seems to pay attention to these, and if we don't want this,
        we should set this to False.
        """
⋮----
# no specific msg class, or
# we are not enabling USAGE/GENERATION of this tool/fn,
# then there's no need to attach the fn to the assistant
# (HANDLING the fn will still work via self.agent_response)
⋮----
sys_msg = self._create_system_and_tools_message()
⋮----
# add the functions to the assistant:
⋮----
tools = self.assistant.tools
⋮----
"type": "function",  # type: ignore
⋮----
tools=tools,  # type: ignore
⋮----
def _cache_thread_key(self) -> str
⋮----
"""Key to use for caching or retrieving thread id"""
org = self.client.organization or ""
uid = generate_user_id(org)
name = self.config.name
⋮----
def _cache_assistant_key(self) -> str
⋮----
"""Key to use for caching or retrieving assistant id"""
⋮----
@no_type_check
    def _cache_messages_key(self) -> str
⋮----
"""Key to use when caching or retrieving thread messages"""
⋮----
@no_type_check
    def _cache_thread_lookup(self) -> str | None
⋮----
"""Try to retrieve cached thread_id associated with
        this user + machine + organization"""
key = self._cache_thread_key()
⋮----
@no_type_check
    def _cache_assistant_lookup(self) -> str | None
⋮----
"""Try to retrieve cached assistant_id associated with
        this user + machine + organization"""
⋮----
key = self._cache_assistant_key()
⋮----
@no_type_check
    def _cache_messages_lookup(self) -> LLMResponse | None
⋮----
"""Try to retrieve cached response for the message-list-hash"""
⋮----
key = self._cache_messages_key()
cached_dict = self.llm.cache.retrieve(key)
⋮----
def _cache_store(self) -> None
⋮----
"""
        Cache the assistant_id, thread_id associated with
        this user + machine + organization
        """
⋮----
thread_key = self._cache_thread_key()
⋮----
assistant_key = self._cache_assistant_key()
⋮----
@staticmethod
    def thread_msg_to_llm_msg(msg: Message) -> LLMMessage
⋮----
"""
        Convert a Message to an LLMMessage
        """
⋮----
content=msg.content[0].text.value,  # type: ignore
⋮----
def _update_messages_hash(self, msg: Message | LLMMessage) -> None
⋮----
"""
        Update the hash-state in the thread with the given message.
        """
⋮----
llm_msg = self.thread_msg_to_llm_msg(msg)
⋮----
llm_msg = msg
hash = self.thread.metadata["hash"]  # type: ignore
most_recent_msg = llm_msg.content
most_recent_role = llm_msg.role
hash = update_hash(hash, f"{most_recent_role}:{most_recent_msg}")
# TODO is this inplace?
⋮----
assert self.thread.metadata["hash"] == hash  # type: ignore
⋮----
def _maybe_create_thread(self, id: str | None = None) -> None
⋮----
"""Retrieve or create a thread if one does not exist,
        or retrieve it from cache"""
⋮----
cached = self._cache_thread_lookup()
⋮----
hash_key_str = (
hash_hex = update_hash(None, s=hash_key_str)
⋮----
assert self.thread.metadata["hash"] == hash_hex  # type: ignore
⋮----
def _maybe_create_assistant(self, id: str | None = None) -> None
⋮----
"""Retrieve or create an assistant if one does not exist,
        or retrieve it from cache"""
⋮----
cached = self._cache_assistant_lookup()
⋮----
def _get_run(self) -> Run
⋮----
"""Retrieve the run object associated with this thread and run,
        to see its latest status.
        """
⋮----
def _get_run_steps(self) -> List[RunStep]
⋮----
result = self.runs.steps.list(thread_id=self.thread.id, run_id=self.run.id)
⋮----
def _get_code_logs(self) -> List[Tuple[str, str]]
⋮----
"""
        Get list of input, output strings from code logs
        """
run_steps = self._get_run_steps()
# each step may have multiple tool-calls,
# each tool-call may have multiple outputs
tool_calls = [  # list of list of tool-calls
code_logs = []
for tcl in tool_calls:  # each tool-call-list
⋮----
io = tc.code_interpreter  # type: ignore
input = io.input
# TODO for CodeInterpreterOutputImage, there is no "logs"
# revisit when we handle images.
outputs = "\n\n".join(
⋮----
# return the reversed list, since they are stored in reverse chron order
⋮----
def _get_code_logs_str(self) -> str
⋮----
"""
        Get string representation of code logs
        """
code_logs = self._get_code_logs()
⋮----
def _add_thread_message(self, msg: str, role: Role) -> None
⋮----
"""
        Add a message with the given role to the thread.
        Args:
            msg (str): message to add
            role (Role): role of the message
        """
⋮----
# CACHING TRICK! Since the API only allows inserting USER messages,
# we prepend the role to the message, so that we can store ASSISTANT msgs
# as well! When the LLM sees the thread messages, they will contain
# the right sequence of alternating roles, so that it has no trouble
# responding when it is its turn.
msg = f"{role.value.upper()}: {msg}"
thread_msg = self.thread_messages.create(
⋮----
# We ALWAYS store user role since only user role allowed currently
⋮----
def _get_thread_messages(self, n: int = 20) -> List[LLMMessage]
⋮----
"""
        Get the last n messages in the thread, in cleaned-up form (LLMMessage).
        Args:
            n (int): number of messages to retrieve
        Returns:
            List[LLMMessage]: list of messages
        """
⋮----
result = self.thread_messages.list(
num = len(result.data)
if result.has_more and num < n:  # type: ignore
⋮----
thread_msgs = result.data
⋮----
# TODO: could be image, deal with it later
content=m.content[0].text.value,  # type: ignore
⋮----
"""
        Poll the run until it either:
        - EXITs the statuses specified in `until_not`, or
        - ENTERs the statuses specified in `until`.
        """
⋮----
run = self._get_run()
⋮----
"""Async version of _wait_for_run"""
⋮----
def set_system_message(self, msg: str) -> None
⋮----
"""
        Override ChatAgent's method.
        The Task may use this method to set the system message
        of the chat assistant.
        """
⋮----
def _start_run(self) -> None
⋮----
"""
        Run the assistant on the thread.
        """
⋮----
def _run_result(self) -> LLMResponse
⋮----
"""Result from run completed on the thread."""
status = self._wait_for_run(
⋮----
async def _run_result_async(self) -> LLMResponse
⋮----
"""(Async) Result from run completed on the thread."""
status = await self._wait_for_run_async(
⋮----
def _process_run_result(self, status: RunStatus) -> LLMResponse
⋮----
"""Process the result of the run."""
function_call: LLMFunctionCall | None = None
response = ""
tool_id = ""
# IMPORTANT: FIRST save hash key to store result,
# before it gets updated with the response
⋮----
messages = self._get_thread_messages(n=1)
response = messages[0].content
# update hash to include the response.
⋮----
tool_calls = self._parse_run_required_action()
# pick the FIRST tool call with type "function"
tool_call_fn = [t for t in tool_calls if t.type == ToolType.FUNCTION][0]
# TODO Handling only first tool/fn call for now
# revisit later: multi-tools affects the task.run() loop.
function_call = tool_call_fn.function
tool_id = tool_call_fn.id
result = LLMResponse(
⋮----
usage=None,  # TODO
cached=False,  # TODO - revisit when able to insert Assistant responses
⋮----
def _parse_run_required_action(self) -> List[AssistantToolCall]
⋮----
"""
        Parse the required_action field of the run, i.e. get the list of tool calls.
        Currently only tool calls are supported.
        """
# see https://platform.openai.com/docs/assistants/tools/function-calling
⋮----
if run.status != RunStatus.REQUIRES_ACTION:  # type: ignore
⋮----
tool_calls = run.required_action.submit_tool_outputs.tool_calls
⋮----
def _submit_tool_outputs(self, msg: LLMMessage) -> None
⋮----
"""
        Submit the tool (fn) outputs to the run/thread
        """
⋮----
tool_outputs = [
# run enters queued, in_progress state after this
⋮----
tool_outputs=tool_outputs,  # type: ignore
⋮----
def process_citations(self, thread_msg: Message) -> None
⋮----
"""
        Process citations in the thread message.
        Modifies the thread message in-place.
        """
# could there be multiple content items?
# TODO content could be MessageContentImageFile; handle that later
annotated_content = thread_msg.content[0].text  # type: ignore
annotations = annotated_content.annotations
citations = []
# Iterate over the annotations and add footnotes
⋮----
# Replace the text with a footnote
⋮----
# Gather citations based on annotation attributes
⋮----
cited_file = self.client.files.retrieve(file_citation.file_id)
⋮----
cited_file = self.client.files.retrieve(file_path.file_id)
⋮----
# Note: File download functionality not implemented above for brevity
sep = "\n" if len(citations) > 0 else ""
⋮----
"""
        Preprocess message and return response if found in cache, else None.
        """
is_tool_output = False
⋮----
# note: to_LLMMessage returns a list of LLMMessage,
# which is allowed to have len > 1, in case the msg
# represents results of multiple (non-assistant) tool-calls.
# But for the OAI Assistant, we assume exactly one tool-call at a time.
# TODO look into multi-tools
llm_msg = ChatDocument.to_LLMMessage(message)[0]
tool_id = llm_msg.tool_id
⋮----
result_msg = f"Result for Tool_id {tool_id}: {llm_msg.content}"
⋮----
# add actual result of cached fn-call
⋮----
is_tool_output = True
# submit tool/fn result to the thread/run
⋮----
# We cannot ACTUALLY add this result to thread now
# since run is in `action_required` state,
# so we just update the message hash
⋮----
# add message to the thread
⋮----
# When message is None, the thread may have no user msgs.
# Note: the system message is NOT placed in the thread by the OpenAI system.
⋮----
# check if we have cached the response.
# TODO: handle the case of structured result (fn-call, tool, etc)
response = self._cache_messages_lookup()
⋮----
# store the result in the thread so
# it looks like assistant produced it
⋮----
return response  # type: ignore
⋮----
# create a run for this assistant on this thread,
# i.e. actually "run"
⋮----
# DO NOT start a run if we submitted tool outputs,
# since submission of tool outputs resumes a run from
# status = "requires_action"
⋮----
# code from ChatAgent.llm_response_messages
⋮----
# add to cached tools list so we don't create an Assistant run
# in _llm_response_preprocess
⋮----
response_str = str(response.function_call)
⋮----
response_str = response.message
cache_str = "[red](cached)[/red]" if cached else ""
⋮----
cdoc = ChatDocument.from_LLMResponse(
# Note message.metadata.tool_ids may have been popped above
tool_ids = (
⋮----
"""
        Override ChatAgent's method: this is the main LLM response method.
        In the ChatAgent, this updates `self.message_history` and then calls
        `self.llm_response_messages`, but since we are relying on the Assistant API
        to maintain conversation state, this method is simpler: Simply start a run
        on the message-thread, and wait for it to complete.

        Args:
            message (Optional[str | ChatDocument], optional): message to respond to
                (if absent, the LLM response will be based on the
                instructions in the system_message). Defaults to None.
        Returns:
            Optional[ChatDocument]: LLM response
        """
response = self._llm_response_preprocess(message)
cached = True
⋮----
cached = False
response = self._run_result()
⋮----
"""
        Async version of llm_response.
        """
⋮----
response = await self._run_result_async()
⋮----
response = super().agent_response(msg)
⋮----
# When the agent response is to a tool message,
# we prefix it with "TOOL Result: " so that it is clear to the
# LLM that this is the result of the last TOOL;
# This ensures our caching trick works.
</file>

<file path="langroid/cachedb/redis_cachedb.py">
T = TypeVar("T", bound="RedisCache")
logger = logging.getLogger(__name__)
⋮----
class RedisCacheConfig(CacheDBConfig)
⋮----
"""Configuration model for RedisCache."""
⋮----
fake: bool = False
⋮----
class RedisCache(CacheDB)
⋮----
"""Redis implementation of the CacheDB."""
⋮----
_warned_password: bool = False
⋮----
def __init__(self, config: RedisCacheConfig)
⋮----
"""
        Initialize a RedisCache with the given config.

        Args:
            config (RedisCacheConfig): The configuration to use.
        """
⋮----
self.pool = fakeredis.FakeStrictRedis()  # type: ignore
⋮----
redis_password = os.getenv("REDIS_PASSWORD")
redis_host = os.getenv("REDIS_HOST") or None
redis_port = os.getenv("REDIS_PORT")
⋮----
self.pool = fakeredis.FakeStrictRedis()  # type: ignore
⋮----
self.pool = redis.ConnectionPool(  # type: ignore
⋮----
@contextmanager  # type: ignore
@contextmanager  # type: ignore
    def redis_client(self) -> AbstractContextManager[T]:  # type: ignore
⋮----
"""Cleanly open and close a redis client, avoids max clients exceeded error"""
⋮----
client: T = redis.Redis(connection_pool=self.pool)
⋮----
def close_all_connections(self) -> None
⋮----
with self.redis_client() as client:  # type: ignore
clients = client.client_list()
⋮----
def clear(self) -> None
⋮----
"""Clear keys from current db."""
⋮----
def clear_all(self) -> None
⋮----
"""Clear all keys from all dbs."""
⋮----
def store(self, key: str, value: Any) -> None
⋮----
"""
        Store a value associated with a key.

        Args:
            key (str): The key under which to store the value.
            value (Any): The value to store.
        """
⋮----
def retrieve(self, key: str) -> Dict[str, Any] | str | None
⋮----
"""
        Retrieve the value associated with a key.

        Args:
            key (str): The key to retrieve the value for.

        Returns:
            dict|str|None: The value associated with the key.
        """
⋮----
value = client.get(key)
⋮----
def delete_keys(self, keys: List[str]) -> None
⋮----
"""
        Delete the keys from the cache.

        Args:
            keys (List[str]): The keys to delete.
        """
⋮----
def delete_keys_pattern(self, pattern: str) -> None
⋮----
"""
        Delete the keys matching the pattern from the cache.

        Args:
            pattern (str): The pattern to match.
        """
⋮----
keys = client.keys(pattern)
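# Usage sketch (illustrative; with fake=True an in-memory fakeredis is used, so no
# Redis server or REDIS_* env vars are needed):
#
#     cache = RedisCache(RedisCacheConfig(fake=True))
#     cache.store("greeting", {"text": "hello"})
#     cache.retrieve("greeting")        # -> {"text": "hello"}
#     cache.delete_keys(["greeting"])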
</file>

<file path="langroid/embedding_models/base.py">
class EmbeddingModelsConfig(BaseSettings)
⋮----
model_type: str = "openai"
dims: int = 0
context_length: int = 512
batch_size: int = 512
⋮----
class EmbeddingModel(ABC)
⋮----
"""
    Abstract base class for an embedding model.
    """
⋮----
def clone(self) -> "EmbeddingModel"
⋮----
"""
        Return a copy of this embedding model suitable for use in cloned agents.
        Default behaviour attempts to deep-copy the model configuration and
        instantiate a fresh model of the same type; if that is not possible,
        the original instance is reused.
        """
config = getattr(self, "config", None)
⋮----
return type(self)(config.model_copy(deep=True))  # type: ignore[call-arg]
⋮----
@classmethod
    def create(cls, config: EmbeddingModelsConfig) -> "EmbeddingModel"
⋮----
@abstractmethod
    def embedding_fn(self) -> EmbeddingFunction
⋮----
@property
@abstractmethod
    def embedding_dims(self) -> int
⋮----
def similarity(self, text1: str, text2: str) -> float
⋮----
"""Compute cosine similarity between two texts."""
</file>

<file path="langroid/embedding_models/models.py">
AzureADTokenProvider = Callable[[], str]
⋮----
class OpenAIEmbeddingsConfig(EmbeddingModelsConfig)
⋮----
model_type: str = "openai"
model_name: str = "text-embedding-3-small"
api_key: str = ""
api_base: Optional[str] = None
organization: str = ""
dims: int = 1536
context_length: int = 8192
langdb_params: LangDBParams = LangDBParams()
⋮----
model_config = SettingsConfigDict(env_prefix="OPENAI_")
⋮----
class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig)
⋮----
model_type: str = "azure-openai"
⋮----
api_base: str = ""
deployment_name: Optional[str] = None
# api_version defaulted to 2024-06-01 as per https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/embeddings?tabs=python-new
# change this to the required supported version
api_version: Optional[str] = "2024-06-01"
# TODO: Add auth support for Azure OpenAI via AzureADTokenProvider
azure_ad_token: Optional[str] = None
azure_ad_token_provider: Optional[AzureADTokenProvider] = None
⋮----
model_config = SettingsConfigDict(env_prefix="AZURE_OPENAI_")
⋮----
class SentenceTransformerEmbeddingsConfig(EmbeddingModelsConfig)
⋮----
model_type: str = "sentence-transformer"
model_name: str = "BAAI/bge-large-en-v1.5"
context_length: int = 512
data_parallel: bool = False
# Select device (e.g. "cuda", "cpu") when data parallel is disabled
device: Optional[str] = None
# Select devices when data parallel is enabled
devices: Optional[list[str]] = None
⋮----
class FastEmbedEmbeddingsConfig(EmbeddingModelsConfig)
⋮----
"""Config for qdrant/fastembed embeddings,
    see here: https://github.com/qdrant/fastembed
    """
⋮----
model_type: str = "fastembed"
model_name: str = "BAAI/bge-small-en-v1.5"
batch_size: int = 256
cache_dir: Optional[str] = None
threads: Optional[int] = None
parallel: Optional[int] = None
additional_kwargs: Dict[str, Any] = {}
⋮----
class LlamaCppServerEmbeddingsConfig(EmbeddingModelsConfig)
⋮----
context_length: int = 2048
batch_size: int = 2048
⋮----
class GeminiEmbeddingsConfig(EmbeddingModelsConfig)
⋮----
model_type: str = "gemini"
model_name: str = "models/text-embedding-004"
⋮----
dims: int = 768
batch_size: int = 512
⋮----
class EmbeddingFunctionCallable
⋮----
"""
    A callable class designed to generate embeddings for a list of texts using
    the OpenAI or Azure OpenAI API, with automatic retries on failure.

    Attributes:
        embed_model (EmbeddingModel): An instance of EmbeddingModel that provides
               configuration and utilities for generating embeddings.

    Methods:
        __call__(input: List[str]) -> Embeddings: Generate embeddings for
                                a list of input texts.
    """
⋮----
def __init__(self, embed_model: EmbeddingModel, batch_size: int = 512)
⋮----
"""
        Initialize the EmbeddingFunctionCallable with a specific model.

        Args:
            embed_model (EmbeddingModel): An instance of an EmbeddingModel
                            subclass (e.g. OpenAIEmbeddings or AzureOpenAIEmbeddings)
                            to use for generating embeddings.
            batch_size (int): Batch size
        """
⋮----
def __call__(self, input: List[str]) -> Embeddings
⋮----
"""
        Generate embeddings for a given list of input texts using the OpenAI API,
        with retries on failure.

        This method:
        - Truncates each text in the input list to the model's maximum context length.
        - Processes the texts in batches to generate embeddings efficiently.
        - Automatically retries the embedding generation process with exponential
        backoff in case of failures.

        Args:
            input (List[str]): A list of input texts to generate embeddings for.

        Returns:
            Embeddings: A list of embedding vectors corresponding to the input texts.
        """
embeds = []
⋮----
# Truncate texts to context length while preserving text format
truncated_texts = self.embed_model.truncate_texts(input)
⋮----
# Process in batches
⋮----
result = self.embed_model.client.embeddings.create(
⋮----
input=batch, model=self.embed_model.config.model_name  # type: ignore
⋮----
batch_embeds = [d.embedding for d in result.data]
⋮----
embeds = self.embed_model.model.encode_multi_process(
⋮----
batch_embeds = self.embed_model.model.encode(
⋮----
).tolist()  # type: ignore
⋮----
embeddings = self.embed_model.model.embed(
⋮----
embeds = [embedding.tolist() for embedding in embeddings]
⋮----
tokenized_text = self.embed_model.tokenize_string(input_string)
⋮----
gen_embedding = self.embed_model.generate_embedding(
⋮----
embeds = self.embed_model.generate_embeddings(input)
⋮----
class OpenAIEmbeddings(EmbeddingModel)
⋮----
def __init__(self, config: OpenAIEmbeddingsConfig = OpenAIEmbeddingsConfig())
⋮----
# Check if using LangDB
⋮----
project_id = self.config.langdb_params.project_id
⋮----
model_for_tokenizer = self.config.model_name
⋮----
def truncate_texts(self, texts: List[str]) -> List[str] | List[List[int]]
⋮----
"""
        Truncate texts to the embedding model's context length.
        TODO: Maybe we should show warning, and consider doing T5 summarization?
        """
truncated_tokens = [
⋮----
# LangDB embedding endpt only works with strings, not tokens
⋮----
def embedding_fn(self) -> Callable[[List[str]], Embeddings]
⋮----
@property
    def embedding_dims(self) -> int
⋮----
class AzureOpenAIEmbeddings(EmbeddingModel)
⋮----
"""
    Azure OpenAI embeddings model implementation.
    """
⋮----
"""
        Initializes Azure OpenAI embeddings model.

        Args:
            config: Configuration for Azure OpenAI embeddings model.
        Raises:
            ValueError: If required Azure config values are not set.
        """
⋮----
"""Get the embedding function for Azure OpenAI.

        Returns:
            Callable that generates embeddings for input texts.
        """
⋮----
STEC = SentenceTransformerEmbeddingsConfig
⋮----
class SentenceTransformerEmbeddings(EmbeddingModel)
⋮----
def __init__(self, config: STEC = STEC())
⋮----
# this is an "extra" optional dependency, so we import it here
⋮----
self.config.devices  # type: ignore
⋮----
dims = self.model.get_sentence_embedding_dimension()
⋮----
return dims  # type: ignore
⋮----
class FastEmbedEmbeddings(EmbeddingModel)
⋮----
def __init__(self, config: FastEmbedEmbeddingsConfig = FastEmbedEmbeddingsConfig())
⋮----
@cached_property
    def embedding_dims(self) -> int
⋮----
embed_func = self.embedding_fn()
⋮----
LCSEC = LlamaCppServerEmbeddingsConfig
⋮----
class LlamaCppServerEmbeddings(EmbeddingModel)
⋮----
def __init__(self, config: LCSEC = LCSEC())
⋮----
def tokenize_string(self, text: str) -> List[int]
⋮----
data = {"content": text, "add_special": False, "with_pieces": False}
response = requests.post(self.tokenize_url, json=data)
⋮----
tokens = response.json()["tokens"]
⋮----
# not all(isinstance(token, (int, float)) for token in tokens):
⋮----
def detokenize_string(self, tokens: List[int]) -> str
⋮----
data = {"tokens": tokens}
response = requests.post(self.detokenize_url, json=data)
⋮----
text = response.json()["content"]
⋮----
def truncate_string_to_context_size(self, text: str) -> str
⋮----
tokens = self.tokenize_string(text)
tokens = tokens[: self.config.context_length]
⋮----
def generate_embedding(self, text: str) -> List[int | float]
⋮----
data = {"content": text}
response = requests.post(self.embedding_url, json=data)
⋮----
embeddings = self._extract_embedding(response.json())
⋮----
"""
        Extract embedding vector from llama.cpp response.

        Handles multiple response formats:
        1. Native /embedding: {"embedding": [floats]}
        2. Array format: [{"embedding": [floats]}]
        3. Double-nested: [{"embedding": [[floats]]}]
        4. OpenAI /v1/embeddings: {"data": [{"embedding": [floats]}]}
        5. Nested in dict: {"embedding": [[floats]]}

        Args:
            response_json: The JSON response from llama.cpp server

        Returns:
            List of floats representing the embedding vector

        Raises:
            ValueError: If response format is not recognized
        """
⋮----
# Try native format first: {"embedding": [floats]}
⋮----
embeddings = response_json["embedding"]
# Check if it's [floats]
⋮----
# Might be nested: {"embedding": [[floats]]}
⋮----
# Try OpenAI format: {"data": [{"embedding": [floats]}]}
⋮----
data = response_json["data"]
⋮----
embeddings = data[0]["embedding"]
⋮----
# Try array format: [{"embedding": [floats]}] or [{"embedding": [[floats]]}]
⋮----
first_item = response_json[0]
⋮----
embeddings = first_item["embedding"]
# Check if it's [floats]
⋮----
# Check if it's [[floats]]
⋮----
class GeminiEmbeddings(EmbeddingModel)
⋮----
def __init__(self, config: GeminiEmbeddingsConfig = GeminiEmbeddingsConfig())
⋮----
def generate_embeddings(self, texts: List[str]) -> List[List[float]]
⋮----
"""Generates embeddings for a list of input texts."""
all_embeddings: List[List[float]] = []
⋮----
result = self.client.models.embed_content(  # type: ignore[attr-defined]
⋮----
contents=batch,  # type: ignore
⋮----
# Extract .values from ContentEmbedding objects
⋮----
[emb.values for emb in result.embeddings]  # type: ignore
⋮----
def embedding_model(embedding_fn_type: str = "openai") -> EmbeddingModel
⋮----
"""
    Args:
        embedding_fn_type: Type of embedding model to use. Options are:
         - "openai",
         - "azure-openai",
         - "sentencetransformer", or
         - "fastembed".
            (others may be added in the future)
    Returns:
        EmbeddingModel: The corresponding embedding model class.
    """
⋮----
return OpenAIEmbeddings  # type: ignore
⋮----
return AzureOpenAIEmbeddings  # type: ignore
⋮----
return FastEmbedEmbeddings  # type: ignore
⋮----
return LlamaCppServerEmbeddings  # type: ignore
⋮----
return GeminiEmbeddings  # type: ignore
else:  # default sentence transformer
return SentenceTransformerEmbeddings  # type: ignore
</file>

<file path="langroid/language_models/client_cache.py">
"""
Client caching/singleton pattern for LLM clients to prevent connection pool exhaustion.
"""
⋮----
# Cache for client instances, keyed by hashed configuration parameters.
# Value is a tuple of (client instance, last_used_monotonic_seconds).
_client_cache: Dict[str, Tuple[Any, float]] = {}
_client_cache_lock = threading.RLock()
⋮----
# Keep track of clients for cleanup
_all_clients: weakref.WeakSet[Any] = weakref.WeakSet()
⋮----
def _get_cache_key(client_type: str, **kwargs: Any) -> str
⋮----
"""
    Generate a cache key from client type and configuration parameters.
    Uses the same approach as OpenAIGPT._cache_lookup for consistency.

    Args:
        client_type: Type of client (e.g., "openai", "groq", "cerebras")
        **kwargs: Configuration parameters (api_key, base_url, timeout, etc.)

    Returns:
        SHA256 hash of the configuration as a hex string
    """
# Convert kwargs to sorted string representation
sorted_kwargs_str = str(sorted(kwargs.items()))
⋮----
# Create raw key combining client type and sorted kwargs
raw_key = f"{client_type}:{sorted_kwargs_str}"
⋮----
# Hash the key for consistent length and to handle complex objects
hashed_key = hashlib.sha256(raw_key.encode()).hexdigest()
⋮----
def _get_cached_client(cache_key: str) -> Optional[Any]
⋮----
"""Get cached client and refresh its last-used timestamp.

    Must be called while holding ``_client_cache_lock``.
    """
entry = _client_cache.get(cache_key)
⋮----
def _store_client(cache_key: str, client: Any) -> None
⋮----
"""Store a client in the cache with the current timestamp.

    Must be called while holding ``_client_cache_lock``.
    """
⋮----
"""
    Get or create a singleton OpenAI client with the given configuration.

    Args:
        api_key: OpenAI API key
        base_url: Optional base URL for API
        organization: Optional organization ID
        timeout: Request timeout
        default_headers: Optional default headers
        http_client: Optional httpx.Client instance
        http_client_config: Optional config dict for creating httpx.Client

    Returns:
        OpenAI client instance
    """
⋮----
timeout = Timeout(timeout)
⋮----
# If http_client is provided directly, don't cache (complex object)
⋮----
client = OpenAI(
⋮----
# If http_client_config is provided, create client from config and cache
created_http_client = None
⋮----
created_http_client = Client(**http_client_config)
⋮----
cache_key = _get_cache_key(
⋮----
http_client_config=http_client_config,  # Include config in cache key
⋮----
cached_client = _get_cached_client(cache_key)
⋮----
http_client=created_http_client,  # Use the client created from config
⋮----
"""
    Get or create a singleton AsyncOpenAI client with the given configuration.

    Args:
        api_key: OpenAI API key
        base_url: Optional base URL for API
        organization: Optional organization ID
        timeout: Request timeout
        default_headers: Optional default headers
        http_client: Optional httpx.AsyncClient instance
        http_client_config: Optional config dict for creating httpx.AsyncClient

    Returns:
        AsyncOpenAI client instance
    """
⋮----
client = AsyncOpenAI(
⋮----
# If http_client_config is provided, create async client from config and cache
⋮----
created_http_client = AsyncClient(**http_client_config)
⋮----
def get_groq_client(api_key: str) -> Groq
⋮----
"""
    Get or create a singleton Groq client with the given configuration.

    Args:
        api_key: Groq API key

    Returns:
        Groq client instance
    """
cache_key = _get_cache_key("groq", api_key=api_key)
⋮----
client = Groq(api_key=api_key)
⋮----
def get_async_groq_client(api_key: str) -> AsyncGroq
⋮----
"""
    Get or create a singleton AsyncGroq client with the given configuration.

    Args:
        api_key: Groq API key

    Returns:
        AsyncGroq client instance
    """
cache_key = _get_cache_key("async_groq", api_key=api_key)
⋮----
client = AsyncGroq(api_key=api_key)
⋮----
def get_cerebras_client(api_key: str) -> Cerebras
⋮----
"""
    Get or create a singleton Cerebras client with the given configuration.

    Args:
        api_key: Cerebras API key

    Returns:
        Cerebras client instance
    """
cache_key = _get_cache_key("cerebras", api_key=api_key)
⋮----
client = Cerebras(api_key=api_key)
⋮----
def get_async_cerebras_client(api_key: str) -> AsyncCerebras
⋮----
"""
    Get or create a singleton AsyncCerebras client with the given configuration.

    Args:
        api_key: Cerebras API key

    Returns:
        AsyncCerebras client instance
    """
cache_key = _get_cache_key("async_cerebras", api_key=api_key)
⋮----
client = AsyncCerebras(api_key=api_key)
⋮----
def prune_cache(max_age_seconds: float) -> int
⋮----
"""
    Remove cache entries whose last-used time exceeds *max_age_seconds*.

    Evicted clients are **not** closed here because they may still be serving
    in-flight requests.  Cleanup is handled by the ``atexit`` handler and the
    garbage collector.

    Args:
        max_age_seconds: Maximum age (in seconds) for cache entries to keep.
            Entries older than this value are removed.

    Returns:
        Number of cache entries removed.
    """
⋮----
now = time.monotonic()
⋮----
stale_keys = [
⋮----
# Don't close evicted clients here — they may still be serving in-flight
# requests. The atexit handler and GC will clean them up.
⋮----
def _cleanup_clients() -> None
⋮----
"""
    Cleanup function to close all cached clients on exit.
    Called automatically via atexit.
    """
⋮----
# Check if close is a coroutine function (async)
⋮----
# For async clients, we can't await in atexit
# They will be cleaned up by the OS
⋮----
# Sync clients can be closed directly
⋮----
pass  # Ignore errors during cleanup
⋮----
# Register cleanup function to run on exit
⋮----
# For testing purposes
def _clear_cache() -> None
⋮----
"""Clear the client cache. Only for testing."""
</file>

<file path="langroid/parsing/agent_chats.py">
@no_type_check
def parse_message(msg: str) -> Tuple[str, str]
⋮----
"""
    Parse the intended recipient and content of a message.
    Message format is assumed to be TO[<recipient>]:<message>.
    The TO[<recipient>]: part is optional.

    Args:
        msg (str): message to parse

    Returns:
        str, str: task-name of intended recipient, and content of message
            (if recipient is not specified, task-name is empty string)

    """
⋮----
# Grammar definition
name = Word(alphanums)
to_start = Literal("TO[").suppress()
to_end = Literal("]:").suppress()
to_field = (to_start + name("name") + to_end) | Empty().suppress()
message = SkipTo(StringEnd())("text")
⋮----
# Parser definition
parser = to_field + message
⋮----
parsed = parser.parse_string(msg)
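# Illustrative (hypothetical) examples, not part of the original module, based on the
# TO[<recipient>]:<message> format described in the docstring:
#   parse_message("TO[Alice]: please review")  # -> recipient "Alice", content "please review"
#   parse_message("no recipient here")         # -> recipient "", content "no recipient here"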
</file>

<file path="langroid/parsing/web_search.py">
"""
Utilities for web search.

NOTE: Using Google Search requires setting the GOOGLE_API_KEY and GOOGLE_CSE_ID
environment variables in your `.env` file, as explained in the
[README](https://github.com/langroid/langroid#gear-installation-and-setup).
"""
⋮----
class WebSearchResult
⋮----
"""
    Class representing a Web Search result, containing the title, link,
    summary and full content of the result.
    """
⋮----
"""
        Args:
            title (str): The title of the search result.
            link (str): The link to the search result.
            max_content_length (int): The maximum length of the full content.
            max_summary_length (int): The maximum length of the summary.
        """
⋮----
def get_summary(self) -> str
⋮----
def get_full_content(self) -> str
⋮----
# First check headers only to get content length and type
head_response: Response = requests.head(self.link, timeout=5)
content_type = head_response.headers.get("content-type", "").lower()
⋮----
# Skip large files
content_length = int(head_response.headers.get("content-length", 0))
if content_length > 5_000_000:  # 5MB limit
⋮----
# Skip non-HTML content types
⋮----
response: Response = requests.get(self.link, timeout=10)
⋮----
soup: BeautifulSoup = BeautifulSoup(response.text, "html.parser")
text = " ".join(soup.stripped_strings)
⋮----
def __str__(self) -> str
⋮----
def to_dict(self) -> Dict[str, str]
⋮----
def google_search(query: str, num_results: int = 5) -> List[WebSearchResult]
⋮----
api_key = os.getenv("GOOGLE_API_KEY")
cse_id = os.getenv("GOOGLE_CSE_ID")
service: Resource = build("customsearch", "v1", developerKey=api_key)
raw_results = (
⋮----
def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]
⋮----
"""
    Method that makes an API call by Metaphor client that queries
    the top num_results links that matches the query. Returns a list
    of WebSearchResult objects.

    Args:
        query (str): The query body that users wants to make.
        num_results (int): Number of top matching results that we want
            to grab
    """
⋮----
api_key = os.getenv("METAPHOR_API_KEY") or os.getenv("EXA_API_KEY")
⋮----
client = Metaphor(api_key=api_key)
⋮----
response = client.search(
raw_results = response.results
⋮----
def exa_search(query: str, num_results: int = 5) -> List[WebSearchResult]
⋮----
"""
    Method that makes an API call by Exa client that queries
    the top num_results links that matches the query. Returns a list
    of WebSearchResult objects.

    Args:
        query (str): The query body that users wants to make.
        num_results (int): Number of top matching results that we want
            to grab
    """
⋮----
api_key = os.getenv("EXA_API_KEY")
⋮----
client = Exa(api_key=api_key)
⋮----
def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]
⋮----
"""
    Method that makes an API call by DuckDuckGo client that queries
    the top `num_results` links that matche the query. Returns a list
    of WebSearchResult objects.

    Args:
        query (str): The query body that users wants to make.
        num_results (int): Number of top matching results that we want
            to grab
    """
⋮----
search_results = [r for r in ddgs.text(query, max_results=num_results)]
⋮----
def tavily_search(query: str, num_results: int = 5) -> List[WebSearchResult]
⋮----
"""
    Method that makes an API call to Tavily API that queries
    the top `num_results` links that match the query. Returns a list
    of WebSearchResult objects.

    Args:
        query (str): The query body that users wants to make.
        num_results (int): Number of top matching results that we want
            to grab
    """
⋮----
api_key = os.getenv("TAVILY_API_KEY")
⋮----
client = TavilyClient(api_key=api_key)
response = client.search(query=query, max_results=num_results)
search_results = response["results"]
⋮----
def seltz_search(query: str, num_results: int = 5) -> List[WebSearchResult]
⋮----
"""
    Method that makes an API call to Seltz API that queries
    the top `num_results` results. Returns a list of WebSearchResult objects.

    Args:
        query (str): The query body that users wants to make.
        num_results (int): Number of top matching results that we want
            to grab
    """
⋮----
api_key = os.getenv("SELTZ_API_KEY")
⋮----
client = Seltz(api_key=api_key)
⋮----
results = []
⋮----
result = WebSearchResult(
⋮----
link=None,  # skip HTTP fetch; Seltz already provides content
</file>

<file path="langroid/utils/pandas_utils.py">
COMMON_USE_DF_METHODS = {
⋮----
POTENTIALLY_DANGEROUS_DF_METHODS = {
⋮----
WHITELISTED_DF_METHODS = COMMON_USE_DF_METHODS - POTENTIALLY_DANGEROUS_DF_METHODS
⋮----
BLOCKED_KW = {
MAX_CHAIN = 6
MAX_DEPTH = 25
NUMERIC_LIMIT = 1_000_000_000
⋮----
class UnsafeCommandError(ValueError)
⋮----
"""Raised when a command string violates security policy."""
⋮----
def _literal_ok(node: ast.AST) -> bool
⋮----
"""Return True if *node* is a safe literal (and within numeric limit)."""
⋮----
class CommandValidator(ast.NodeVisitor)
⋮----
"""AST walker that enforces the security policy."""
⋮----
# Comparison operators we allow
ALLOWED_CMPOP = (ast.Gt, ast.GtE, ast.Lt, ast.LtE, ast.Eq, ast.NotEq)
⋮----
# Arithmetic operators we allow (power ** intentionally omitted)
ALLOWED_BINOP = (ast.Add, ast.Sub, ast.Mult, ast.Div, ast.FloorDiv, ast.Mod)
ALLOWED_UNARY = (ast.UAdd, ast.USub)
⋮----
# Node whitelist
ALLOWED_NODES = (
⋮----
def __init__(self, df_name: str = "df")
⋮----
# Depth guard
def generic_visit(self, node: ast.AST) -> None
⋮----
# Literal validation
def visit_Constant(self, node: ast.Constant) -> None
⋮----
# Arithmetic
def visit_BinOp(self, node: ast.BinOp) -> None
⋮----
def visit_UnaryOp(self, node: ast.UnaryOp) -> None
⋮----
# Comparisons
def visit_Compare(self, node: ast.Compare) -> None
⋮----
# Subscripts
def visit_Subscript(self, node: ast.Subscript) -> None
⋮----
# Attribute access
def visit_Attribute(self, node: ast.Attribute) -> None
⋮----
# Block dunder attributes to prevent access to __init__, __globals__, etc.
⋮----
# Block single underscore private attributes as well for defense in depth
⋮----
# Method calls
def visit_Call(self, node: ast.Call) -> None
⋮----
method = node.func.attr
⋮----
# kwarg / arg checks
⋮----
# Check numeric limits on literals; non-literals validated via generic_visit
⋮----
# Names
def visit_Name(self, node: ast.Name) -> None
⋮----
# Top-level gate
def visit(self, node: ast.AST) -> None
⋮----
def sanitize_command(expr: str, df_name: str = "df") -> str
⋮----
"""
    Validate *expr*; return it unchanged if it passes all rules,
    else raise UnsafeCommandError with the first violation encountered.
    """
tree = ast.parse(expr, mode="eval")
⋮----
def stringify(x: Any) -> str
⋮----
# Convert x to DataFrame if it is not one already
⋮----
df = x.to_frame()
⋮----
df = x
⋮----
# Truncate long text columns to 1000 characters
⋮----
# Limit to 10 rows
df = df.head(10)
⋮----
# Convert to string
return df.to_string(index=False)  # type: ignore
</file>

<file path="langroid/utils/pydantic_utils.py">
logger = logging.getLogger(__name__)
⋮----
"""Flatten a nested dictionary, using a separator in the keys.
    Useful for Pydantic models with nested fields -- first use
        dct = mdl.model_dump()
    to get a nested dictionary, then use this function to flatten it.
    """
items: List[Tuple[str, Any]] = []
⋮----
new_key = f"{parent_key}{sep}{k}" if parent_key else k
⋮----
def has_field(model_class: Type[BaseModel], field_name: str) -> bool
⋮----
"""Check if a Pydantic model class has a field with the given name."""
⋮----
def _recursive_purge_dict_key(d: Dict[str, Any], k: str) -> None
⋮----
"""Remove a key from a dictionary recursively"""
⋮----
"""
    Given a possibly nested Pydantic class, return a flattened version of it,
    by constructing top-level fields, whose names are formed from the path
    through the nested structure, separated by double underscores.

    This version ignores inherited defaults, so it is incomplete,
    but it is retained since it is simpler and may be useful in some cases.
    The full version is `flatten_pydantic_model`, see below.

    Args:
        model (Type[BaseModel]): The Pydantic model to flatten.
        base_model (Type[BaseModel], optional): The base model to use for the
            flattened model. Defaults to BaseModel.

    Returns:
        Type[BaseModel]: The flattened Pydantic model.
    """
⋮----
flattened_fields: Dict[str, Tuple[Any, ...]] = {}
models_to_process = [(model, "")]
⋮----
new_prefix = (
⋮----
flattened_name = f"{current_prefix}{name}"
⋮----
"""
    Given a possibly nested Pydantic class, return a flattened version of it,
    by constructing top-level fields, whose names are formed from the path
    through the nested structure, separated by double underscores.

    Args:
        model (Type[BaseModel]): The Pydantic model to flatten.
        base_model (Type[BaseModel], optional): The base model to use for the
            flattened model. Defaults to BaseModel.

    Returns:
        Type[BaseModel]: The flattened Pydantic model.
    """
⋮----
flattened_fields: Dict[str, Any] = {}
⋮----
field_type = field.annotation if hasattr(field, "annotation") else field
⋮----
def get_field_names(model: Type[BaseModel]) -> List[str]
⋮----
"""Get all field names from a possibly nested Pydantic model."""
mdl = flatten_pydantic_model(model)
fields = list(mdl.model_fields.keys())
# fields may be like a__b__c , so we only want the last part
⋮----
"""
    Generates a JSON schema for a Pydantic model,
    with options to exclude specific fields.

    This function traverses the Pydantic model's fields, including nested models,
    to generate a dictionary representing the JSON schema. Fields specified in
    the exclude list will not be included in the generated schema.

    Args:
        model (Type[BaseModel]): The Pydantic model class to generate the schema for.
        exclude (List[str]): A list of string field names to be excluded from the
                             generated schema. Defaults to an empty list.

    Returns:
        Dict[str, Any]: A dictionary representing the JSON schema of the provided model,
                        with specified fields excluded.
    """
⋮----
output: Dict[str, Any] = {}
⋮----
continue  # Skip excluded fields
⋮----
# Recursively generate schema for nested models
⋮----
# Represent the type as a string here
⋮----
# Fallback for complex types
⋮----
# Non-model type, return a simplified representation
⋮----
"""
    Given a possibly nested Pydantic instance, return a flattened version of it,
    as a dict where nested traversal paths are translated to keys a__b__c.

    Args:
        instance (BaseModel): The Pydantic instance to flatten.
        prefix (str, optional): The prefix to use for the top-level fields.
        force_str (bool, optional): Whether to force all values to be strings.

    Returns:
        Dict[str, Any]: The flattened dict.

    """
flat_data: Dict[str, Any] = {}
⋮----
# Assuming nested pydantic model will be a dict here
⋮----
# Get field info from model_fields
field_info = instance.model_fields[name]
# Try to get the nested model type from field annotation
field_type = (
⋮----
nested_flat_data = flatten_pydantic_instance(
⋮----
# Skip non-Pydantic nested fields for safety
⋮----
def extract_fields(doc: BaseModel, fields: List[str]) -> Dict[str, Any]
⋮----
"""
    Extract specified fields from a Pydantic object.
    Supports dotted field names, e.g. "metadata.author".
    Dotted fields are matched exactly according to the corresponding path.
    Non-dotted fields are matched against the last part of the path.
    Clashes are ignored.
    Args:
        doc (BaseModel): The Pydantic object.
        fields (List[str]): The list of fields to extract.

    Returns:
        Dict[str, Any]: A dictionary of field names and values.

    """
⋮----
def get_value(obj: BaseModel, path: str) -> Any | None
⋮----
obj = getattr(obj, part)
⋮----
def traverse(obj: BaseModel, result: Dict[str, Any], prefix: str = "") -> None
⋮----
key = f"{prefix}.{k}" if prefix else k
⋮----
result: Dict[str, Any] = {}
⋮----
# Extract values for dotted field names and use last part as key
⋮----
value = get_value(doc, field)
⋮----
key = field.split(".")[-1]
⋮----
# Traverse the object to get non-dotted fields
all_fields: Dict[str, Any] = {}
⋮----
# Add non-dotted fields to the result.
# Prefer top-level attributes (e.g. doc.title) over nested ones
# (e.g. metadata.title) to avoid default metadata values overwriting
# real top-level fields.
⋮----
direct_val = getattr(doc, field)
⋮----
"""
    Given a flattened version of a nested dict, reconstruct the nested dict.
    Field names in the flattened dict are assumed to be of the form
    "field1__field2__field3", going from top level down.

    Args:
        flat_data (Dict[str, Any]): The flattened dict.
        sub_dict (str, optional): The name of the sub-dict to extract from the
            flattened dict. Defaults to "" (extract the whole dict).

    Returns:
        Dict[str, Any]: The nested dict.

    """
nested_data: Dict[str, Any] = {}
⋮----
keys = key.split("__")
d = nested_data
⋮----
d = d.setdefault(k, {})
⋮----
if sub_dict != "":  # e.g. "payload"
nested_data = nested_data[sub_dict]
⋮----
"""Flattened dict with a__b__c style keys -> nested dict -> pydantic object"""
nested_data = nested_dict_from_flat(flat_data, sub_dict)
⋮----
original_values = {}
⋮----
# Save original value
⋮----
# Raise error for non-existent field
⋮----
# Handle validation error
⋮----
# Restore original values
⋮----
T = TypeVar("T", bound=BaseModel)
⋮----
@contextmanager
def temp_params(config: T, field: str, temp: T) -> Generator[None, None, None]
⋮----
"""Context manager to temporarily override `field` in a `config`"""
original_vals = getattr(config, field)
⋮----
# Apply temporary settings
⋮----
# Revert to original settings
⋮----
def numpy_to_python_type(numpy_type: Type[Any]) -> Type[Any]
⋮----
"""Converts a numpy data type to its Python equivalent."""
type_mapping = {
⋮----
# Add other numpy types as necessary
⋮----
def dataframe_to_pydantic_model(df: pd.DataFrame) -> Type[BaseModel]
⋮----
"""Make a Pydantic model from a dataframe."""
fields = {col: (type(df[col].iloc[0]), ...) for col in df.columns}
return create_model("DataFrameModel", __base__=BaseModel, **fields)  # type: ignore
⋮----
def dataframe_to_pydantic_objects(df: pd.DataFrame) -> List[BaseModel]
⋮----
"""Make a list of Pydantic objects from a dataframe."""
Model = dataframe_to_pydantic_model(df)
⋮----
def first_non_null(series: pd.Series) -> Any | None
⋮----
"""Find the first non-null item in a pandas Series."""
⋮----
"""
    Make a subclass of Document from a dataframe.

    Args:
        df (pd.DataFrame): The dataframe.
        content (str): The name of the column containing the content,
            which will map to the Document.content field.
        metadata (List[str]): A list of column names containing metadata;
            these will be included in the Document.metadata field.
        exclude (List[str]): A list of column names to exclude from the model.
            (e.g. "vector" when lance is used to add an embedding vector to the df)

    Returns:
        Type[BaseModel]: A pydantic model subclassing Document.
    """
⋮----
# Remove excluded columns
df = df.drop(columns=exclude, inplace=False)
# Check if metadata_cols is empty
⋮----
# Define fields for the dynamic subclass of DocMetaData
metadata_fields = {
⋮----
None,  # Optional[numpy_to_python_type(type(first_non_null(df[col])))],
⋮----
DynamicMetaData = create_model(  # type: ignore
⋮----
# Use the base DocMetaData class directly
DynamicMetaData = DocMetaData
⋮----
# Define additional top-level fields for DynamicDocument
additional_fields = {
⋮----
None,  # Optional[numpy_to_python_type(type(first_non_null(df[col])))],
⋮----
# Create a dynamic subclass of Document
DynamicDocumentFields = {
DynamicDocument = create_model(  # type: ignore
⋮----
content_val = row[content] if (content and content in row) else ""
metadata_values = (
additional_values = {
metadata = DynamicMetaData(**metadata_values)
⋮----
# Bind the method to the class
⋮----
return DynamicDocument  # type: ignore
⋮----
"""
    Make a list of Document objects from a dataframe.
    Args:
        df (pd.DataFrame): The dataframe.
        content (str): The name of the column containing the content,
            which will map to the Document.content field.
        metadata (List[str]): A list of column names containing metadata;
            these will be included in the Document.metadata field.
        doc_cls (Type[BaseModel], optional): A Pydantic model subclassing
            Document. Defaults to None.
    Returns:
        List[Document]: The list of Document objects.
    """
Model = doc_cls or dataframe_to_document_model(df, content, metadata)
docs = [
⋮----
Model.from_df_row(row, content, metadata)  # type: ignore
⋮----
def extra_metadata(document: Document, doc_cls: Type[Document] = Document) -> List[str]
⋮----
"""
    Checks for extra fields in a document's metadata that are not defined in the
    original metadata schema.

    Args:
        document (Document): The document instance to check for extra fields.
        doc_cls (Type[Document]): The class type derived from Document, used
            as a reference to identify extra fields in the document's metadata.

    Returns:
        List[str]: A list of strings representing the keys of the extra fields found
        in the document's metadata.
    """
# Convert metadata to dict, including extra fields.
metadata_fields = set(document.metadata.model_dump().keys())
⋮----
# Get defined fields in the metadata of doc_cls
metadata_field = doc_cls.model_fields["metadata"]
metadata_type = (
⋮----
defined_fields = set(metadata_type.model_fields.keys())
⋮----
defined_fields = set()
⋮----
# Identify extra fields not in defined fields.
extra_fields = list(metadata_fields - defined_fields)
⋮----
def extend_document_class(d: Document) -> Type[Document]
⋮----
"""Generates a new pydantic class based on a given document instance.

    This function dynamically creates a new pydantic class with additional
    fields based on the "extra" metadata fields present in the given document
    instance. The new class is a subclass of the original Document class, with
    the original metadata fields retained and extra fields added as normal
    fields to the metadata.

    Args:
        d: An instance of the Document class.

    Returns:
        A new subclass of the Document class that includes the additional fields
        found in the metadata of the given document instance.
    """
# Extract the fields from the original metadata class, including types,
# correctly handling special types like List[str].
original_metadata_fields = {
# Extract extra fields from the metadata instance with their types
extra_fields = {
⋮----
# Combine original and extra fields for the new metadata class
combined_fields = {**original_metadata_fields, **extra_fields}
⋮----
# Create a new metadata class with combined fields
NewMetadataClass = create_model(  # type: ignore
# NewMetadataClass.__config__.arbitrary_types_allowed = True
⋮----
# Create a new document class using the new metadata class
NewDocumentClass = create_model(
⋮----
class PydanticWrapper(BaseModel)
⋮----
value: Any
⋮----
def get_pydantic_wrapper(value_type: type) -> type[PydanticWrapper]
⋮----
class WrappedValue(PydanticWrapper)
⋮----
value: value_type  # type: ignore
</file>

<file path="langroid/vector_store/qdrantdb.py">
logger = logging.getLogger(__name__)
⋮----
T = TypeVar("T")
⋮----
def from_optional(x: Optional[T], default: T) -> T
⋮----
def is_valid_uuid(uuid_to_test: str) -> bool
⋮----
"""
    Check if a given string is a valid UUID or a valid unsigned 64-bit integer
    (both are accepted by Qdrant as point IDs).
    """
⋮----
uuid_obj = uuid.UUID(uuid_to_test)
⋮----
# Check for valid unsigned 64-bit integer
⋮----
int_value = int(uuid_to_test)
⋮----
class QdrantDBConfig(VectorStoreConfig)
⋮----
cloud: bool = True
docker: bool = False
collection_name: str | None = "temp"
storage_path: str = ".qdrant/data"
embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
use_sparse_embeddings: bool = False
sparse_embedding_model: str = "naver/splade-v3-distilbert"
sparse_limit: int = 3
distance: str = "cosine"
⋮----
class QdrantDB(VectorStore)
⋮----
def __init__(self, config: QdrantDBConfig = QdrantDBConfig())
⋮----
key = os.getenv("QDRANT_API_KEY")
url = os.getenv("QDRANT_API_URL")
⋮----
new_storage_path = config.storage_path + ".new"
⋮----
# Note: Only create collection if a non-null collection name is provided.
# This is useful to delay creation of vecdb until we have a suitable
# collection name (e.g. we could get it from the url or folder path).
⋮----
def clone(self) -> "QdrantDB"
⋮----
"""Create an independent Qdrant client when running against Qdrant Cloud."""
⋮----
cloned = super().clone()
⋮----
def close(self) -> None
⋮----
"""
        Close the QdrantDB client and release any resources (e.g., file locks).
        This is especially important for local storage to release the .lock file.
        """
⋮----
# QdrantLocal has a close method that releases the lock
⋮----
def __enter__(self) -> "QdrantDB"
⋮----
"""Context manager entry."""
⋮----
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None
⋮----
"""Context manager exit - ensure cleanup even if an exception occurred."""
⋮----
def clear_empty_collections(self) -> int
⋮----
coll_names = self.list_collections()
n_deletes = 0
⋮----
info = self.client.get_collection(collection_name=name)
⋮----
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
"""Clear all collections with the given prefix."""
⋮----
coll_names = [
⋮----
n_empty_deletes = 0
n_non_empty_deletes = 0
⋮----
points_count = from_optional(info.points_count, 0)
⋮----
def list_collections(self, empty: bool = False) -> List[str]
⋮----
"""
        Returns:
            List of collection names that have at least one vector.

        Args:
            empty (bool, optional): Whether to include empty collections.
        """
⋮----
colls = list(self.client.get_collections())[0][1]
⋮----
counts = []
⋮----
def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
"""
        Create a collection with the given name, optionally replacing an existing
            collection if `replace` is True.
        Args:
            collection_name (str): Name of the collection to create.
            replace (bool): Whether to replace an existing collection
                with the same name. Defaults to False.
        """
⋮----
coll = self.client.get_collection(collection_name=collection_name)
⋮----
vectors_config = {
sparse_vectors_config = None
⋮----
sparse_vectors_config = {
⋮----
collection_info = self.client.get_collection(collection_name=collection_name)
⋮----
level = logger.getEffectiveLevel()
⋮----
def get_sparse_embeddings(self, inputs: List[str]) -> List["SparseVector"]
⋮----
tokens = self.sparse_tokenizer(
output = self.sparse_model(**tokens)
vectors = torch.max(
sparse_embeddings = []
⋮----
cols = vec.nonzero().squeeze().cpu().tolist()
weights = vec[cols].cpu().tolist()
⋮----
def add_documents(self, documents: Sequence[Document]) -> None
⋮----
# Add id to metadata if not already present
⋮----
# Fix the ids due to qdrant finickiness
⋮----
colls = self.list_collections(empty=True)
⋮----
document_dicts = [doc.model_dump() for doc in documents]
embedding_vecs = self.embedding_fn([doc.content for doc in documents])
sparse_embedding_vecs = self.get_sparse_embeddings(
⋮----
ids = [self._to_int_or_uuid(d.id()) for d in documents]
# don't insert all at once, batch in chunks of b,
# else we get an API error
b = self.config.batch_size
⋮----
vectors: Dict[str, Embeddings | List[SparseVector]] = {
⋮----
coll_found: bool = False
⋮----
# poll until collection is ready
⋮----
coll_found = True
⋮----
def delete_collection(self, collection_name: str) -> None
⋮----
def _to_int_or_uuid(self, id: str) -> int | str
⋮----
int_val = int(id)
⋮----
# If doc_id is already a valid UUID, return it as is
⋮----
# Otherwise, generate a UUID from the doc_id
# Convert doc_id to string if it's not already
id_str = str(id)
⋮----
# Hash the document ID using SHA-1
hash_object = hashlib.sha1(id_str.encode())
hash_digest = hash_object.hexdigest()
⋮----
# Truncate or manipulate the hash to fit into a UUID (128 bits)
uuid_str = hash_digest[:32]
⋮----
# Format this string into a UUID format
formatted_uuid = uuid.UUID(uuid_str)
⋮----
def get_all_documents(self, where: str = "") -> List[Document]
⋮----
docs = []
offset = 0
filter = Filter() if where == "" else Filter.model_validate(json.loads(where))
⋮----
limit=10_000,  # try getting all at once, if not we keep paging
⋮----
self.config.document_class(**record.payload)  # type: ignore
⋮----
# ignore
⋮----
offset = next_page_offset  # type: ignore
⋮----
def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
_ids = [self._to_int_or_uuid(id) for id in ids]
records = self.client.retrieve(
# Note the records may NOT be in the order of the ids,
# so we re-order them here.
id2payload = {record.id: record.payload for record in records}
ordered_payloads = [id2payload[id] for id in _ids if id in id2payload]
docs = [Document(**payload) for payload in ordered_payloads]  # type: ignore
⋮----
embedding = self.embedding_fn([text])[0]
# TODO filter may not work yet
⋮----
filter = Filter()
⋮----
filter = Filter.model_validate(json.loads(where))
requests = [
⋮----
sparse_embedding = self.get_sparse_embeddings([text])[0]
⋮----
search_result_lists: List[List[ScoredPoint]] = self.client.search_batch(
⋮----
search_result = [
⋮----
]  # 2D list -> 1D list
scores = [match.score for match in search_result if match is not None]
docs = [
⋮----
self.config.document_class(**(match.payload))  # type: ignore
⋮----
doc_score_pairs = list(zip(docs, scores))
max_score = max(ds[1] for ds in doc_score_pairs)
</file>

<file path="plugins/langroid/skills/add-pattern/SKILL.md">
---
name: add-pattern
description: Use this skill when you learn one or more design pattern(s) in the
  Langroid (multi) agent framework and want to record them for your own future
  reference. Use this either autonomously, or when asked by the user to record
  a new pattern.
---

# add-pattern

## Instructions

When you learn a new Langroid design pattern, do the following:

1. Add an entry in the sibling `patterns/SKILL.md` file in the appropriate category
   section, containing a DESCRIPTION of the goal of the pattern (i.e. what it enables
   you to implement), accompanied by a `- Reference:` pointer to a markdown DOCUMENT
   in the `patterns/` directory.

   IMPORTANT - The DESCRIPTION should be clear enough that future YOU can effectively
   use it to MATCH design problems you may encounter in future.

2. In that DOCUMENT, describe the idea of the implementation along with code examples.
   Follow the format of existing pattern files (Problem, Solution, Complete Code
   Example, Key Points, When to Use).
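
For illustration only (the category heading and description wording below are
hypothetical, not taken from the existing `patterns/SKILL.md`), an entry might
look like:

```markdown
### Tool Handling

- Validate tool output against agent state before accepting it, with automatic
  LLM retry when validation fails.
  - Reference: patterns/agent-handler-validation-with-state.md
```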
</file>

<file path="plugins/langroid/skills/patterns/agent-handler-validation-with-state.md">
# Pattern: Validate Tool Output Against Agent State

## Problem

You have an agent that produces tool output, but you need to validate that output
against the input context before accepting it. For example:
- Ensuring placeholders like `{{differentiation}}` are preserved in edited text
- Verifying required fields aren't removed
- Checking that certain patterns from the input appear in the output

If validation fails, you want the LLM to automatically retry.

## Solution

1. Create a **custom agent class** that stores input context as state
2. Define a **handler method** on the agent (name matches tool's `request` field)
3. In the handler, **validate** tool output against stored state
4. Return **error string** for retry, or **AgentDoneTool** for success
5. Use `done_sequences=["T[ToolName], A"]` so handler runs before task terminates
   (use `["T, A"]` only if agent has a single unambiguous tool)

## Complete Code Example

```python
import langroid as lr
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.agent.tool_message import ToolMessage
from langroid.agent.tools.orchestration import AgentDoneTool
from pydantic import Field


# Reserved content that must be preserved
RESERVED_PLACEHOLDERS = ["{{differentiation}}", "{{company_info}}"]


class LineReplacementTool(ToolMessage):
    """Tool for LLM to output replacement text."""
    request: str = "emit_line_replacement"
    purpose: str = "Output the replacement text for the specified lines"

    replacement_text: str = Field(..., description="The new text")
    explanation: str = Field(..., description="Brief explanation of the edit")


class LineEditorAgent(ChatAgent):
    """Editor agent that validates placeholder preservation."""

    def __init__(self, config: ChatAgentConfig):
        super().__init__(config)
        self.current_text: str = ""  # Set before task.run()

    def init_state(self):
        """Reset state between tasks."""
        super().init_state()
        self.current_text = ""

    def emit_line_replacement(self, msg: LineReplacementTool) -> str | AgentDoneTool:
        """
        Handler for LineReplacementTool. Validates placeholder preservation.

        Name matches the tool's `request` field exactly.
        """
        # Check if any reserved placeholder in original is missing from replacement
        for placeholder in RESERVED_PLACEHOLDERS:
            if placeholder in self.current_text:
                if placeholder not in msg.replacement_text:
                    # Return error string - LLM sees this and can retry
                    return (
                        f"ERROR: You removed the placeholder {placeholder}. "
                        f"This placeholder MUST be preserved exactly as-is. "
                        f"Please output the replacement again, keeping {placeholder} intact."
                    )

        # Validation passed - terminate task successfully
        # Return AgentDoneTool with the validated tool in the tools list
        return AgentDoneTool(tools=[msg])


def create_editor_agent(model: str) -> LineEditorAgent:
    """Create the editor agent with validation handler."""
    config = ChatAgentConfig(
        name="LineEditor",
        llm=lr.language_models.OpenAIGPTConfig(chat_model=model),
        system_message="""You are a precise technical editor.
You will receive text to edit along with instructions.
Output the replacement using the emit_line_replacement tool.
IMPORTANT: Preserve any {{...}} placeholders exactly as they appear.""",
    )
    agent = LineEditorAgent(config)
    agent.enable_message(LineReplacementTool)
    return agent


def apply_edit(current_text: str, instruction: str, model: str) -> LineReplacementTool | None:
    """Apply an edit with placeholder validation."""
    agent = create_editor_agent(model)

    # Store current text in agent state for handler to access
    agent.current_text = current_text

    # Use done_sequences so handler runs before task terminates
    # "T[ToolName], A" = Specific tool emitted, then Agent handles it
    # Use "T, A" only if agent has a single unambiguous tool
    task = lr.Task(
        agent,
        interactive=False,
        config=lr.TaskConfig(done_sequences=["T[LineReplacementTool], A"]),
    )[LineReplacementTool]

    prompt = f"""Edit this text:

{current_text}

Instruction: {instruction}

Use emit_line_replacement tool with your replacement."""

    # If handler returns error string, LLM retries automatically
    # If handler returns DoneTool, task terminates and we get the tool
    result: LineReplacementTool | None = task.run(prompt, turns=5)
    return result
```

## Key Points

1. **Handler method name = tool's `request` field**: If `request = "emit_line_replacement"`,
   define `def emit_line_replacement(self, msg)`

2. **Store context before task.run()**: Set `agent.current_text = ...` so handler can access it

3. **Return types control flow**:
   - `str` (error message) → Langroid sends to LLM, triggers retry
   - `AgentDoneTool(tools=[msg])` → Task terminates successfully with the tool
   - Note: Use `AgentDoneTool` (has `tools` field), NOT `DoneTool` (no `tools` field)

4. **done_sequences=["T[ToolName], A"]**: Ensures handler runs. Without this, task
   might exit immediately when tool is emitted, skipping validation. Use `["T, A"]`
   only when agent has a single unambiguous tool.

5. **init_state()**: Override to reset state between uses if agent is reused

## When to Use This Pattern

- LLM must preserve certain content (placeholders, markers, required fields)
- You need to validate output against input context
- Validation failure should trigger automatic retry
- Simple prompt instructions aren't reliable enough (small LLMs ignore them)
</file>

<file path="plugins/langroid/skills/patterns/agent-tool-handler-with-state.md">
# Stateful Tool Handler as Agent Method

## The Pattern

Instead of defining a `handle()` method inside the `ToolMessage` class, define a
method on the **agent** with the same name as the tool's `request` field. This
gives the handler access to agent state and resources.

## When to Use

- Handler needs to execute external operations (API calls, DB queries, shell cmds)
- Need to track state across retries (e.g., failure counter to limit retries)
- Handler needs access to agent-level resources (connections, configs, caches)
- Want Langroid's automatic retry loop: errors go back to LLM for self-correction

## Key Concepts

1. **Method name = `request` field**: If `request = "my_tool"`, define
   `def my_tool(self, msg: MyToolMessage)`

2. **Return types control flow**:
   - Return `str` (especially error messages) -> Langroid sends to LLM, can retry
   - Return `DoneTool(content="result")` -> Task terminates with this result

3. **State in `init_state()`**: Override `init_state()` to reset counters/state
   between uses. Called by `task.reset_all_sub_tasks()`.

## Example: Query Executor with Retry Limit

```python
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig
from langroid.agent.tool_message import ToolMessage
from langroid.agent.tools.orchestration import DoneTool
from pydantic import Field
from typing import Union

import langroid as lr  # needed for lr.Task below


class QueryTool(ToolMessage):
    """Tool for LLM to emit a query."""
    request: str = "execute_query"
    purpose: str = "Execute a database query"

    query: str = Field(..., description="The SQL query to execute")


class QueryExecutorAgent(ChatAgent):
    """Agent that executes queries with retry limiting."""

    def __init__(self, config: ChatAgentConfig, db_connection, max_retries: int = 3):
        super().__init__(config)
        self.db_connection = db_connection
        self.max_retries = max_retries
        self.failure_count = 0

    def init_state(self):
        """Reset state between tasks. Called by task.reset_all_sub_tasks()."""
        super().init_state()
        self.failure_count = 0

    def execute_query(self, msg: QueryTool) -> Union[str, DoneTool]:
        """Handler for QueryTool. Name matches request field."""
        try:
            result = self.db_connection.execute(msg.query)
            # Success - terminate task with result
            return DoneTool(content=str(result))

        except Exception as e:
            self.failure_count += 1

            if self.failure_count >= self.max_retries:
                # Give up after max retries
                return DoneTool(content="")  # Empty = failure

            # Return error string - Langroid sends to LLM for retry
            return f"Query failed with error: {e}\nPlease fix and try again."


# Usage
config = ChatAgentConfig(
    name="QueryAgent",
    system_message="You execute SQL queries. Use the execute_query tool.",
)
agent = QueryExecutorAgent(config, db_connection=my_db, max_retries=3)
agent.enable_message([QueryTool])

task = lr.Task(agent, interactive=False)
result = task.run("Run a query to get all users")
# result.content will be query results or empty string on failure
```

## Example: External API with Validation

```python
class APICallTool(ToolMessage):
    request: str = "call_api"
    purpose: str = "Call an external API endpoint"

    endpoint: str = Field(..., description="API endpoint path")
    payload: dict = Field(default_factory=dict, description="Request payload")


class APIAgent(ChatAgent):
    def __init__(self, config, api_client):
        super().__init__(config)
        self.api_client = api_client
        self.call_count = 0

    def init_state(self):
        super().init_state()
        self.call_count = 0

    def call_api(self, msg: APICallTool) -> Union[str, DoneTool]:
        """Handler matches 'call_api' request field."""
        # Validate before calling
        if not msg.endpoint.startswith("/"):
            return "Error: endpoint must start with '/'. Please fix."

        try:
            response = self.api_client.post(msg.endpoint, json=msg.payload)

            if response.status_code != 200:
                return f"API returned {response.status_code}: {response.text}"

            self.call_count += 1
            return DoneTool(content=str(response.json()))

        except Exception as e:
            return f"API call failed: {e}. Check endpoint and payload."
```

## Integration with Batch Processing

When using `run_batch_tasks()`, each item gets a cloned agent with fresh state:

```python
from langroid.agent.batch import run_batch_tasks

base_task = lr.Task(agent, interactive=False)

# Each item gets a cloned agent - no state leakage between items
results = run_batch_tasks(
    base_task,
    items=["query1", "query2", "query3"],
    input_map=lambda q: f"Execute: {q}",
    output_map=lambda r: r.content if r else None,
    sequential=False,  # Run in parallel
    batch_size=10,
)
```

## Important Notes

1. The handler method receives the parsed `ToolMessage` object, not raw JSON
2. Langroid automatically deserializes the LLM's tool call into the ToolMessage
3. If handler returns a string, Langroid treats it as a response and continues
   the conversation (LLM sees it, can emit another tool call)
4. `DoneTool` signals task completion - the task's `run()` returns
5. For async handlers, define `async def my_tool(self, msg)` - Langroid handles it
   (see the sketch below)
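
## Async Handler Sketch

A minimal sketch (the tool and the awaited call are hypothetical; it assumes the
same handler-naming convention described above) showing an async handler:

```python
import asyncio
from typing import Union

from langroid.agent.chat_agent import ChatAgent
from langroid.agent.tool_message import ToolMessage
from langroid.agent.tools.orchestration import DoneTool


class LookupTool(ToolMessage):
    request: str = "lookup"
    purpose: str = "Look up a value by key"
    key: str


class AsyncLookupAgent(ChatAgent):
    async def lookup(self, msg: LookupTool) -> Union[str, DoneTool]:
        """Async handler: method name matches the tool's `request` field."""
        await asyncio.sleep(0)  # stand-in for an awaited I/O call (API, DB, etc.)
        if not msg.key:
            # Error string -> sent back to the LLM so it can retry
            return "Error: empty key. Please provide a non-empty key."
        # DoneTool -> task terminates with this result
        return DoneTool(content=f"value-for-{msg.key}")
```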
</file>

<file path="plugins/langroid/skills/patterns/done-sequences-specific-tool.md">
# Pattern: Terminate Task on SPECIFIC Tool (done_sequences)

## Problem

You have an agent with multiple tools, but you only want the task to terminate
when ONE specific tool is called. Other tools should NOT trigger termination.

## Solution

Use `TaskConfig(done_sequences=["T[ToolName]"])` with the specific tool name.

### Two Variants

**Exit immediately on tool EMISSION:**
```python
task_config = lr.TaskConfig(
    done_sequences=["T[FinalAnswerTool]"]  # No ", A"
)
```
Task terminates as soon as the LLM emits `FinalAnswerTool`, before any handling.

**Exit after tool is HANDLED:**
```python
task_config = lr.TaskConfig(
    done_sequences=["T[FinalAnswerTool], A"]  # With ", A"
)
```
Task waits for the tool to be emitted AND for the agent to handle it before
terminating.

## Complete Code Example

```python
import langroid as lr
from langroid.agent.task import Task
from langroid.agent.tool_message import ToolMessage
from langroid.agent.chat_agent import ChatAgent, ChatAgentConfig


class SearchTool(ToolMessage):
    """Intermediate tool - should NOT trigger exit."""
    request: str = "search"
    purpose: str = "Search for information"
    query: str


class FinalAnswerTool(ToolMessage):
    """Final tool - SHOULD trigger exit."""
    request: str = "final_answer"
    purpose: str = "Provide the final answer"
    answer: str
    confidence: float


def create_agent() -> ChatAgent:
    config = ChatAgentConfig(
        name="ResearchAgent",
        llm=lr.language_models.OpenAIGPTConfig(chat_model="gpt-4o"),
        system_message="""
You are a research agent. Use the search tool to find information,
then use final_answer when you have enough to answer confidently.
""",
    )
    agent = ChatAgent(config)
    agent.enable_message(SearchTool)
    agent.enable_message(FinalAnswerTool)
    return agent


def research(question: str) -> str | None:
    agent = create_agent()

    # Only exit when FinalAnswerTool is used (SearchTool won't trigger exit)
    task_config = lr.TaskConfig(
        done_sequences=["T[FinalAnswerTool]"]
    )
    task = Task(agent, interactive=False, config=task_config)[FinalAnswerTool]

    # Agent can use SearchTool multiple times without exiting
    # Task only exits when FinalAnswerTool is emitted
    result: FinalAnswerTool | None = task.run(question, turns=15)

    if result:
        return result.answer
    return None
```

## DSL Syntax Reference

| Pattern | Meaning |
|---------|---------|
| `T` | Any tool |
| `T[ToolName]` | Specific tool by class name |
| `A` | Agent response (tool handling) |
| `C[pattern]` | Content matching regex pattern |
| `,` | Then (sequence of events) |

## Key Differences Between Variants

| Pattern | When it exits | Use case |
|---------|---------------|----------|
| `["T[Tool]"]` | Immediately on emission | Get tool output, no handling needed |
| `["T[Tool], A"]` | After emission + handling | Tool has side effects to complete |

## Complex Patterns

### Exit after two specific tools in sequence
```python
done_sequences=["T[SearchTool], A, T[AnalyzeTool], A"]
```

### Multiple exit conditions (OR logic)
```python
done_sequences=[
    "C[quit|exit|bye]",      # Exit if user says quit
    "T[FinalAnswerTool]"     # OR if FinalAnswerTool is used
]
```

### Exit only after tool AND specific content
```python
done_sequences=["T[CompletionTool], A, C[done|complete]"]
```

## When to Use This Pattern

- Agent has multiple tools but only ONE should trigger exit
- Other tools are intermediate steps that should NOT terminate the task
- You need fine-grained control over which tool ends the conversation

## Common Mistake

```python
# WRONG: Bracket notation does NOT filter which tools trigger exit
# It only specifies the RETURN TYPE
task = Task(agent, config=task_config)[FinalAnswerTool]
```

The bracket notation `[FinalAnswerTool]` specifies what type the task returns.
To control which tool TRIGGERS exit, you must use `done_sequences`.
</file>

<file path="plugins/langroid/skills/patterns/mcp-tool-integration.md">
# MCP Tool Integration Pattern

Enable Langroid agents to use tools from MCP (Model Context Protocol) servers,
such as Claude Code's file editing tools.

## Key Imports

```python
from fastmcp.client.transports import StdioTransport
from langroid.agent.tools.mcp import mcp_tool
from langroid.agent.tools.mcp.fastmcp_client import get_tools_async
import langroid as lr
```

## Setting Up the Transport

Connect to an MCP server via stdio (e.g., Claude Code):

```python
transport = StdioTransport(
    command="claude",
    args=["mcp", "serve"],
    env={},
)
```

## Option 1: Enable ALL Tools from MCP Server

Use `get_tools_async()` to fetch and enable all available tools:

```python
async def setup_agent_with_all_tools():
    all_tools = await get_tools_async(transport)

    agent = lr.ChatAgent(lr.ChatAgentConfig(
        system_message="You have access to file tools.",
        llm=lr.language_models.OpenAIGPTConfig(chat_model="gpt-4o"),
    ))

    agent.enable_message(all_tools)  # Enable all tools at once
    return agent
```

## Option 2: Enable SPECIFIC Tools (Preferred)

Use the `@mcp_tool` decorator to create ToolMessage subclasses for specific
tools. This gives you control over which tools are available and lets you
customize result handling.

```python
# Basic usage - just wrap the MCP tool
@mcp_tool(transport, "Read")
class ReadTool(lr.ToolMessage):
    async def handle_async(self):
        return await self.call_tool_async()


@mcp_tool(transport, "Edit")
class EditTool(lr.ToolMessage):
    async def handle_async(self):
        return await self.call_tool_async()


@mcp_tool(transport, "Write")
class WriteTool(lr.ToolMessage):
    async def handle_async(self):
        return await self.call_tool_async()


# Enable specific tools on agent
agent.enable_message(ReadTool)
agent.enable_message(EditTool)
agent.enable_message(WriteTool)
```

## Option 3: Custom Result Processing

Override `handle_async()` to transform MCP tool results before returning to LLM:

```python
@mcp_tool(transport, "Grep")
class GrepTool(lr.ToolMessage):
    async def handle_async(self):
        result = await self.call_tool_async()

        # Result may be tuple (text, files) or just text
        result_text, _files = result if isinstance(result, tuple) else (result, [])

        # Parse and transform the result
        import json
        try:
            data = json.loads(result_text)
            # Custom formatting...
            return f"Found {data.get('numMatches', 0)} matches:\n{data.get('content', '')}"
        except (json.JSONDecodeError, TypeError):
            return result_text
```

## Complete Example: File Editor Agent

```python
from fastmcp.client.transports import StdioTransport
from langroid.agent.tools.mcp import mcp_tool
import langroid as lr

transport = StdioTransport(
    command="claude",
    args=["mcp", "serve"],
    env={},
)


@mcp_tool(transport, "Read")
class ReadFileTool(lr.ToolMessage):
    async def handle_async(self):
        return await self.call_tool_async()


@mcp_tool(transport, "Edit")
class EditFileTool(lr.ToolMessage):
    async def handle_async(self):
        return await self.call_tool_async()


async def create_file_editor_agent():
    agent = lr.ChatAgent(lr.ChatAgentConfig(
        name="FileEditor",
        system_message="""You are a file editor. Use the Read tool to read files
        and the Edit tool to make changes.""",
        llm=lr.language_models.OpenAIGPTConfig(chat_model="gpt-4o"),
    ))

    agent.enable_message(ReadFileTool)
    agent.enable_message(EditFileTool)

    return agent


async def main():
    agent = await create_file_editor_agent()
    task = lr.Task(agent, interactive=False)

    result = await task.run_async(
        "Read the file proposal.md and fix any typos you find."
    )
    return result
```

## Server Factory Pattern (for Concurrency)

For concurrent usage, create fresh transports to avoid `ClosedResourceError`:

```python
def make_transport():
    return StdioTransport(
        command="claude",
        args=["mcp", "serve"],
        env={},
    )

# Use factory when creating tools for concurrent scenarios
@mcp_tool(make_transport, "Edit")  # Pass factory, not instance
class EditTool(lr.ToolMessage):
    async def handle_async(self):
        return await self.call_tool_async()
```

## Available Claude Code MCP Tools

Common tools exposed by Claude Code's MCP server:

- `Read` - Read file contents
- `Edit` - Edit file with old_string/new_string replacement
- `Write` - Write/create files
- `Grep` - Search with ripgrep
- `Glob` - Find files by pattern
- `Bash` - Execute shell commands
- `LS` - List directory contents
</file>

<file path="plugins/langroid/skills/patterns/run-batch-tasks.md">
# Batch Processing with run_batch_tasks()

## The Pattern

Use `run_batch_tasks()` to process multiple inputs through the same task/agent
logic concurrently. Each input gets a **cloned** task+agent with isolated state.

## When to Use

- Process many items (prompts, questions, documents) with the same agent logic
- Need parallelism without manual asyncio/threading complexity
- Need state isolation between items (no message history leakage)
- Want to avoid connection exhaustion from creating agents manually
- Need ordered results matching input order

## Key Functions

### `run_batch_tasks()` - Simple Case

```python
from langroid.agent.batch import run_batch_tasks

results = run_batch_tasks(
    task,                    # Base task to clone
    items,                   # List of items to process
    input_map=lambda x: x,   # Convert item -> prompt string
    output_map=lambda x: x,  # Convert result -> desired output
    sequential=False,        # False = parallel, True = sequential
    batch_size=10,           # Max concurrent tasks (None = unlimited)
    turns=-1,                # Max turns per task (-1 = unlimited)
)
```

### `run_batch_task_gen()` - Custom Task Generation

```python
from langroid.agent.batch import run_batch_task_gen

def task_gen(i: int) -> Task:
    """Generate a custom task for item at index i."""
    return base_task.clone(i)  # or create entirely new task

results = run_batch_task_gen(
    gen_task=task_gen,       # Function that creates task for each index
    items=items,
    input_map=lambda x: x,
    output_map=lambda x: x,
    sequential=False,
)
```

## How Cloning Works

When `run_batch_tasks()` processes each item, it calls `task.clone(i)`:

1. **Task cloning** (`Task.clone()`):
   - Creates new Task with name `{original}-{i}`
   - Calls `agent.clone(i)` for the agent

2. **Agent cloning** (`ChatAgent.clone()`):
   - Deep copies the config
   - Creates fresh agent with new message history
   - Copies tool definitions (shared, not duplicated)
   - Clones vector store client if present
   - Assigns unique agent ID

**Result**: Each item is processed by an isolated agent with no state leakage.
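
A tiny sketch of what that isolation means if you call `clone()` yourself (assuming `base_task` is an existing `Task`; the assertions are just illustrative):

```python
t0 = base_task.clone(0)   # Task named "{original}-0", with its own cloned agent
t1 = base_task.clone(1)   # independent of t0

assert t0.agent is not t1.agent           # separate agent instances
assert t0.agent is not base_task.agent    # the base agent is left untouched
# each cloned agent starts with its own fresh message history
```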

## Example: Analyze Multiple Code Files

```python
import langroid as lr
from langroid.agent.batch import run_batch_tasks

# Create base agent and task
agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        name="CodeAnalyzer",
        llm=lr.language_models.OpenAIGPTConfig(chat_model="gpt-4"),
        system_message="Analyze code for security vulnerabilities.",
    )
)
agent.enable_message([VulnerabilityTool])
base_task = lr.Task(agent, interactive=False)

# Process multiple code files
code_files = [
    {"id": "file1", "code": "void foo() { strcpy(buf, input); }"},
    {"id": "file2", "code": "void bar() { strncpy(buf, input, sizeof(buf)); }"},
    {"id": "file3", "code": "void baz() { gets(buffer); }"},
]

results = run_batch_tasks(
    base_task,
    items=code_files,
    input_map=lambda f: f"Analyze this code:\n{f['code']}",
    output_map=lambda r: r.content if r else "ANALYSIS_FAILED",
    sequential=False,
    batch_size=5,  # Max 5 concurrent analyses
)

for file, result in zip(code_files, results):
    print(f"{file['id']}: {result}")
```

## Example: Q&A with Structured Output

```python
from langroid.agent.batch import run_batch_tasks

class AnswerTool(lr.ToolMessage):
    request: str = "answer"
    purpose: str = "Provide an answer"
    answer: str
    confidence: float

agent = lr.ChatAgent(config)
agent.enable_message([AnswerTool])

# Configure task to return tool directly
task = lr.Task(
    agent,
    interactive=False,
    config=lr.TaskConfig(done_if_tool=True)
)[AnswerTool]  # Bracket notation: task returns AnswerTool | None

questions = ["What is 2+2?", "Capital of France?", "Largest planet?"]

answers = run_batch_tasks(
    task,
    items=questions,
    input_map=lambda q: q,
    output_map=lambda tool: tool.answer if tool else "NO_ANSWER",
    sequential=False,
    batch_size=3,
)
# answers = ["4", "Paris", "Jupiter"]
```

## Example: With Stateful Agent Handler

Combining batch processing with stateful handlers (see pattern #2):

```python
class QueryAgent(lr.ChatAgent):
    def __init__(self, config, db_connection, max_retries=3):
        super().__init__(config)
        self.db = db_connection
        self.max_retries = max_retries
        self.failures = 0

    def init_state(self):
        super().init_state()
        self.failures = 0  # Reset per clone

    def execute_query(self, msg: QueryTool) -> str | DoneTool:
        try:
            result = self.db.execute(msg.query)
            return DoneTool(content=str(result))
        except Exception as e:
            self.failures += 1
            if self.failures >= self.max_retries:
                return DoneTool(content="")
            return f"Error: {e}. Fix and retry."

agent = QueryAgent(config, db_connection=my_db)
agent.enable_message([QueryTool])
base_task = lr.Task(agent, interactive=False)

# Each query gets a cloned agent with fresh failure counter
queries = ["SELECT * FROM users", "SELECT * FROM orders", ...]
results = run_batch_tasks(base_task, queries, ...)
```

## Parameters Reference

| Parameter | Type | Description |
|-----------|------|-------------|
| `task` | Task | Base task to clone for each item |
| `items` | List[T] | Items to process |
| `input_map` | Callable[[T], str] | Convert item to prompt |
| `output_map` | Callable[[Result], U] | Convert result to output |
| `sequential` | bool | True=one at a time, False=parallel |
| `batch_size` | int\|None | Max concurrent tasks (None=all) |
| `turns` | int | Max turns per task (-1=unlimited) |
| `handle_exceptions` | bool\|ExceptionHandling | How to handle errors |
| `max_cost` | float | Stop if cumulative cost exceeds this value |
| `max_tokens` | int | Stop if cumulative tokens exceed this value |

## Important Notes

1. **Order preserved**: Results list matches input items order
2. **Exceptions**: Raised by default; use `handle_exceptions=RETURN_NONE` to continue past failures (see the sketch below)
3. **Memory**: Each clone has separate message history - no accumulation
4. **Connections**: Cloned agents share underlying LLM client but have separate state
5. **Vector stores**: Each clone gets its own vector store client (same data, isolated state)
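
A minimal sketch of note 2, assuming the `ExceptionHandling` enum is importable from `langroid.agent.batch` (as the `handle_exceptions` parameter type suggests):

```python
from langroid.agent.batch import run_batch_tasks, ExceptionHandling

results = run_batch_tasks(
    base_task,
    items=questions,
    input_map=lambda q: q,
    output_map=lambda r: r.content if r else "FAILED",
    handle_exceptions=ExceptionHandling.RETURN_NONE,  # don't abort the batch on one failure
)
```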
</file>

<file path="plugins/langroid/skills/patterns/task-return-tool.md">
# Pattern: Make Task Return a Specific ToolMessage Directly

## Problem

When an agent emits a ToolMessage, you need to extract it from the task result. The naive approach is to search through `task.agent.message_history` to find the tool, but this is **error-prone** and **inefficient**.

## Solution

Use **TaskConfig with `done_if_tool=True`** combined with **bracket notation** to make the task:
1. Terminate as soon as a tool is emitted
2. Return the tool directly (typed as `ToolClass | None`)

## Code Pattern

### Wrong Approach (searching message_history)

```python
from langroid.agent.task import Task

task = Task(agent, interactive=False)
result = task.run(prompt, turns=5)

# BAD: Searching message_history
pruned_classes = None
for msg in task.agent.message_history:
    if isinstance(msg, EmitPrunedModelTool):
        pruned_classes = msg.classes
        break

if not pruned_classes:
    print("❌ Agent did not use the tool")
    return 1
```

**Problems**:
- Iterating through entire message history
- Error-prone type checking with `isinstance`
- Can miss the tool if not searching correctly
- Not type-safe

### Correct Approach (TaskConfig + bracket notation)

```python
import langroid as lr
from langroid.agent.task import Task

# 1. Create TaskConfig with done_if_tool=True
task_config = lr.TaskConfig(done_if_tool=True)

# 2. Use bracket notation to specify return type
task = Task(agent, interactive=False, config=task_config)[EmitPrunedModelTool]

# 3. Run task - returns EmitPrunedModelTool | None
result: EmitPrunedModelTool | None = task.run(prompt, turns=5)

# 4. Check if tool was emitted
if not result:
    print("❌ Agent did not use the tool")
    return 1

# 5. Access tool data directly
pruned_classes = result.classes  # Type-safe!
```

**Benefits**:
- Task terminates immediately when tool is emitted (efficient)
- Return type is explicit and type-safe
- No need to search message_history
- Clean, readable code

## Key Components

### 1. TaskConfig(done_if_tool=True)

```python
task_config = lr.TaskConfig(done_if_tool=True)
```

This tells the task to **stop as soon as any tool is emitted**, rather than continuing for `turns` iterations.

### 2. Bracket Notation: `Task(...)[ToolClass]`

```python
task = Task(agent, interactive=False, config=task_config)[EmitPrunedModelTool]
```

The bracket notation **specifies the expected return type**:
- If the agent emits `EmitPrunedModelTool`, task returns it
- If the agent doesn't emit the tool, task returns `None`
- Return type is `EmitPrunedModelTool | None`

### 3. Type-Safe Result Handling

```python
result: EmitPrunedModelTool | None = task.run(prompt, turns=5)

if not result:
    # Agent didn't emit the tool
    handle_failure()
else:
    # Tool was emitted, access fields directly
    data = result.classes  # Type-safe attribute access
```

## Real-World Example

From `tools/prune_xsdata_models.py`:

````python
import langroid as lr
from langroid.agent.task import Task
from interop.agents.model_pruning_agent import (
    EmitPrunedModelTool,
    create_model_pruning_agent,
)

# Create agent
agent = create_model_pruning_agent(
    raw_generated_code=raw_content,
    reference_code=reference_code,
    target_entity="Aircraft",
    model="gpt-4o",
)

# Configure task to return tool directly
task_config = lr.TaskConfig(done_if_tool=True)
task = Task(agent, interactive=False, config=task_config)[EmitPrunedModelTool]

# Build prompt
prompt = f"""
Here is the raw xsdata-generated code for Aircraft:

```python
{raw_content[:50000]}
```

Please analyze this code and emit pruned class definitions using the tool.
"""

# Run task - returns tool or None
result: EmitPrunedModelTool | None = task.run(prompt, turns=5)

if not result:
    print("❌ Agent did not use the EmitPrunedModelTool")
    return 1

# Extract data from tool
pruned_classes = result.classes
print(f"✅ Agent produced {len(pruned_classes)} pruned classes")

# Use the data
for cls_def in pruned_classes:
    print(f"   • {cls_def.class_name}: {len(cls_def.fields)} fields")
````

## When to Use This Pattern

Use this pattern when:
- ✅ You expect the agent to emit a **specific tool** as its final output
- ✅ You want **type-safe access** to the tool data
- ✅ You want the task to **terminate immediately** when the tool is emitted
- ✅ The tool emission is the **primary goal** of the task (not intermediate step)

Don't use this pattern when:
- ❌ The agent might emit multiple different tools during conversation
- ❌ You need the full conversation history
- ❌ Tool emission is an intermediate step in a longer workflow

## Related Patterns

- **handle_llm_no_tool**: Use this in `ChatAgentConfig` to catch cases where the LLM doesn't use the tool (see the sketch below)
- **ToolMessage validation**: Use Pydantic models to ensure tool output is well-formed
- **Multi-turn tasks**: Combine with `turns` parameter for agents that need multiple attempts
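
A minimal sketch of the `handle_llm_no_tool` companion setting (the reminder string is illustrative; the field also accepts other actions, such as forwarding to the user):

```python
agent = lr.ChatAgent(
    lr.ChatAgentConfig(
        # nudge sent back to the LLM whenever it replies without using a tool
        handle_llm_no_tool="You FORGOT to use the EmitPrunedModelTool; please use it now.",
        llm=lr.language_models.OpenAIGPTConfig(chat_model="gpt-4o"),
    )
)
```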

## Common Mistakes

### Mistake 1: Forgetting `done_if_tool=True`

```python
# WRONG: Task will run for all turns even after tool is emitted
task = Task(agent)[EmitPrunedModelTool]
result = task.run(prompt, turns=5)  # Wastes turns!
```

**Fix**: Always use `TaskConfig(done_if_tool=True)`

### Mistake 2: Not checking for None

```python
# WRONG: Will crash if agent doesn't emit tool
result = task.run(prompt, turns=5)
pruned_classes = result.classes  # AttributeError if result is None!
```

**Fix**: Always check `if not result:` before accessing fields

### Mistake 3: Searching message_history instead

```python
# WRONG: Negates the entire point of bracket notation
result = task.run(prompt, turns=5)
for msg in task.agent.message_history:
    if isinstance(msg, EmitPrunedModelTool):
        # Why did you use bracket notation then?
```

**Fix**: Trust the bracket notation - result IS the tool

## Summary

**Pattern**: `Task(agent, config=TaskConfig(done_if_tool=True))[ToolClass]`

**Returns**: `ToolClass | None`

**Benefits**:
- Efficient (terminates immediately)
- Type-safe (explicit return type)
- Clean (no message_history iteration)
- Robust (can't miss the tool)

**Use when**: Tool emission is the primary goal of the task
</file>

<file path="PR_954_REVIEW.md">
# Review: PR #954 — Support Vertex AI for Gemini models

**Author:** @alexagr
**File changed:** `langroid/language_models/openai_gpt.py` (+5, -1)

## Summary

This PR adds support for Google Vertex AI's OpenAI Compatibility layer for Gemini
models. Vertex AI uses project-specific URLs (unlike the fixed
`generativelanguage.googleapis.com` URL used by Google's standard Gemini API), so
users need to specify a custom `api_base` in `OpenAIGPTConfig`.

The change modifies the `is_gemini` branch in `OpenAIGPT.__init__()` to respect
`config.api_base` when set, falling back to `GEMINI_BASE_URL` otherwise.

## Code Analysis

### Current code (line 593):
```python
self.api_base = GEMINI_BASE_URL
```

### Proposed change:
```python
if self.config.api_base:
    self.api_base = self.config.api_base
else:
    self.api_base = GEMINI_BASE_URL
```

### Correctness: PASS

The truthiness check on `self.config.api_base` correctly handles:
- `None` (default) → uses `GEMINI_BASE_URL` ✓
- `""` (empty string from env) → uses `GEMINI_BASE_URL` ✓
- A valid URL string → uses the custom URL ✓

The second commit (`c715dbc`) addressing empty string handling is implicitly
covered by the truthiness check, so no additional code was needed.

## Issues Found

### 1. Style inconsistency (Minor)

Other providers in the same file use the `or` pattern for the same logic:

```python
# ollama (line 503)
self.api_base = self.config.api_base or OLLAMA_BASE_URL

# vllm (line 512)
self.api_base = self.config.api_base or "http://localhost:8000/v1"

# litellm proxy (line 588)
self.api_base = self.config.litellm_proxy.api_base or self.api_base
```

**Recommendation:** Replace the 4-line `if/else` block with:
```python
self.api_base = self.config.api_base or GEMINI_BASE_URL
```

This is functionally identical, reduces the change to a single line, and is
consistent with the established codebase patterns.

### 2. No tests (Minor)

The PR does not include tests. While the change is small, a unit test verifying
that `api_base` is correctly set when `config.api_base` is provided (vs. when it
is `None`) would improve confidence, especially since this is a new integration
path (Vertex AI).
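
A minimal sketch of such a test (assumes the Gemini branch is reached via a `gemini/`-prefixed `chat_model`, that `GEMINI_BASE_URL` is importable from the same module, and that construction works with a dummy key; exact fixtures may differ):

```python
import langroid.language_models as lm
from langroid.language_models.openai_gpt import GEMINI_BASE_URL, OpenAIGPT


def test_gemini_api_base(monkeypatch):
    monkeypatch.setenv("GEMINI_API_KEY", "dummy")
    custom = "https://example-aiplatform.googleapis.com/v1beta1/endpoints/openapi"

    cfg = lm.OpenAIGPTConfig(chat_model="gemini/gemini-2.0-flash", api_base=custom)
    assert OpenAIGPT(cfg).api_base == custom

    cfg_default = lm.OpenAIGPTConfig(chat_model="gemini/gemini-2.0-flash")
    assert OpenAIGPT(cfg_default).api_base == GEMINI_BASE_URL
```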

### 3. No documentation or usage example (Minor)

There is no documentation showing how to configure Vertex AI. A brief example
in the PR description or docs would help users:

```python
import langroid.language_models as lm

config = lm.OpenAIGPTConfig(
    chat_model="gemini/gemini-2.0-flash",
    api_base="https://{REGION}-aiplatform.googleapis.com/v1beta1/projects/{PROJECT_ID}/locations/{REGION}/endpoints/openapi",
)
```

### 4. Other providers could benefit from the same pattern (Observation)

The `glhf/`, `openrouter/`, and `deepseek/` branches also unconditionally set
their `api_base` without checking `config.api_base`. If there's value in allowing
custom endpoints for Gemini via Vertex AI, the same argument could apply to other
providers (e.g., self-hosted DeepSeek endpoints). This is out of scope for this PR
but worth noting for future consideration.
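
For illustration only, a hypothetical version of one such branch using the same pattern (the constant name is assumed, not verified against the file):

```python
# deepseek branch (hypothetical)
self.api_base = self.config.api_base or DEEPSEEK_BASE_URL
```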

## Verdict

**Approve with minor suggestion.** The change is correct and solves a real need
for Vertex AI users. The only actionable suggestion is to simplify the `if/else`
to the `or` pattern for consistency:

```python
self.api_base = self.config.api_base or GEMINI_BASE_URL
```
</file>

<file path="PR_REVIEW_975.md">
# PR #975 Review: Remove traceback from OpenAI API error logs

**Author:** alexagr
**Branch:** `api_error_log` → `main`
**Changed file:** `langroid/language_models/openai_gpt.py` (+12, -0)

## Summary

This PR adds `except openai.APIError` handlers before the generic `except Exception`
blocks in the four public methods of `OpenAIGPT`: `generate`, `agenerate`, `chat`,
and `achat`. The intent is to log API errors cleanly (without a full traceback) since
server-side errors don't benefit from a local stack trace.

The motivation is sound — `friendly_error()` includes `traceback.format_exc()` which
produces multi-line stack traces for every OpenAI API error. For authentication
failures, bad requests, and similar server-side errors these tracebacks are noisy and
provide no diagnostic value.

## Issues

### 1. `openai.APIError` is too broad — catches connection and timeout errors too

`openai.APIError` is the base class for the entire OpenAI exception hierarchy:

```
openai.APIError
├── openai.APIConnectionError    ← network/local issues
│   └── openai.APITimeoutError   ← timeout issues
└── openai.APIStatusError        ← HTTP status errors from the API server
    ├── openai.BadRequestError (400)
    ├── openai.AuthenticationError (401)
    ├── openai.PermissionDeniedError (403)
    ├── openai.NotFoundError (404)
    ├── openai.UnprocessableEntityError (422)
    ├── openai.RateLimitError (429)
    └── openai.InternalServerError (>=500)
```

The PR description correctly identifies that server-side errors (AuthenticationError,
BadRequestError, etc.) don't benefit from tracebacks. However, `APIConnectionError`
and `APITimeoutError` **are** related to the local environment (network configuration,
DNS, proxy issues), where a traceback **could** help diagnose the problem.

**Recommendation:** Use `openai.APIStatusError` instead of `openai.APIError`. This
captures exactly the server-side HTTP errors (400, 401, 403, 404, 422, 429, 500+)
while letting connection/timeout errors fall through to the `except Exception` handler
where `friendly_error()` provides the full traceback.

### 2. `raise e` vs bare `raise`

The PR uses `raise e`, which re-raises the exception but adds the `raise e` line itself
to the traceback. A bare `raise` re-raises the active exception with its original
traceback untouched. This is consistent with the existing code (the `except Exception`
blocks also use `raise e`), but `raise` is generally preferred, especially since callers
higher up the stack may want the clean original traceback even if the log message omits it.

This is a minor style point and not a blocker — it could be a separate cleanup across
the file.
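
For concreteness, a handler with both this style point and the class narrowing from issue 1 applied would look like the following (the suggested change below intentionally keeps `raise e` to match the surrounding code):

```python
except openai.APIStatusError as e:
    logging.error(f"API error in OpenAIGPT.generate: {e}")
    raise  # bare raise: preserve the original traceback unchanged
```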

### 3. Log level consideration

Using `logging.error()` is appropriate for most API errors, but for `RateLimitError`
(a subclass of `APIStatusError`) `logging.warning()` might be more fitting since it's
a transient condition. That said, by the time the error reaches this outer handler the
retry logic in `utils.py` has already been exhausted, so `error` level is reasonable.

Not a blocker.

## Suggested Change

Replace `openai.APIError` with `openai.APIStatusError` in all four handlers:

```python
except openai.APIStatusError as e:
    logging.error(f"API error in OpenAIGPT.generate: {e}")
    raise e
```

## Verdict

The change addresses a real usability issue — excessively noisy tracebacks for
server-side API errors. With the suggested narrowing from `APIError` to
`APIStatusError`, this would be a clean, well-targeted improvement. The code is
consistent with existing patterns and correctly placed before the generic exception
handlers.

**Recommendation: Request changes** — use `openai.APIStatusError` instead of
`openai.APIError` to avoid suppressing tracebacks for connection/timeout errors.
</file>

<file path="pytest.ini">
[pytest]
markers =
    unit: marks tests as unit tests (deselect with '-m "not unit"')
    integration: marks tests as integration tests (deselect with '-m "not integration"')

    
# MySQL configuration settings
mysql_host = localhost
mysql_port = 3306
mysql_user = root
</file>

<file path="docs/notes/mcp-tools.md">
# Langroid MCP Integration

Langroid provides seamless integration with Model Context Protocol (MCP) servers via 
two methods, both of which involve creating Langroid `ToolMessage` subclasses
corresponding to the MCP tools: 

1. Programmatic creation of Langroid tools using `get_tool_async`, 
   `get_tools_async` from the tool definitions defined on an MCP server.
2. Declarative creation of Langroid tools using the **`@mcp_tool` decorator**, which allows
   customizing the tool-handling behavior beyond what is provided by the MCP server.

This integration allows _any_ LLM (that is good enough to do function-calling via prompts) to use any MCP server.
See the following to understand the integration better:

- example python scripts under [`examples/mcp`](https://github.com/langroid/langroid/tree/main/examples/mcp)
- [`tests/main/test_mcp_tools.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_mcp_tools.py)

---

## 1. Connecting to an MCP server via transport specification

Before creating Langroid tools, we first need to define and connect to an MCP server
via a [FastMCP](https://gofastmcp.com/getting-started/welcome) client. 
There are several ways to connect to a server, depending on how it is defined. 
Each of these uses a different type of [transport](https://gofastmcp.com/clients/transports).

The typical pattern to use with Langroid is as follows:

- define an MCP server transport
- create a `ToolMessage` subclass using the `@mcp_tool` decorator or 
  `get_tool_async()` function, with the transport as the first argument


Langroid's MCP integration will work with any of the [transports](https://gofastmcp.com/clients/transports) 
supported by FastMCP.
Below we go over some common ways to define transports and extract tools from the servers.

1. **Local Python script**
2. **In-memory FastMCP server** - useful for testing and for simple in-memory servers
   that don't need to be run as a separate process.
3. **NPX stdio transport**
4. **UVX stdio transport**
5. **Generic stdio transport** – launch any CLI‐based MCP server via stdin/stdout
6. **Network SSE transport** – connect to HTTP/S MCP servers via `SSETransport`


All examples below use the async helpers to create Langroid tools (`ToolMessage` subclasses):

```python
from langroid.agent.tools.mcp import (
    get_tools_async,
    get_tool_async,
)
```

---

#### Path to a Python Script

Point at your MCP‐server entrypoint, e.g., to the `weather.py` script in the 
langroid repo (based on the [Anthropic quick-start guide](https://modelcontextprotocol.io/quickstart/server)):

```python
async def example_script_path() -> None:
    server = "tests/main/mcp/weather-server-python/weather.py"
    tools = await get_tools_async(server) # all tools available
    AlertTool = await get_tool_async(server, "get_alerts") # specific tool

    # instantiate the tool with a specific input
    msg = AlertTool(state="CA")
    
    # Call the tool via handle_async()
    alerts = await msg.handle_async()
    print(alerts)
```

---

#### In-Memory FastMCP Server

Define your server with `FastMCP(...)` and pass the instance:

```python
from fastmcp.server import FastMCP
from pydantic import BaseModel, Field

class CounterInput(BaseModel):
    start: int = Field(...)

def make_server() -> FastMCP:
    server = FastMCP("CounterServer")

    @server.tool()
    def increment(data: CounterInput) -> int:
        """Increment start by 1."""
        return data.start + 1

    return server

async def example_in_memory() -> None:
    server = make_server()
    tools = await get_tools_async(server)
    IncTool = await get_tool_async(server, "increment")

    result = await IncTool(start=41).handle_async()
    print(result)  # 42
```

See the [`mcp-file-system.py`](https://github.com/langroid/langroid/blob/main/examples/mcp/mcp-file-system.py)
script for a working example of this.

---

#### NPX stdio Transport

Use any npm-installed MCP server via `npx`, e.g., the 
[Exa web-search MCP server](https://docs.exa.ai/examples/exa-mcp):

```python
from fastmcp.client.transports import NpxStdioTransport

transport = NpxStdioTransport(
    package="exa-mcp-server",
    env_vars={"EXA_API_KEY": "…"},
)

async def example_npx() -> None:
    tools = await get_tools_async(transport)
    SearchTool = await get_tool_async(transport, "web_search_exa")

    results = await SearchTool(
        query="How does Langroid integrate with MCP?"
    ).handle_async()
    print(results)
```

For a fully working example, see the script [`exa-web-search.py`](https://github.com/langroid/langroid/blob/main/examples/mcp/exa-web-search.py).

---

#### UVX stdio Transport

Connect to a UVX-based MCP server, e.g., the [Git MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/git):

```python
from fastmcp.client.transports import UvxStdioTransport

transport = UvxStdioTransport(tool_name="mcp-server-git")

async def example_uvx() -> None:
    tools = await get_tools_async(transport)
    GitStatus = await get_tool_async(transport, "git_status")

    status = await GitStatus(path=".").handle_async()
    print(status)
```

--- 

#### Generic stdio Transport

Use `StdioTransport` to run any MCP server as a subprocess over stdio:

```python
from fastmcp.client.transports import StdioTransport
from langroid.agent.tools.mcp import get_tools_async, get_tool_async


async def example_stdio() -> None:
    """Example: any CLI‐based MCP server via StdioTransport."""
    transport: StdioTransport = StdioTransport(
        command="uv",
        args=["run", "--with", "biomcp-python", "biomcp", "run"],
    )
    tools: list[type] = await get_tools_async(transport)
    BioTool = await get_tool_async(transport, "tool_name")
    result: str = await BioTool(param="value").handle_async()
    print(result)
```

See the full example in [`examples/mcp/biomcp.py`](https://github.com/langroid/langroid/blob/main/examples/mcp/biomcp.py).

---

#### Network SSE Transport

Use `SSETransport` to connect to a FastMCP server over HTTP/S:

```python
from fastmcp.client.transports import SSETransport
from langroid.agent.tools.mcp import (
    get_tools_async,
    get_tool_async,
)


async def example_sse() -> None:
    """Example: connect to an HTTP/S MCP server via SSETransport."""
    url: str = "https://localhost:8000/sse"
    transport: SSETransport = SSETransport(
        url=url, headers={"Authorization": "Bearer TOKEN"}
    )
    tools: list[type] = await get_tools_async(transport)
    ExampleTool = await get_tool_async(transport, "tool_name")
    result: str = await ExampleTool(param="value").handle_async()
    print(result)
```    

---

With these patterns you can list tools, generate Pydantic-backed `ToolMessage` classes,
and invoke them via `.handle_async()`, all with zero boilerplate client setup. 
As the `FastMCP` library adds other types of transport (e.g., `StreamableHTTPTransport`),
the pattern of usage with Langroid will remain the same.


---

## Best Practice: Use a server factory for stdio transports

Starting with fastmcp 2.13 and mcp 1.21, stdio transports (e.g., `StdioTransport`,
`NpxStdioTransport`, `UvxStdioTransport`) are effectively single‑use. Reusing the
same transport instance across multiple connections can lead to errors such as
`anyio.ClosedResourceError` during session initialization.

To make your code robust and future‑proof, pass a zero‑argument server factory to
Langroid’s MCP helpers. A “server factory” is simply a `lambda` or function that
returns a fresh server spec or transport each time.

Benefits:

- Fresh, reliable connections on every call (no reuse of closed transports).
- Works across fastmcp/mcp versions without subtle lifecycle issues.
- Enables concurrent calls safely (each call uses its own subprocess/session).
- Keeps your decorator ergonomics and `handle_async` overrides unchanged.

You can use a factory with both the decorator and the async helpers:

```python
import langroid as lr
from fastmcp.client.transports import StdioTransport
from langroid.agent.tools.mcp import mcp_tool, get_tool_async

# 1) Decorator style
@mcp_tool(lambda: StdioTransport(command="claude", args=["mcp", "serve"], env={}),
          "Grep")
class GrepTool(lr.ToolMessage):
    async def handle_async(self) -> str:
        # pre/post-process around the raw MCP call
        result = await self.call_tool_async()
        return f"<GrepResult>\n{result}\n</GrepResult>"

# 2) Programmatic style
BaseGrep = await get_tool_async(
    lambda: StdioTransport(command="claude", args=["mcp", "serve"], env={}),
    "Grep",
)
```

Notes:

- Passing a concrete transport instance still works: Langroid will try to clone
  it internally; however, a factory is the most reliable across environments.
- For network transports (e.g., `SSETransport`), a factory is optional; you can
  continue passing the transport instance directly.

---

## Output-schema validation: return structured content when required

Newer `mcp` clients validate tool outputs against the tool’s output schema. If a
tool declares a structured output, returning plain text may raise a runtime
error. Some servers (for example, Claude Code’s Grep) expose an argument like
`output_mode` that controls the shape of the response.

Recommendations:

- Prefer structured modes when a tool declares an output schema.
- If available, set options like `output_mode="structured"` (or a documented
  structured variant such as `"files_with_matches"`) in your tool’s
  `handle_async` before calling `await self.call_tool_async()`.

Example tweak in a decorator-based tool:

```python
@mcp_tool(lambda: StdioTransport(command="claude", args=["mcp", "serve"]),
          "Grep")
class GrepTool(lr.ToolMessage):
    async def handle_async(self) -> str:
        # Ensure a structured response if the server supports it
        if hasattr(self, "output_mode"):
            self.output_mode = "structured"
        return await self.call_tool_async()
```

If the server does not provide such a switch, follow its documentation for
returning data that matches its declared output schema.

---

## 2. Create Langroid Tools declaratively using the `@mcp_tool` decorator

The above examples showed how you can create Langroid tools programmatically using
the helper functions `get_tool_async()` and `get_tools_async()`,
with the first argument being the transport to the MCP server. The `@mcp_tool` decorator
works in the same way: 

- **Arguments to the decorator**
    1. `server_spec`: path/URL/`FastMCP`/`ClientTransport`, as mentioned above.
    2. `tool_name`: name of a specific MCP tool

- **Behavior**
    - Generates a `ToolMessage` subclass with all input fields typed.
    - Provides a `call_tool_async()` under the hood -- this is the "raw" MCP tool call,
      returning a string.
    - If you define your own `handle_async()`, it overrides the default. Typically,
you would override it to customize either the input or the output of the tool call, or both.
    - If you don't define your own `handle_async()`, it defaults to just returning the
      value of the `call_tool_async()` method.

Here is a simple example of using the `@mcp_tool` decorator to create a Langroid tool:

```python
from fastmcp.server import FastMCP
from langroid.agent.tools.mcp import mcp_tool
import langroid as lr

# Define your MCP server (pydantic v2 for schema)
server = FastMCP("MyServer")

@mcp_tool(server, "greet")
class GreetTool(lr.ToolMessage):
    """Say hello to someone."""

    async def handle_async(self) -> str:
        # Customize post-processing
        raw = await self.call_tool_async()
        return f"💬 {raw}"
```

Using the decorator method allows you to customize the `handle_async` method of the
tool, or add additional fields to the `ToolMessage`. 
You may want to customize the input to the tool, or the tool result before it is sent back to 
the LLM. If you don't override it, the default behavior is to simply return the value of 
the "raw" MCP tool call `await self.call_tool_async()`. 

```python
@mcp_tool(server, "calculate")
class CalcTool(lr.ToolMessage):
    """Perform complex calculation."""

    async def handle_async(self) -> str:
        result = await self.call_tool_async()
        # Add context or emojis, etc.
        return f"🧮 Result is *{result}*"
```

---

## 3. Enabling Tools in Your Agent

Once you’ve created a Langroid `ToolMessage` subclass from an MCP server, 
you can enable it on a `ChatAgent`, just like you normally would. Below is an example of using 
the [Exa MCP server](https://docs.exa.ai/examples/exa-mcp) to create a 
Langroid web search tool, enable a `ChatAgent` to use it, and then set up a `Task` to 
run the agent loop.

First we must define the appropriate `ClientTransport` for the MCP server:
```python
# define the transport
transport = NpxStdioTransport(
    package="exa-mcp-server",
    env_vars=dict(EXA_API_KEY=os.getenv("EXA_API_KEY")),
)
```

Then we use the `@mcp_tool` decorator to create a `ToolMessage` 
subclass representing the web search tool. Note that one reason to use the decorator
to define our tool is so we can specify a custom `handle_async` method that
controls what is sent to the LLM after the actual raw MCP tool-call
(the `call_tool_async` method) is made.

```python
# the second arg specifically refers to the `web_search_exa` tool available
# on the server defined by the `transport` variable.
@mcp_tool(transport, "web_search_exa")
class ExaSearchTool(lr.ToolMessage):
    async def handle_async(self):
        result: str = await self.call_tool_async()
        return f"""
        Below are the results of the web search:
        
        <WebSearchResult>
        {result}
        </WebSearchResult>
        
        Use these results to answer the user's original question.
        """

```

If we did not want to override the `handle_async` method, we could simply have
created the `ExaSearchTool` class programmatically via the `get_tool_async` 
function as shown above, i.e.:

```python
from langroid.agent.tools.mcp import get_tool_async

ExaSearchTool = await get_tool_async(transport, "web_search_exa")
```

We can now define our main function where we create our `ChatAgent`,
attach the `ExaSearchTool` to it, define the `Task`, and run the task loop.

```python
async def main():
    agent = lr.ChatAgent(
        lr.ChatAgentConfig(
            # forward to user when LLM doesn't use a tool
            handle_llm_no_tool=NonToolAction.FORWARD_USER,
            llm=lm.OpenAIGPTConfig(
                max_output_tokens=1000,
                # this defaults to True, but we set it to False so we can see output
                async_stream_quiet=False,
            ),
        )
    )

    # enable the agent to use the web-search tool
    agent.enable_message(ExaSearchTool)
    # make task with interactive=False =>
    # waits for user only when LLM doesn't use a tool
    task = lr.Task(agent, interactive=False)
    await task.run_async()
```

See [`exa-web-search.py`](https://github.com/langroid/langroid/blob/main/examples/mcp/exa-web-search.py) for a full working example of this.
</file>

<file path="langroid/agent/callbacks/chainlit.py">
"""
Callbacks for Chainlit integration.
"""
⋮----
# Attempt to reconfigure the root logger to your desired settings
log_level = logging.INFO if settings.debug else logging.WARNING
⋮----
USER_TIMEOUT = 60_000
SYSTEM = "System 🖥️"
LLM = "LLM 🧠"
AGENT = "Agent <>"
YOU = "You 😃"
ERROR = "Error 🚫"
⋮----
@no_type_check
async def ask_helper(func, **kwargs)
⋮----
res = await func(**kwargs).send()
⋮----
@no_type_check
async def setup_llm() -> None
⋮----
"""From the session `llm_settings`, create new LLMConfig and LLM objects,
    save them in session state."""
llm_settings = cl.user_session.get("llm_settings", {})
model = llm_settings.get("chat_model")
context_length = llm_settings.get("context_length", 16_000)
temperature = llm_settings.get("temperature", 0.2)
timeout = llm_settings.get("timeout", 90)
⋮----
llm_config = lm.OpenAIGPTConfig(
⋮----
# or, other possibilities for example:
# "litellm/ollama_chat/mistral"
# "litellm/ollama_chat/mistral:7b-instruct-v0.2-q8_0"
# "litellm/ollama/llama2"
# "local/localhost:8000/v1"
# "local/localhost:8000"
chat_context_length=context_length,  # adjust based on model
⋮----
llm = lm.OpenAIGPT(llm_config)
⋮----
@no_type_check
async def update_llm(new_settings: Dict[str, Any]) -> None
⋮----
"""Update LLMConfig and LLM from settings, and save in session state."""
⋮----
config = config or lm.OpenAIGPTConfig()
⋮----
).send()  # type: ignore
⋮----
@no_type_check
async def inform_llm_settings() -> None
⋮----
llm_settings: Dict[str, Any] = cl.user_session.get("llm_settings", {})
settings_dict = dict(
⋮----
"""Get dict (file_name -> file_path) from files uploaded in chat msg"""
⋮----
files = [file for file in message.elements if file.path.endswith(tuple(extensions))]
⋮----
def wrap_text_preserving_structure(text: str, width: int = 90) -> str
⋮----
"""Wrap text preserving paragraph breaks. Typically used to
    format an agent_response output, which may have long lines
    with no newlines or paragraph breaks."""
⋮----
paragraphs = text.split("\n\n")  # Split the text into paragraphs
wrapped_text = []
⋮----
if para.strip():  # If the paragraph is not just whitespace
# Wrap this paragraph and add it to the result
wrapped_paragraph = textwrap.fill(para, width=width)
⋮----
# Preserve paragraph breaks
⋮----
class ChainlitCallbackConfig(BaseSettings)
⋮----
user_has_agent_name: bool = True  # show agent name in front of "YOU" ?
show_subtask_response: bool = True  # show sub-task response as a step?
⋮----
class ChainlitAgentCallbacks
⋮----
"""Inject Chainlit callbacks into a Langroid Agent"""
⋮----
last_step: Optional[cl.Step] = None  # used to display sub-steps under this
curr_step: Optional[cl.Step] = None  # used to update an initiated step
stream: Optional[cl.Step] = None  # pushed into openai_gpt.py to stream tokens
parent_agent: Optional["Agent"] = None  # used to get parent id, for step nesting
⋮----
"""Add callbacks to the agent, and save the initial message,
        so we can alter the display of the first user message.
        """
⋮----
# We don't want to suppress LLM output in async + streaming,
# since we often use chainlit async callbacks to display LLM output
⋮----
def _get_parent_id(self) -> str | None
⋮----
"""Get step id under which we need to nest the current step:
        This should be the parent Agent's last_step.
        """
⋮----
last_step = self.parent_agent.callbacks.get_last_step()
⋮----
return last_step.id  # type: ignore
⋮----
def set_parent_agent(self, parent: "Agent") -> None
⋮----
def get_last_step(self) -> Optional[cl.Step]
⋮----
def start_llm_stream(self) -> Callable[[str, StreamEventType], None]
⋮----
"""Returns a streaming fn that can be passed to the LLM class"""
⋮----
def stream_token(t: str, e: StreamEventType) -> None
⋮----
async def start_llm_stream_async(self) -> Callable[[str, StreamEventType], None]
⋮----
async def stream_token(t: str, e: StreamEventType) -> None
⋮----
def cancel_llm_stream(self) -> None
⋮----
"""Called when cached response found."""
⋮----
run_sync(self.stream.remove())  # type: ignore
⋮----
"""Update the stream, and display entire response in the right language.

        Args:
            content: The main LLM response content
            tools_content: Tool-related content if any
            is_tool: Whether this is a tool response
            reasoning: Chain-of-thought reasoning from the LLM (if available)
        """
⋮----
run_sync(self.stream.update())  # type: ignore
stream_id = self.stream.id if tools_content or content else None
step = cl.Message(
⋮----
run_sync(step.update())  # type: ignore
⋮----
# Display reasoning content if available (e.g., from thinking models)
⋮----
reasoning_step = cl.Message(
run_sync(reasoning_step.send())  # type: ignore
⋮----
"""Show non-streaming LLM response.

        Args:
            content: The main LLM response content
            tools_content: Tool-related content if any
            is_tool: Whether this is a tool response
            cached: Whether this response was from cache
            language: Language for syntax highlighting
            reasoning: Chain-of-thought reasoning from the LLM (if available)
        """
⋮----
run_sync(step.send())  # type: ignore
⋮----
def show_error_message(self, error: str) -> None
⋮----
"""Show error message."""
⋮----
"""Show message from agent (typically tool handler)."""
⋮----
content = wrap_text_preserving_structure(content, width=90)
⋮----
def show_start_response(self, entity: str) -> None
⋮----
"""When there's a potentially long-running process, start a step,
        so that the UI displays a spinner while the process is running."""
⋮----
run_sync(self.curr_step.remove())  # type: ignore
⋮----
"""Construct name of entity to display as Author of a step"""
tool_indicator = " =>  🛠️" if tool else ""
cached = "(cached)" if cached else ""
⋮----
model = self.agent.config.llm.chat_model
⋮----
def _get_user_response_buttons(self, prompt: str) -> str
⋮----
"""Not used. Save for future reference"""
res = run_sync(
⋮----
return ""  # process the "feedback" case here
⋮----
def get_user_response(self, prompt: str) -> str
⋮----
"""Ask for user response, wait for it, and return it"""
⋮----
async def get_user_response_async(self, prompt: str) -> str
⋮----
"""
        Ask user for input.

        Args:
            prompt (str): Prompt to display to user
            timeout (int): Timeout in seconds
            suppress_values (List[str]): List of values to suppress from display
                (e.g. "c" for continue)

        Returns:
            str: User response
        """
ask_msg = cl.AskUserMessage(
res = await ask_msg.send()
⋮----
# if there was no actual prompt, clear the row from the UI for clarity.
⋮----
# Finally, reproduce the user response at right nesting level
⋮----
class ChainlitTaskCallbacks(ChainlitAgentCallbacks)
⋮----
"""
    Recursively inject ChainlitAgentCallbacks into a Langroid Task's agent and
    agents of sub-tasks.
    """
⋮----
"""Inject callbacks recursively, ensuring msg is passed to the
        top-level agent"""
⋮----
# recursively apply ChainlitAgentCallbacks to agents of sub-tasks
⋮----
# ChainlitTaskCallbacks(t, config=config)
⋮----
"""Show sub-task response as a step, nested at the right level."""
⋮----
# The step should nest under the calling agent's last step
</file>

<file path="langroid/agent/tools/mcp/fastmcp_client.py">
# Optional transports; import guarded for environments without uvx/npx
⋮----
except Exception:  # pragma: no cover - optional
NpxStdioTransport = tuple()  # type: ignore
UvxStdioTransport = tuple()  # type: ignore
⋮----
load_dotenv()  # load environment variables from .env
⋮----
# Concrete server/transport spec accepted by fastmcp.Client
FastMCPServerConcrete: TypeAlias = str | FastMCP[Any] | ClientTransport | AnyUrl
# Public spec we accept: concrete spec or a zero-arg factory returning a spec
FastMCPServerSpec: TypeAlias = (
⋮----
class FastMCPClient
⋮----
"""A client for interacting with a FastMCP server.

    Provides async context manager functionality to safely manage resources.
    """
⋮----
logger = logging.getLogger(__name__)
_cm: Optional[Client[ClientTransport]] = None
client: Optional[Client[ClientTransport]] = None
read_timeout_seconds: datetime.timedelta | None = None
⋮----
sampling_handler: SamplingHandler | None = None,  # type: ignore
roots: RootsList | RootsHandler | None = None,  # type: ignore
⋮----
"""Initialize the FastMCPClient.

        Args:
            server: FastMCP server or path to such a server
        """
⋮----
# Default a slightly larger read timeout for stdio transports on first
# connects. Allows flaky subprocess servers a bit more time to boot.
⋮----
default_secs = int(os.getenv("LANGROID_MCP_READ_TIMEOUT", "15"))
⋮----
async def __aenter__(self) -> "FastMCPClient"
⋮----
"""Enter the async context manager and connect inner client.

        Always obtain a fresh transport/spec via a factory, then connect.
        If the session initialization fails due to a transient stdio issue
        (e.g., ClosedResourceError / connection closed), retry once with a
        new transport instance for better resilience across fastmcp/mcp
        versions and server launch timing.
        """
# Always normalize to a server factory and create a fresh spec
server_factory = self._as_server_factory(self.server)
⋮----
# Configurable retry/backoff for transient stdio startup races.
max_retries = int(os.getenv("LANGROID_MCP_CONNECT_RETRIES", "6"))
⋮----
backoff_base = float(os.getenv("LANGROID_MCP_CONNECT_BACKOFF_BASE", "0.35"))
⋮----
backoff_base = 0.35
⋮----
last_err: Optional[BaseException] = None
⋮----
server_spec: FastMCPServerConcrete = server_factory()
# create inner client context manager
self._cm = Client(  # type: ignore[assignment]
⋮----
# actually enter it (opens the session)
self.client = await self._cm.__aenter__()  # type: ignore
⋮----
# Common transient failures when a subprocess exits early or
# closes during initialize. Retry once with a fresh transport.
⋮----
last_err = e
# ensure we reset _cm/client before retry
⋮----
await self._cm.__aexit__(None, None, None)  # type: ignore
⋮----
# brief backoff to allow server process to finish booting
⋮----
# fastmcp wraps ClosedResourceError into RuntimeError
# "Server session was closed unexpectedly". Treat as transient.
emsg = str(e)
⋮----
await self._cm.__aexit__(None, None, None)  # type: ignore
⋮----
# otherwise re-raise
⋮----
# If we get here both attempts failed
⋮----
async def connect(self) -> None
⋮----
"""Open the underlying session."""
⋮----
async def close(self) -> None
⋮----
"""Close the underlying session."""
⋮----
"""Exit the async context manager and close inner client."""
# exit and close the inner fastmcp.Client
⋮----
await self._cm.__aexit__(exc_type, exc_val, exc_tb)  # type: ignore
⋮----
def __del__(self) -> None
⋮----
"""Warn about unclosed persistent connections."""
⋮----
"""Convert a JSON Schema snippet into a (type, Field) tuple.

        Args:
            name: Name of the field.
            schema: JSON Schema for this field.
            prefix: Prefix to use for nested model names.
            is_required: Whether this field is required (from JSON Schema "required").

        Returns:
            A tuple of (python_type, Field(...)) for create_model.
        """
t = schema.get("type")
# Use schema default if present, otherwise:
# ... for required fields, None for optional fields
⋮----
default = schema["default"]
⋮----
default = ... if is_required else None
desc = schema.get("description")
# Object → nested BaseModel
⋮----
sub_name = f"{prefix}_{name.capitalize()}"
sub_fields: Dict[str, Tuple[type, Any]] = {}
# Get required fields for this nested object
nested_required = set(schema.get("required", []))
⋮----
submodel = create_model(  # type: ignore
# Wrap in Optional if not required
model_type = submodel if is_required else Optional[submodel]
return model_type, Field(default=default, description=desc)  # type: ignore
# Array → List of items
⋮----
array_type = List[item_type]  # type: ignore
⋮----
array_type = Optional[array_type]  # type: ignore
return array_type, Field(default=default, description=desc)  # type: ignore
# Primitive types
⋮----
str_type = str if is_required else Optional[str]
⋮----
int_type = int if is_required else Optional[int]
⋮----
float_type = float if is_required else Optional[float]
⋮----
bool_type = bool if is_required else Optional[bool]
⋮----
# Fallback or unions
⋮----
# Default fallback
⋮----
async def get_tool_async(self, tool_name: str) -> Type[ToolMessage]
⋮----
"""
        Create a Langroid ToolMessage subclass from the MCP Tool
        with the given `tool_name`.
        """
⋮----
target = await self.get_mcp_tool_async(tool_name)
⋮----
props = target.inputSchema.get("properties", {})
# Get the list of required fields from JSON Schema
required_fields = set(target.inputSchema.get("required", []))
fields: Dict[str, Tuple[type, Any]] = {}
⋮----
# Convert target.name to CamelCase and add Tool suffix
parts = target.name.replace("-", "_").split("_")
camel_case = "".join(part.capitalize() for part in parts)
model_name = f"{camel_case}Tool"
⋮----
# IMPORTANT: Avoid clashes with reserved field names in Langroid ToolMessage!
# First figure out which field names are reserved
reserved = set(_BaseToolMessage.__annotations__.keys())
⋮----
renamed: Dict[str, str] = {}
new_fields: Dict[str, Tuple[type, Any]] = {}
⋮----
new_name = fname + "__"
⋮----
# now replace fields with our renamed‐aware mapping
fields = new_fields
⋮----
# create Langroid ToolMessage subclass, with expected fields.
tool_model = cast(
⋮----
create_model(  # type: ignore[call-overload]
⋮----
# Store ALL client configuration needed to recreate a client
client_config = {
⋮----
# Always store a SERVER FACTORY to ensure a fresh transport per call
⋮----
tool_model._client_config = client_config  # type: ignore [attr-defined]
tool_model._renamed_fields = renamed  # type: ignore[attr-defined]
⋮----
# 2) define an arg-free call_tool_async()
async def call_tool_async(itself: ToolMessage) -> Any
⋮----
# pack up the payload
# Get exclude fields from model config with proper type checking
exclude_fields = set()
model_config = getattr(itself, "model_config", {})
⋮----
exclude_list = model_config["json_schema_extra"]["exclude"]
⋮----
exclude_fields = set(exclude_list)
⋮----
# Add standard excluded fields
⋮----
# Exclude None values - MCP servers don't expect None for optional params
payload = itself.model_dump(exclude=exclude_fields, exclude_none=True)
⋮----
# restore any renamed fields
for orig, new in itself.__class__._renamed_fields.items():  # type: ignore
⋮----
client_cfg = getattr(itself.__class__, "_client_config", None)  # type: ignore
⋮----
# Fallback or error - ideally _client_config should always exist
⋮----
# Connect the client if not yet connected and keep the connection open
⋮----
# open a fresh client, call the tool, then close
async with FastMCPClient(**client_cfg) as client:  # type: ignore
⋮----
tool_model.call_tool_async = call_tool_async  # type: ignore
⋮----
# 3) define handle_async() method with optional agent parameter
⋮----
"""
                Auto-generated handler for MCP tool. Returns ChatDocument with files
                if files are present and agent is provided, otherwise returns text.

                To override: define your own handle_async method with matching signature
                if you need file handling, or simpler signature if you only need text.
                """
response = await self.call_tool_async()  # type: ignore[attr-defined]
⋮----
# If we have files and an agent is provided, return a ChatDocument
⋮----
# Otherwise, just return the text content
⋮----
# add the handle_async() method to the tool model
tool_model.handle_async = handle_async  # type: ignore
⋮----
async def get_tools_async(self) -> List[Type[ToolMessage]]
⋮----
"""
        Get all available tools as Langroid ToolMessage classes,
        handling nested schemas, with `handle_async` methods
        """
⋮----
resp = await self.client.list_tools()
⋮----
async def get_mcp_tool_async(self, name: str) -> Optional[Tool]
⋮----
"""Find the "original" MCP Tool (i.e. of type mcp.types.Tool) on the server
         matching `name`, or None if missing. This contains the metadata for the tool:
         name, description, inputSchema, etc.

        Args:
            name: Name of the tool to look up.

        Returns:
            The raw Tool object from the server, or None.
        """
⋮----
resp: List[Tool] = await self.client.list_tools()
⋮----
"""Normalize a server spec to a zero-arg factory.

        - If already callable, return as-is.
        - If a ClientTransport instance, return a factory that yields the SAME
          instance. This preserves state for keep-alive stdio transports (e.g.,
          npx/uvx servers) so multi-call workflows can share process state.
          Recreating a fresh transport each call would lose stateful servers
          like `@modelcontextprotocol/server-memory` and break tests.
        - Otherwise return a factory that yields the given spec.
        """
if callable(server):  # type: ignore[arg-type]
return server  # type: ignore[return-value]
⋮----
# Reuse policy split:
# - Npx/Uvx stdio transports: reuse the SAME instance to preserve
#   keep-alive subprocess state (stateful MCP servers).
# - Plain StdioTransport: CLONE a fresh transport to avoid reusing
#   process/pipes across decorator-time schema fetch and runtime calls
#   (some stdio servers close after first session, like CLI wrappers).
⋮----
) or (  # type: ignore[arg-type]
⋮----
):  # type: ignore[arg-type]
⋮----
# If optional classes are tuples (import failed), fall through
⋮----
# Best‑effort clone with back‑compat: only pass kwargs supported
# by this installed fastmcp version's StdioTransport.__init__.
sig = inspect.signature(StdioTransport.__init__)
params = sig.parameters
⋮----
def _pick(name: str, default: Any = None) -> Any
⋮----
# Required in all known versions
cmd = getattr(server, "command", None)
args = list(getattr(server, "args", []) or [])
⋮----
# Optional, filter by signature presence
env = _pick("env")
cwd = _pick("cwd")
keep_alive = _pick("keep_alive")
log_file = _pick("log_file")
⋮----
def _factory() -> StdioTransport
⋮----
kwargs = {"command": cmd, "args": args}
⋮----
return StdioTransport(**kwargs)  # type: ignore[arg-type]
⋮----
# Default for other ClientTransport types: reuse
⋮----
return lambda: server  # type: ignore[return-value]
⋮----
# Log more detailed error information
error_content = None
⋮----
error_content = [
⋮----
error_content = [f"Could not extract error content: {str(e)}"]
⋮----
# 1) Collect any plain TextContent first. This preserves legacy behavior
# for simple servers that return only text. If we have text, prefer it
# over structuredContent to avoid surprising downstream code.
results_text: list[str] = [
results_file: list[FileAttachment] = []
⋮----
# Also collect resources alongside text; callers may want them.
⋮----
# 2) No plain text — use structuredContent if available. To maintain
# backwards compatibility, unwrap simple shapes like {"result": 5}
# into "5"; otherwise serialize the full object as JSON for fidelity.
⋮----
sc = result.structuredContent
⋮----
# Unwrap primitives directly
⋮----
# Unwrap single-key primitive dicts commonly used by tools
⋮----
# Otherwise, serialize to JSON for rich/structured tools
⋮----
# 3) Nothing usable — return empty text and any files
⋮----
"""Call an MCP tool with the given arguments.

        Args:
            tool_name: Name of the tool to call.
            arguments: Arguments to pass to the tool.

        Returns:
            The result of the tool call.
        """
⋮----
# Prefer validated call; if server fails to provide structured content
# despite declaring a schema, fall back to a raw request to bypass
# client-side validation and still surface the data.
⋮----
result: CallToolResult = await self.client.session.call_tool(
⋮----
msg = str(e)
⋮----
result = await self.client.session.send_request(  # type: ignore[assignment]
results = self._convert_tool_result(tool_name, result)
⋮----
# ==============================================================================
# Convenience functions (wrappers around FastMCPClient methods)
# These are useful for one-off calls without needing to manage the
# FastMCPClient context explicitly.
⋮----
"""Get a single Langroid ToolMessage subclass for a specific MCP tool name (async).

    This is a convenience wrapper that creates a temporary FastMCPClient.

    Args:
        server: Specification of the FastMCP server to connect to.
        tool_name: The name of the tool to retrieve.
        **client_kwargs: Additional keyword arguments to pass to the
            FastMCPClient constructor (e.g., sampling_handler, roots).

    Returns:
        A dynamically created Langroid ToolMessage subclass representing the
        requested tool.
    """
⋮----
"""Get a single Langroid ToolMessage subclass
    for a specific MCP tool name (synchronous).

    This is a convenience wrapper that creates a temporary FastMCPClient and runs the
    async `get_tool_async` function using `asyncio.run()`.

    Args:
        server: Specification of the FastMCP server to connect to.
        tool_name: The name of the tool to retrieve.
        **client_kwargs: Additional keyword arguments to pass to the
            FastMCPClient constructor (e.g., sampling_handler, roots).

    Returns:
        A dynamically created Langroid ToolMessage subclass representing the
        requested tool.
    """
⋮----
"""Get all available tools as Langroid ToolMessage subclasses (async).

    This is a convenience wrapper that creates a temporary FastMCPClient.

    Args:
        server: Specification of the FastMCP server to connect to.
        **client_kwargs: Additional keyword arguments to pass to the
            FastMCPClient constructor (e.g., sampling_handler, roots).

    Returns:
        A list of dynamically created Langroid ToolMessage subclasses
        representing all available tools on the server.
    """
⋮----
"""Get all available tools as Langroid ToolMessage subclasses (synchronous).

    This is a convenience wrapper that creates a temporary FastMCPClient and runs the
    async `get_tools_async` function using `asyncio.run()`.

    Args:
        server: Specification of the FastMCP server to connect to.
        **client_kwargs: Additional keyword arguments to pass to the
            FastMCPClient constructor (e.g., sampling_handler, roots).

    Returns:
        A list of dynamically created Langroid ToolMessage subclasses
        representing all available tools on the server.
    """
⋮----
"""Get the raw MCP Tool object for a specific tool name (async).

    This is a convenience wrapper that creates a temporary FastMCPClient to
    retrieve the tool definition from the server.

    Args:
        server: Specification of the FastMCP server to connect to.
        name: The name of the tool to look up.
        **client_kwargs: Additional keyword arguments to pass to the
            FastMCPClient constructor.

    Returns:
        The raw `mcp.types.Tool` object from the server, or `None` if the tool
        is not found.
    """
⋮----
"""Get all available raw MCP Tool objects from the server (async).

    This is a convenience wrapper that creates a temporary FastMCPClient to
    retrieve the list of tool definitions from the server.

    Args:
        server: Specification of the FastMCP server to connect to.
        **client_kwargs: Additional keyword arguments to pass to the
            FastMCPClient constructor.

    Returns:
        A list of raw `mcp.types.Tool` objects available on the server.
    """
</file>

<file path="langroid/agent/task.py">
logger = logging.getLogger(__name__)
⋮----
Responder = Entity | Type["Task"]
⋮----
T = TypeVar("T")
⋮----
def noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None
⋮----
class EventType(str, Enum)
⋮----
"""Types of events that can occur in a task"""
⋮----
TOOL = "tool"  # Any tool generated
SPECIFIC_TOOL = "specific_tool"  # Specific tool by name
LLM_RESPONSE = "llm_response"  # LLM generates response
AGENT_RESPONSE = "agent_response"  # Agent responds
USER_RESPONSE = "user_response"  # User responds
CONTENT_MATCH = "content_match"  # Response matches pattern
NO_RESPONSE = "no_response"  # No valid response from entity
CUSTOM = "custom"  # Custom condition
⋮----
class AgentEvent(BaseModel)
⋮----
"""Single event in a task sequence"""
⋮----
event_type: EventType
tool_name: Optional[str] = None  # For SPECIFIC_TOOL
tool_class: Optional[Type[Any]] = (
⋮----
None  # For storing tool class references when using SPECIFIC_TOOL events
⋮----
content_pattern: Optional[str] = None  # For CONTENT_MATCH (regex)
responder: Optional[str] = None  # Specific responder name
# Optionally match only if the responder was specific entity/task
sender: Optional[str] = None  # Entity name or Task name that sent the message
⋮----
class DoneSequence(BaseModel)
⋮----
"""A sequence of events that triggers task completion"""
⋮----
events: List[AgentEvent]
# Optional name for debugging
name: Optional[str] = None
⋮----
class TaskConfig(BaseModel)
⋮----
"""Configuration for a Task. This is a container for any params that
    we didn't include in the task `__init__` method.
    We may eventually move all the task __init__ params to this class, analogous to how
    we have config classes for `Agent`, `ChatAgent`, `LanguageModel`, etc.

    Attributes:
        inf_loop_cycle_len (int): max exact-loop cycle length: 0 => no inf loop test
        inf_loop_dominance_factor (float): dominance factor for exact-loop detection
        inf_loop_wait_factor (int): wait this * cycle_len msgs before loop-check
        restart_as_subtask (bool): whether to restart *every* run of this task
            when run as a subtask.
        addressing_prefix (str): "@"-like prefix an agent can use to address other
            agents, or entities of the agent. E.g., if this is "@", the addressing
            string would be "@Alice", or "@user", "@llm", "@agent", etc.
            If this is an empty string, then addressing is disabled.
            Default is empty string "".
            CAUTION: this is a deprecated practice, since normal prompts
            can accidentally contain such addressing prefixes, and will break
            your runs. This could happen especially when your prompt/context
            contains code, but of course could occur in normal text as well.
            Instead, use the `RecipientTool` to have agents address other agents or
            entities. If you do choose to use `addressing_prefix`, the recommended
            setting is to use `langroid.utils.constants.AT`, which currently is "|@|".
            Note that this setting does NOT affect the use of `constants.SEND_TO` --
            this is always enabled since this is a critical way for responders to
            indicate that the message should be sent to a specific entity/agent.
            (Search for "SEND_TO" in the examples/ dir to see how this is used.)
        allow_subtask_multi_oai_tools (bool): whether to allow multiple OpenAI
            tool-calls to be sent to a sub-task.
        recognize_string_signals (bool): whether to recognize string-based signaling
            like DONE, SEND_TO, PASS, etc. Default is True, but note that we don't need
            to use string-based signaling, and it is recommended to use the
            new Orchestration tools instead (see agent/tools/orchestration.py),
            e.g. DoneTool, SendTool, etc.
            Note: this is distinct from
            ``ChatAgentConfig.recognize_recipient_in_content``, which controls
            whether LLM response text is parsed for ``TO[<recipient>]:`` and
            JSON ``{"recipient": ...}`` patterns at the Agent level.
            To fully disable all text-based routing, set both to False.
        done_if_tool (bool): whether to consider the task done if the pending message
            contains a Tool attempt by the LLM
            (including tools not handled by the agent).
            Default is False.
        done_sequences (List[DoneSequence]): List of event sequences that trigger task
            completion. Task is done if ANY sequence matches the recent event history.
            Each sequence is checked against the message parent chain.
            Tool classes can be referenced in sequences like "T[MyToolClass]".

    """
⋮----
inf_loop_cycle_len: int = 10
inf_loop_dominance_factor: float = 1.5
inf_loop_wait_factor: int = 5
restart_as_subtask: bool = False
logs_dir: str = "logs"
enable_loggers: bool = True
enable_html_logging: bool = True
addressing_prefix: str = ""
allow_subtask_multi_oai_tools: bool = True
recognize_string_signals: bool = True
done_if_tool: bool = False
done_sequences: Optional[List[Union[str, DoneSequence]]] = None
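# --- Illustrative sketch (not part of the repository source) ---
# A hedged example of configuring `done_sequences`, using the classes defined
# above in this module. The string shorthand ("T, A") is inferred from the
# docstring's "T[MyToolClass]" notation; treat the exact DSL syntax as an
# assumption and check the tests/docs of your langroid version.
example_config = TaskConfig(
    done_if_tool=False,
    done_sequences=[
        # string shorthand: any tool generated, then agent handles it
        "T, A",
        # roughly equivalent explicit form using the event classes above
        DoneSequence(
            name="tool-then-agent",
            events=[
                AgentEvent(event_type=EventType.TOOL),
                AgentEvent(event_type=EventType.AGENT_RESPONSE),
            ],
        ),
    ],
)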
⋮----
class Task
⋮----
"""
    A `Task` wraps an `Agent` object, and sets up the `Agent`'s goals and instructions.
    A `Task` maintains two key variables:

    - `self.pending_message`, which is the message awaiting a response, and
    - `self.pending_sender`, which is the entity that sent the pending message.

    The possible responders to `self.pending_message` are the `Agent`'s own "native"
    responders (`agent_response`, `llm_response`, and `user_response`), and
    the `run()` methods of any sub-tasks. All responders have the same type-signature
    (somewhat simplified):
    ```
    str | ChatDocument -> ChatDocument
    ```
    Responders may or may not specify an intended recipient of their generated response.

    The main top-level method in the `Task` class is `run()`, which repeatedly calls
    `step()` until `done()` returns true. The `step()` represents a "turn" in the
    conversation: this method sequentially (in round-robin fashion) calls the responders
    until it finds one that generates a *valid* response to the `pending_message`
    (as determined by the `valid()` method). Once a valid response is found,
    `step()` updates the `pending_message` and `pending_sender` variables,
    and on the next iteration, `step()` re-starts its search for a valid response
    *from the beginning* of the list of responders (the exception being that the
    human user always gets a chance to respond after each non-human valid response).
    This process repeats until `done()` returns true, at which point `run()` returns
    the value of `result()`, which is the final result of the task.
    """
⋮----
# class variable called `cache` that is a RedisCache object
_cache: RedisCache | None = None
_background_tasks_started: bool = False
⋮----
**kwargs: Any,  # catch-all for any legacy params, for backwards compatibility
⋮----
"""
        A task to be performed by an agent.

        Args:
            agent (Agent): agent associated with the task
            name (str): name of the task
            llm_delegate (bool):
                Whether to delegate "control" to LLM; conceptually,
                the "controlling entity" is the one "seeking" responses to its queries,
                and has a goal it is aiming to achieve, and decides when a task is done.
                The "controlling entity" is either the LLM or the USER.
                (Note within a Task there is just one
                LLM, and all other entities are proxies of the "User" entity).
                See also: `done_if_response`, `done_if_no_response` for more granular
                control of task termination.
            single_round (bool):
                If true, task runs until one message by "controller"
                (i.e. LLM if `llm_delegate` is true, otherwise USER)
                and subsequent response by non-controller [When a tool is involved,
                this will not give intended results. See `done_if_response`,
                `done_if_no_response` below].
                If false, runs for the specified number of turns in
                `run`, or until `done()` is true.
                One run of step() is considered a "turn".
                See also: `done_if_response`, `done_if_no_response` for more granular
                control of task termination.
            system_message (str): if not empty, overrides agent's system_message
            user_message (str): if not empty, overrides agent's user_message
            restart (bool): if true (default), resets the agent's message history
                *at every run* when it is the top-level task. Ignored when
                the task is a subtask of another task. Restart behavior of a subtask's
                `run()` can be controlled via the `TaskConfig.restart_as_subtask`
                setting.
            default_human_response (str|None): default response from user; useful for
                testing, to avoid interactive input from user.
                [Instead of this, setting `interactive` usually suffices]
            default_return_type: if not None, extracts a value of this type from the
                result of self.run()
            interactive (bool): if true, wait for human input after each non-human
                response (prevents infinite loop of non-human responses).
                Default is true. If false, then `default_human_response` is set to ""
                Note: When interactive = False, the one exception is when the user
                is explicitly addressed, via "@user" or using RecipientTool, in which
                case the system will wait for a user response. In other words, use
                `interactive=False` when you want a "largely non-interactive"
                run, with the exception of explicit user addressing.
            only_user_quits_root (bool): if true, when interactive=True, only user can
                quit the root task (Ignored when interactive=False).
            erase_substeps (bool): if true, when task completes, erase intermediate
                conversation with subtasks from this agent's `message_history`, and also
                erase all subtask agents' `message_history`.
                Note: erasing can reduce prompt sizes, but results in repetitive
                sub-task delegation.
            allow_null_result (bool):
                If true, create dummy NO_ANSWER response when no valid response is found
                in a step.
                Optional, default is False.
                *Note:* In non-interactive mode, when this is set to True,
                you can have a situation where an LLM generates (non-tool) text,
                and no other responders have valid responses, and a "Null result"
                is inserted as a dummy response from the User entity, so the LLM
                will now respond to this Null result, and this will continue
                until the LLM emits a DONE signal (if instructed to do so),
                otherwise langroid detects a potential infinite loop after
                a certain number of such steps (= `TaskConfig.inf_loop_wait_factor`)
                and will raise an InfiniteLoopException.
            max_stalled_steps (int): task considered done after this many consecutive
                steps with no progress. Default is 3.
            done_if_no_response (List[Responder]): consider task done if NULL
                response from any of these responders. Default is empty list.
            done_if_response (List[Responder]): consider task done if NON-NULL
                response from any of these responders. Default is empty list.
        """
⋮----
agent = ChatAgent()
⋮----
# Store parsed done sequences (will be initialized after agent assignment)
⋮----
# how to behave as a sub-task; can be overridden by `add_sub_task()`
⋮----
# counts of distinct pending messages in history,
# to help detect (exact) infinite loops
⋮----
# copy the agent's config, so that we don't modify the original agent's config,
# which may be shared by other agents.
⋮----
config_copy = copy.deepcopy(agent.config)
⋮----
agent = cast(ChatAgent, agent)
⋮----
# possibly change the system and user messages
⋮----
# we always have at least 1 task_message
⋮----
# Initialize parsed done sequences now that self.agent is available
⋮----
# Pass agent's llm_tools_map directly
tools_map = (
⋮----
self.n_stalled_steps = 0  # how many consecutive steps with no progress?
# how many 2-step-apart alternations of no_answer step-result have we had,
# i.e. x1, N/A, x2, N/A, x3, N/A ...
⋮----
self._step_idx = -1  # current step index
⋮----
self.is_done = False  # is task done (based on response)?
self.is_pass_thru = False  # is current response a pass-thru?
⋮----
# task name overrides name in agent config
⋮----
# only override agent's default_human_response if it is explicitly set
⋮----
# set to True if we want to collapse multi-turn conversation with sub-tasks into
# just the first outgoing message and last incoming message.
# Note this also completely erases sub-task agents' message_history.
⋮----
agent_entity_responders = agent.entity_responders()
agent_entity_responders_async = agent.entity_responders_async()
⋮----
self.human_tried = False  # did human get a chance to respond in last step?
⋮----
# latest message in a conversation among entities and agents.
⋮----
self.turns = -1  # no limit
⋮----
# Track last responder for done sequence checking
⋮----
# Track response sequence for message chain
⋮----
# 0: User instructs (delegating to LLM);
# 1: LLM (as the Controller) asks;
# 2: user replies.
⋮----
# 0: User (as Controller) asks,
# 1: LLM replies.
⋮----
# other sub_tasks this task can delegate to
⋮----
self.caller: Task | None = None  # which task called this task's `run` method
⋮----
def clone(self, i: int) -> "Task"
⋮----
"""
        Returns a copy of this task, with a new agent.
        """
⋮----
agent: ChatAgent = self.agent.clone(i)
⋮----
@classmethod
    def cache(cls) -> RedisCache
⋮----
@classmethod
    def _start_background_tasks(cls) -> None
⋮----
"""Start background object registry cleanup thread. NOT USED."""
⋮----
cleanup_thread = threading.Thread(
⋮----
def __repr__(self) -> str
⋮----
def __str__(self) -> str
⋮----
def _init_message_counter(self) -> None
⋮----
# create a unique string that will not likely be in any message,
# so we always have a message with count=1
⋮----
def _cache_session_store(self, key: str, value: str) -> None
⋮----
"""
        Cache a key-value pair for the current session.
        E.g. key = "kill", value = "1"
        """
⋮----
def _cache_session_lookup(self, key: str) -> Dict[str, Any] | str | None
⋮----
"""
        Retrieve a value from the cache for the current session.
        """
session_id_key = f"{self.session_id}:{key}"
⋮----
cached_val = self.cache().retrieve(session_id_key)
⋮----
def _is_kill(self) -> bool
⋮----
"""
        Check if the current session is killed.
        """
⋮----
def _set_alive(self) -> None
⋮----
"""
        Initialize the kill status of the current session.
        """
⋮----
@classmethod
    def kill_session(cls, session_id: str = "") -> None
⋮----
"""
        Kill the session with the given session_id.
        """
session_id_kill_key = f"{session_id}:kill"
⋮----
def kill(self) -> None
⋮----
"""
        Kill the task run associated with the current session.
        """
⋮----
@property
    def _level(self) -> int
⋮----
@property
    def _indent(self) -> str
⋮----
@property
    def _enter(self) -> str
⋮----
@property
    def _leave(self) -> str
⋮----
"""
        Add a sub-task (or list of subtasks) that this task can delegate
        (or fail-over) to. Note that the sequence of sub-tasks is important,
        since these are tried in order, as the parent task searches for a valid
        response (unless a sub-task is explicitly addressed).

        Args:
            task: A task, or list of tasks, or a tuple of task and task config,
                or a list of tuples of task and task config.
                These tasks are added as sub-tasks of the current task.
                The task configs (if any) dictate how the tasks are run when
                invoked as sub-tasks of other tasks. This allows users to specify
                behavior applicable only in the context of a particular task-subtask
                combination.
        """
⋮----
config = TaskConfig()
⋮----
def init(self, msg: None | str | ChatDocument = None) -> ChatDocument | None
⋮----
"""
        Initialize the task, with an optional message to start the conversation.
        Initializes `self.pending_message` and `self.pending_sender`.
        Args:
            msg (str|ChatDocument): optional message to start the conversation.

        Returns:
            (ChatDocument|None): the initialized `self.pending_message`.
            Currently not used in the code, but provided for convenience.
        """
⋮----
# if agent has a history beyond system msg, set the
# pending message to the ChatDocument linked from
# last message in the history
last_agent_msg = self.agent.message_history[-1]
⋮----
# carefully deep-copy: fresh metadata.id, register
# as new obj in registry
original_parent_id = msg.metadata.parent_id
⋮----
# Preserve the parent pointer from the original message
⋮----
# msg may have come from `caller`, so we pretend this is from
# the CURRENT task's USER entity
⋮----
# update parent, child, agent pointers
⋮----
# Only override parent_id if it wasn't already set in the
# original message. This preserves parent chains from TaskTool
⋮----
# Log system message if it exists
⋮----
system_msg = self.agent._create_system_and_tools_message()
system_message_chat_doc = ChatDocument.from_LLMMessage(
# log the system message
⋮----
def init_loggers(self) -> None
⋮----
"""Initialise per-task Rich and TSV loggers."""
⋮----
# unique logger name ensures a distinct `logging.Logger` object
⋮----
header = ChatDocLoggerFields().tsv_header()
⋮----
# HTML logger
⋮----
model_info = ""
⋮----
model_info = getattr(self.agent.config.llm, "chat_model", "")
⋮----
# Log clickable file:// link to the HTML log
html_log_path = self.html_logger.file_path.resolve()
⋮----
def reset_all_sub_tasks(self) -> None
⋮----
"""
        Recursively reset message history & state of own agent and
        those of all sub-tasks.
        """
⋮----
def __getitem__(self, return_type: type) -> Self
⋮----
"""Returns a (shallow) copy of `self` with a default return type."""
clone = copy.copy(self)
⋮----
def run(  # noqa
⋮----
) -> Optional[ChatDocument]: ...  # noqa
⋮----
) -> Optional[T]: ...  # noqa
⋮----
"""Synchronous version of `run_async()`.
        See `run_async()` for details."""
⋮----
# We are either at top level, with restart = True, OR
# we are a sub-task with restart_as_subtask = True,
# so reset own agent and recursively for all sub-tasks
⋮----
self._no_answer_step = -5  # last step where the best explicit response was N/A
# how many N/A alternations have we had so far? (for Inf loop detection)
⋮----
msg_input = self.agent.to_ChatDocument(msg, author_entity=Entity.USER)
⋮----
# this task is not the intended recipient so return None
⋮----
# self.turns overrides if it is > 0 and turns not set (i.e. = -1)
turns = self.turns if turns < 0 else turns
i = 0
⋮----
self._step_idx = i  # used in step() below
⋮----
# Track pending message in response sequence
⋮----
max_turns = (
⋮----
# Important to distinguish between:
# (a) intentional run for a
#     fixed number of turns, where we expect the pending message
#     at that stage to be the desired result, and
# (b) hitting max_turns limit, which is not intentional, and is an
#     exception, resulting in a None task result
status = (
⋮----
final_result = self.result(status)
⋮----
return_type = self.default_return_type
⋮----
# If possible, take a final strict decoding step
# when the output does not match `return_type`
⋮----
parsed_result = self.agent.from_ChatDocument(final_result, return_type)
⋮----
strict_agent = self.agent[return_type]
output_args = strict_agent._function_args()[-1]
⋮----
schema = output_args.function.parameters
strict_result = strict_agent.llm_response(
⋮----
async def run_async(  # noqa
⋮----
"""
        Loop over `step()` until task is considered done or `turns` is reached.
        Runs asynchronously.

        Args:
            msg (Any): initial *user-role* message to process; if None,
                the LLM will respond to its initial `self.task_messages`
                which set up and kick off the overall task.
                The agent tries to achieve this goal by looping
                over `self.step()` until the task is considered
                done; this can involve a series of messages produced by Agent,
                LLM or Human (User). Note that `msg`, if passed, is treated as
                message with role `user`; a "system" role message should not be
                passed here.
            turns (int): number of turns to run the task for;
                default is -1, which means run until task is done.
            caller (Task|None): the calling task, if any
            max_cost (float): max cost allowed for the task (default 0 -> no limit)
            max_tokens (int): max tokens allowed for the task (default 0 -> no limit)
            session_id (str): session id for the task
            allow_restart (bool): whether to allow restarting the task
            return_type (Optional[Type[T]]): desired final result type

        Returns:
            Optional[ChatDocument]: valid result of the task.
        """
⋮----
# Even if the initial "sender" is not literally the USER (since the task could
# have come from another LLM), as far as this agent is concerned, the initial
# message can be considered to be from the USER
# (from the POV of this agent's LLM).
⋮----
await asyncio.sleep(0.01)  # temp yield to avoid blocking
⋮----
strict_result = await strict_agent.llm_response_async(
⋮----
# sets indentation to be printed prior to any output from agent
⋮----
# mark where we are in the message history, so we can reset to this when
# we are done with the task
⋮----
# TODO decide on whether or not to print, based on is_async
llm_model = (
⋮----
def _post_run_loop(self) -> None
⋮----
# delete all messages from our agent's history, AFTER the first incoming
# message, and BEFORE final result message
n_messages = 0
⋮----
# TODO I don't like directly accessing agent message_history. Revisit.
# (Pchalasani)
# Note: msg history will consist of:
# - H: the original msg history, ending at idx= self.message_history_idx
# - R: this agent's response, which presumably leads to:
# - X: a series of back-and-forth msgs (including with agent's own
#     responders and with sub-tasks)
# - F: the final result message, from this agent.
# Here we are deleting all of [X] from the agent's message history,
# so that it simply looks as if the sub-tasks never happened.
⋮----
dropped = self.agent.message_history[
# first delete the linked ChatDocuments (and descendants) from
# ObjectRegistry
⋮----
# then delete the messages from the agent's message_history
⋮----
n_messages = len(self.agent.message_history)
⋮----
# erase our conversation with agent of subtask t
⋮----
# erase message_history of agent of subtask t
# TODO - here we assume that subtask-agents are
# ONLY talking to the current agent.
⋮----
def step(self, turns: int = -1) -> ChatDocument | None
⋮----
"""
        Synchronous version of `step_async()`. See `step_async()` for details.
        TODO: Except for the self.response() calls, this fn should be identical to
        `step_async()`. Consider refactoring to avoid duplication.
        """
⋮----
parent = self.pending_message
recipient = (
⋮----
error_doc = ChatDocument(
⋮----
responders: List[Responder] = self.non_human_responders.copy()
⋮----
# Give human first chance if they haven't been tried in last step,
# and the msg is not a tool-call attempt;
# (When `interactive=False`, the human is only allowed to respond
#  if explicitly addressed)
# This ensures human gets a chance to respond,
#   other than to a LLM tool-call.
# When there's a tool msg attempt we want the
#  Agent to be the next responder; this only makes a difference in an
#  interactive setting: LLM generates tool, then we don't want user to
#  have to respond, and instead let the agent_response handle the tool.
⋮----
found_response = False
# (responder, result) from a responder who explicitly said NO_ANSWER
no_answer_response: None | Tuple[Responder, ChatDocument] = None
n_non_responders = 0
⋮----
# create dummy msg for logging
log_doc = ChatDocument(
# no need to register this dummy msg in ObjectRegistry
⋮----
# don't stay in this "non-response" loop forever
⋮----
result = self.response(r, turns)
⋮----
no_answer_response = (r, result)
⋮----
found_response = True
⋮----
# skip trying other responders in this step
⋮----
if not found_response:  # did not find a valid response
⋮----
# even though there was no valid response from anyone in this step,
# if there was at least one who EXPLICITLY said NO_ANSWER, then
# we process that as a valid response.
⋮----
async def step_async(self, turns: int = -1) -> ChatDocument | None
⋮----
"""
        A single "turn" in the task conversation: The "allowed" responders in this
        turn (which can be either the 3 "entities", or one of the sub-tasks) are
        tried in sequence, until a _valid_ response is obtained; a _valid_
        response is one that contributes to the task, either by ending it,
        or producing a response to be further acted on.
        Update `self.pending_message` to the latest valid response (or NO_ANSWER
        if no valid response was obtained from any responder).

        Args:
            turns (int): number of turns to process. Typically used in testing
                where there is no human to "quit out" of current level, or in cases
                where we want to limit the number of turns of a delegated agent.

        Returns (ChatDocument|None):
            Updated `self.pending_message`. Currently the return value is not used
                by the `task.run()` method, but we return this as a convenience for
                other use-cases, e.g. where we want to run a task step by step in a
                different context.
        """
⋮----
responders: List[Responder] = self.non_human_responders_async.copy()
⋮----
result = await self.response_async(r, turns)
⋮----
def _update_no_answer_vars(self, result: ChatDocument) -> None
⋮----
"""Update variables related to NO_ANSWER responses, to aid
        in alternating NO_ANSWER infinite-loop detection."""
⋮----
# N/A two steps ago
⋮----
# reset alternations counter
⋮----
# record the last step where the best explicit response was N/A
⋮----
"""Processes valid result from a responder, during a step"""
⋮----
# Store the last responder for done sequence checking
⋮----
# pending_sender is of type Responder,
# i.e. it is either one of the agent's entities
# OR a sub-task, that has produced a valid response.
# Contrast this with self.pending_message.metadata.sender, which is an ENTITY
# of this agent, or a sub-task's agent.
⋮----
# when pending msg is from our own agent, respect the sender set there,
# since sometimes a response may "mock" as if the response is from
# another entity (e.g when using RewindTool, the agent handler
# returns a result as if it were from the LLM).
⋮----
# when pending msg is from a sub-task, the sender is the sub-task
⋮----
# set the parent/child links ONLY if not already set by agent internally,
# which may happen when using the RewindTool, or in other scenarios.
⋮----
# reset stuck counter since we made progress
⋮----
# We're ignoring the DoneTools (if any) in this case,
# so remove them from the pending msg, to ensure
# they don't affect the next step.
⋮----
# update counters for infinite loop detection
hashed_msg = hash(str(self.pending_message))
⋮----
def _process_invalid_step_result(self, parent: ChatDocument | None) -> None
⋮----
"""
        Since step had no valid result from any responder, decide whether to update the
        self.pending_message to a NO_ANSWER message from the opposite entity,
        or leave it as is.
        Args:
           parent (ChatDocument|None): parent message of the current message
        """
⋮----
# Null step-result is allowed, and we're not in a "pass-thru" situation,
# so we update the pending_message to a dummy NO_ANSWER msg
# from the entity 'opposite' to the current pending_sender,
# so that the task can continue.
# CAUTION: unless the LLM is instructed to signal DONE at an appropriate
# time, this can result in an infinite loop.
responder = (
parent_id = "" if parent is None else parent.id()
⋮----
def _show_pending_message_if_debug(self) -> None
⋮----
sender_str = escape(str(self.pending_sender))
msg_str = escape(str(self.pending_message))
⋮----
def _forbid_multi_oai_tools(self, e: Responder) -> ChatDocument
⋮----
# Passing multiple OpenAI Tools to be handled by another agent
# is not supported yet (we need to carefully establish correspondence
# between the original tool-calls of agent A, and the returned results,
# which may involve recursive-called tools by agent B).
# So we set an error result corresponding to each tool-call.
⋮----
err_str = """
id2result = OrderedDict((tc.id, err_str) for tc in self.agent.oai_tool_calls)
result = e.agent.create_user_response(
⋮----
"""
        Sync version of `response_async()`. See `response_async()` for details.
        """
⋮----
actual_turns = e.turns if e.turns > 0 else turns
⋮----
# e.callbacks.set_parent_agent(self.agent)
pending_tools = self.agent.try_get_tool_messages(self.pending_message)
# TODO disable this
⋮----
result = self._forbid_multi_oai_tools(e)
⋮----
result = e.run(
# update result.tool_messages if any
⋮----
result_str = (  # only used by callback to display content and possible tool
maybe_tool = len(extract_top_level_json(result_str)) > 0
⋮----
response_fn = self._entity_responder_map[cast(Entity, e)]
result = response_fn(self.pending_message)
# update result.tool_messages if any.
# Do this only if sender is LLM, since this could be
# a tool-call result from the Agent responder, which may
# contain strings that look like tools, and we don't want to
# trigger strict tool recovery due to that.
⋮----
result_chat_doc = self.agent.to_ChatDocument(
⋮----
# process result in case there is a routing instruction
⋮----
# this supports Agent responders and Task.run() to
# return a ToolMessage, in addition to str and ChatDocument
⋮----
# With the curr defn of Task.result(),
# Task.run() can't return a ToolMessage, so this case doesn't occur,
# but we leave it here in case a
# Task subclass overrides default behavior
⋮----
# e must be this agent's Entity (LLM, AGENT or USER)
⋮----
# ignore all string-based signaling/routing
⋮----
# parse various routing/addressing strings in result
⋮----
if is_pass is None:  # no routing, i.e. neither PASS nor SEND
⋮----
# Just PASS, no recipient
# This means pass on self.pending_message to the next responder
# in the default sequence of responders.
# So leave result intact since we handle "PASS" in step()
⋮----
# set recipient in self.pending_message
⋮----
# clear out recipient, replace with just PASS
⋮----
# we are sending non-empty content to non-null recipient
# clean up result.content, set metadata.recipient and return
⋮----
"""
        Get response to `self.pending_message` from a responder.
        If the response is __valid__ (i.e. it ends the current turn of seeking
        responses), return it as a ChatDocument object; otherwise return None.
        Args:
            e (Responder): responder to get response from.
            turns (int): number of turns to run the task for.
                Default is -1, which means run until task is done.

        Returns:
            Optional[ChatDocument]: response to `self.pending_message` from entity if
            valid, None otherwise
        """
⋮----
# e.callbacks.set_parent_agent(self.agent)
result = await e.run_async(
⋮----
response_fn = self._entity_responder_async_map[cast(Entity, e)]
result = await response_fn(self.pending_message)
# update result.tool_messages if any
⋮----
def result(self, status: StatusCode | None = None) -> ChatDocument | None
⋮----
"""
        Get result of task. This is the default behavior.
        Derived classes can override this.

        Note the result of a task is returned as if it is from the User entity.

        Args:
            status (StatusCode): status of the task when it ended
        Returns:
            ChatDocument: result of task
        """
⋮----
# In these cases we don't know (and don't want to try to guess)
# what the task result should be, so we return None
⋮----
result_msg = self.pending_message
⋮----
content = result_msg.content if result_msg else ""
content_any = result_msg.content_any if result_msg else None
⋮----
# assuming it is of the form "DONE: <content>"
content = content.replace(DONE, "").strip()
oai_tool_calls = result_msg.oai_tool_calls if result_msg else None
oai_tool_id2result = result_msg.oai_tool_id2result if result_msg else None
fun_call = result_msg.function_call if result_msg else None
tool_messages = result_msg.tool_messages if result_msg else []
# if there is a DoneTool or AgentDoneTool among these,
# we extract content and tools from here, and ignore all others
⋮----
content = ""
content_any = None
tool_messages = [t]  # pass it on to parent so it also quits
⋮----
# there shouldn't be multiple tools like this; just take the first
content = to_string(t.content)
content_any = t.content
fun_call = None
oai_tool_calls = None
⋮----
# AgentDoneTool may have tools, unlike DoneTool
tool_messages = t.tools
⋮----
# drop the "Done" tools since they should not be part of the task result,
# or else they would cause the parent task to get unintentionally done!
tool_messages = [
block = result_msg.metadata.block if result_msg else None
recipient = result_msg.metadata.recipient if result_msg else ""
tool_ids = result_msg.metadata.tool_ids if result_msg else []
⋮----
# regardless of which entity actually produced the result,
# when we return the result, we set entity to USER
# since to the "parent" task, this result is equivalent to a response from USER
result_doc = ChatDocument(
⋮----
def _is_empty_message(self, msg: str | ChatDocument | None) -> bool
⋮----
"""
        Check if msg is empty or None
        Args:
            msg (str|ChatDocument|None): message to check
        Returns:
            bool: True if msg is (equivalent to) empty or None, False otherwise
        """
# if ignoring string-based signaling, set pass_str to ""
pass_str = PASS if self.config.recognize_string_signals else ""
⋮----
"""Is the task done based on the response from the given responder?"""
⋮----
allow_done_string = self.config.recognize_string_signals
response_says_done = result is not None and (
⋮----
# this condition ensures agent had chance to handle tools
⋮----
def _maybe_infinite_loop(self) -> bool
⋮----
"""
        Detect possible infinite loop based on message frequencies.
        NOTE: This detects two types of loops:
        - Alternating NO_ANSWER loops, specifically of the form
        x1 NO_ANSWER x2 NO_ANSWER x3 NO_ANSWER...
        (e.g. an LLM repeatedly saying something different, and another responder
        or sub-task saying NO_ANSWER -- i.e. "DO-NOT-KNOW")

        - "exact" loops, i.e. a cycle of messages that repeats exactly, e.g.
        a r b i t r a t e r a t e r a t e r a t e ...

        [It does not detect more general "approximate" loops, where two entities are
        responding to each other potentially forever, with (slightly) different
        messages each time]

        Here is the logic for the exact-loop detection:
        Intuition: when you look at a sufficiently long sequence with an m-message
        loop, then the frequencies of these m messages will "dominate" those
        of all other messages.

        1. First find m "dominant" messages, i.e. when arranged in decreasing
            frequency order, find the m such that
                freq[m] > F * freq[m+1] and
                freq[m] > W + freq[m+1]
            where F = config.inf_loop_dominance_factor (default 1.5) and
            W = config.inf_loop_wait_factor (default 5).
            So if you plot these frequencies in decreasing order,
            you will see a big drop in the plot, from m to m+1.
            We call the freqs until m the "dominant" freqs.
        2. Say we found m such dominant messages
           If the set of the last (W * m) messages is the same as the
           set of m dominant messages, then we are likely in a loop.
        """
⋮----
max_cycle_len = self.config.inf_loop_cycle_len
⋮----
# no loop detection
⋮----
wait_factor = self.config.inf_loop_wait_factor
⋮----
# we haven't seen enough messages to detect a loop
⋮----
# recall there's always a dummy msg with freq = 1
most_common_msg_counts: List[Tuple[str, int]] = (
# get the most dominant msgs, i.e. these are at least 1.5x more freq
# than the rest
F = self.config.inf_loop_dominance_factor
# counts array in non-increasing order
counts = np.array([c for _, c in most_common_msg_counts])
# find first index where counts[i] > F * counts[i+1]
ratios = counts[:-1] / counts[1:]
diffs = counts[:-1] - counts[1:]
indices = np.where((ratios > F) & (diffs > wait_factor))[0]
m = indices[-1] if indices.size > 0 else -1
⋮----
# no dominance found, but...
⋮----
# ...The most-common messages are at most max_cycle_len,
# even though we looked for the most common (max_cycle_len + 1) msgs.
# This means there are only at most max_cycle_len distinct messages,
# which also indicates a possible loop.
m = len(most_common_msg_counts) - 1
⋮----
# ... we have enough messages, but no dominance found,
# so there COULD be loops longer than max_cycle_len,
# OR there is no loop at all; we can't tell, so we return False.
⋮----
dominant_msg_counts = most_common_msg_counts[: m + 1]
# if the SET of dominant m messages is the same as the
# SET of last m*w messages (where w = config.inf_loop_wait_factor),
# then we are likely in a loop
dominant_msgs = set([msg for msg, _ in dominant_msg_counts])
lookback = wait_factor * (m + 1)
recent_msgs = set(list(self.history)[-lookback:])
⋮----
"""
        Check if task is done. This is the default behavior.
        Derived classes can override this.
        Args:
            result (ChatDocument|None): result from a responder
            r (Responder|None): responder that produced the result
                Not used here, but could be used by derived classes.
        Returns:
            bool: True if task is done, False otherwise
            StatusCode: status code indicating why task is done
        """
⋮----
result = result or self.pending_message
⋮----
# Check if task should be done if message contains a tool
⋮----
# Check done sequences
⋮----
# Get the message chain from the current result
msg_chain = self._get_message_chain(result)
⋮----
# Use last responder if r not provided
responder = r if r is not None else self._last_responder
⋮----
# Check each sequence
⋮----
seq_name = sequence.name or "unnamed"
⋮----
# An entity decided task is done, either via DoneTool,
# or by explicitly saying DONE
done_result = result is not None and (
⋮----
user_quit = (
⋮----
# we are stuck, so bail to avoid infinite loop
⋮----
# for top-level task, only user can quit out
⋮----
final = (
⋮----
# no valid response from any entity/agent in current turn
⋮----
or (  # current task is addressing message to caller task
⋮----
"""
        Is the result from a Responder (i.e. an entity or sub-task)
        such that we can stop searching for responses in this step?
        """
# TODO caution we should ensure that no handler method (tool) returns simply
# an empty string (e.g. when showing the contents of an empty file), since that
# would be considered an invalid response, and other responders will wrongly
# be given a chance to respond.
⋮----
# if task would be considered done given responder r's `result`,
# then consider the result valid.
⋮----
# some weaker LLMs, including even GPT-4o, may say "DO-NOT-KNOW."
# (with a punctuation at the end), so need to strip out punctuation
⋮----
"""
        Log current pending message, and related state, for lineage/debugging purposes.

        Args:
            resp (Responder): Responder that generated the `msg`
            msg (ChatDocument, optional): Message to log. Defaults to None.
            mark (bool, optional): Whether to mark the message as the final result of
                a `task.step()` call. Defaults to False.
        """
⋮----
default_values = ChatDocLoggerFields().model_dump().values()
msg_str_tsv = "\t".join(str(v) for v in default_values)
⋮----
msg_str_tsv = msg.tsv_str()
⋮----
mark_str = "*" if mark else " "
task_name = self.name if self.name != "" else "root"
resp_color = "white" if mark else "red"
resp_str = f"[{resp_color}] {resp} [/{resp_color}]"
⋮----
msg_str = f"{mark_str}({task_name}) {resp_str}"
⋮----
color = {
f = msg.log_fields()
tool_type = f.tool_type.rjust(6)
tool_name = f.tool.rjust(10)
tool_str = f"{tool_type}({tool_name})" if tool_name != "" else ""
sender = f"[{color}]" + str(f.sender_entity).rjust(10) + f"[/{color}]"
sender_name = f.sender_name.rjust(10)
recipient = "=>" + str(f.recipient).rjust(10)
block = "X " + str(f.block or "").rjust(10)
content = f"[{color}]{f.content}[/{color}]"
msg_str = (
⋮----
resp_str = str(resp)
⋮----
# Create a minimal fields object for None messages
⋮----
fields_dict = {
⋮----
# Get fields from the message
fields = msg.log_fields()
fields_dict = fields.model_dump()
⋮----
# Create a ChatDocLoggerFields-like object for the HTML logger
# Create a simple BaseModel subclass dynamically
⋮----
class LogFields(BaseModel)
⋮----
model_config = ConfigDict(extra="allow")  # Allow extra fields
⋮----
log_obj = LogFields(**fields_dict)
⋮----
def _valid_recipient(self, recipient: str) -> bool
⋮----
"""
        Is the recipient among the list of responders?
        Args:
            recipient (str): Name of recipient
        """
⋮----
responder_names = [self.name.lower()] + [
⋮----
def _recipient_mismatch(self, e: Responder) -> bool
⋮----
"""
        Is a recipient explicitly specified that does not match responder `e`?
        """
⋮----
and not (recipient == e)  # case insensitive for entities
⋮----
and recipient != self.name  # case sensitive
⋮----
def _user_can_respond(self) -> bool
⋮----
def _can_respond(self, e: Responder) -> bool
⋮----
user_can_respond = self._user_can_respond()
⋮----
def set_color_log(self, enable: bool = True) -> None
⋮----
"""
        Flag to enable/disable color logging using rich.console.
        In some contexts, such as Colab notebooks, we may want to disable color logging
        using rich.console, since those logs show up in the cell output rather than
        in the log file. Turning off this feature will still create logs, but without
        the color formatting from rich.console
        Args:
            enable (bool): value to set `self.color_log` to,
                which will enable/disable rich logging
        """
⋮----
"""
        Parse routing instruction if any, of the form:
        PASS:<recipient>  (pass current pending msg to recipient)
        SEND:<recipient> <content> (send content to recipient)
        @<recipient> <content> (send content to recipient)
        Args:
            msg (ChatDocument|str|None): message to parse
            addressing_prefix (str): prefix to address other agents or entities,
                (e.g. "@". See documentation of `TaskConfig` for details).
        Returns:
            Tuple[bool|None, str|None, str|None]:
                bool: true=PASS, false=SEND, or None if neither
                str: recipient, or None
                str: content to send, or None
        """
msg_str = msg.content if isinstance(msg, ChatDocument) else msg
⋮----
content = msg.content if isinstance(msg, ChatDocument) else msg
content = content.strip()
⋮----
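# --- Illustrative sketch (not part of the repository source) ---
# A simplified re-implementation of the routing grammar described above, for
# exposition only: the real parser also handles ChatDocument inputs, the
# configurable addressing prefix, and various edge cases.
from typing import Optional, Tuple


def parse_routing_sketch(s: str) -> Tuple[Optional[bool], Optional[str], Optional[str]]:
    s = s.strip()
    if s.startswith("PASS:"):
        return True, s[len("PASS:"):].strip() or None, None
    if s.startswith("SEND:"):
        recipient, _, content = s[len("SEND:"):].strip().partition(" ")
        return False, recipient or None, content.strip() or None
    if s.startswith("@"):
        recipient, _, content = s[1:].partition(" ")
        return False, recipient or None, content.strip() or None
    return None, None, None  # no routing instruction detected


assert parse_routing_sketch("SEND:Bob please check") == (False, "Bob", "please check")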
"""Classify a message into an AgentEvent for sequence matching."""
⋮----
event_type = EventType.NO_RESPONSE
tool_name = None
tool_messages = self.agent.try_get_tool_messages(msg, all_tools=True)
⋮----
event_type = EventType.TOOL
⋮----
tool_name = tool_messages[0].request
⋮----
event_type = EventType.LLM_RESPONSE
⋮----
event_type = EventType.AGENT_RESPONSE
⋮----
event_type = EventType.USER_RESPONSE
⋮----
sender_name = None
⋮----
sender_name = responder.value
⋮----
sender_name = responder.name
⋮----
"""Get the chain of messages from response sequence."""
⋮----
max_depth = 50  # default fallback
⋮----
max_depth = max(len(seq.events) for seq in self._parsed_done_sequences)
⋮----
def _matches_event(self, actual: AgentEvent, expected: AgentEvent) -> bool
⋮----
"""Check if an actual event matches an expected event pattern."""
⋮----
# First try tool_class matching if available
⋮----
# Handle case where actual.tool_class might be a class instance
⋮----
# If actual.tool_class is an instance, get its class
⋮----
actual_class = actual.tool_class
⋮----
actual_class = type(actual.tool_class)
⋮----
# Compare the tool classes
⋮----
# Also check if actual tool is an instance of expected class
⋮----
# If tool_class comparison didn't match, continue to tool_name fallback
⋮----
# Fall back to tool_name comparison for backwards compatibility
⋮----
"""Check if a message chain matches a done sequence.
        We traverse the message chain and try to match the sequence events.
        The events don't have to be consecutive in the chain.
        """
⋮----
events = []
⋮----
responder = None
⋮----
responder = msg.metadata.sender
⋮----
event = self._classify_event(msg, responder)
⋮----
seq_idx = 0
⋮----
expected = sequence.events[seq_idx]
⋮----
def close_loggers(self) -> None
⋮----
"""Close all loggers to ensure clean shutdown."""
⋮----
"""Check if the message chain plus current message matches a done sequence.
        Process messages in reverse order (newest first) and match against
        the sequence events in reverse order.
        """
⋮----
msg_chain = msg_chain + [current_msg]
⋮----
seq_idx = len(sequence.events) - 1
msg_idx = len(msg_chain) - 1
⋮----
msg = msg_chain[msg_idx]
⋮----
responder = current_responder
⋮----
matched = False
⋮----
matched = True
</file>

<file path="langroid/vector_store/base.py">
logger = logging.getLogger(__name__)
⋮----
class VectorStoreConfig(BaseSettings)
⋮----
type: str = ""  # deprecated, keeping it for backward compatibility
collection_name: str | None = "temp"
replace_collection: bool = False  # replace collection if it already exists
storage_path: str = ".qdrant/data"
cloud: bool = False
batch_size: int = 200
embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig(
embedding_model: Optional[EmbeddingModel] = None
timeout: int = 60
host: str = "127.0.0.1"
port: int = 6333
# used when parsing search results back as Document objects
document_class: Type[Document] = Document
metadata_class: Type[DocMetaData] = DocMetaData
# compose_file: str = "langroid/vector_store/docker-compose-qdrant.yml"
full_eval: bool = False  # runs eval without sanitization. Use only on trusted input
⋮----
class VectorStore(ABC)
⋮----
"""
    Abstract base class for a vector store.
    """
⋮----
def __init__(self, config: VectorStoreConfig)
⋮----
@staticmethod
    def create(config: VectorStoreConfig) -> Optional["VectorStore"]
⋮----
@property
    def embedding_dim(self) -> int
⋮----
def clone(self) -> "VectorStore"
⋮----
"""Return a vector-store clone suitable for agent cloning.

        The default implementation deep-copies the configuration, reuses any
        existing embedding model, and instantiates a fresh store of the same
        type. Subclasses can override when sharing the instance is required
        (e.g., embedded/local stores that rely on file locks).
        """
⋮----
config_class = self.config.__class__
config_data = self.config.model_dump(mode="python")
⋮----
config_copy = config_class.model_validate(config_data)
⋮----
# Preserve the calculated collection contents without forcing replaces
⋮----
config_copy.replace_collection = False  # type: ignore[attr-defined]
cloned_embedding: Optional[EmbeddingModel] = None
⋮----
cloned_embedding = self.embedding_model.clone()  # type: ignore[attr-defined]
⋮----
cloned_store = type(self)(config_copy)  # type: ignore[call-arg]
⋮----
# Some stores might not honour replace_collection; ensure same collection
⋮----
@abstractmethod
    def clear_empty_collections(self) -> int
⋮----
"""Clear all empty collections in the vector store.
        Returns the number of collections deleted.
        """
⋮----
@abstractmethod
    def clear_all_collections(self, really: bool = False, prefix: str = "") -> int
⋮----
"""
        Clear all collections in the vector store.

        Args:
            really (bool, optional): Whether to really clear all collections.
                Defaults to False.
            prefix (str, optional): Prefix of collections to clear.
        Returns:
            int: Number of collections deleted.
        """
⋮----
@abstractmethod
    def list_collections(self, empty: bool = False) -> List[str]
⋮----
"""List all collections in the vector store
        (only non-empty collections if empty=False).
        """
⋮----
def set_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
"""
        Set the current collection to the given collection name.
        Args:
            collection_name (str): Name of the collection.
            replace (bool, optional): Whether to replace the collection if it
                already exists. Defaults to False.
        """
⋮----
@abstractmethod
    def create_collection(self, collection_name: str, replace: bool = False) -> None
⋮----
"""Create a collection with the given name.
        Args:
            collection_name (str): Name of the collection.
            replace (bool, optional): Whether to replace the
                collection if it already exists. Defaults to False.
        """
⋮----
@abstractmethod
    def add_documents(self, documents: Sequence[Document]) -> None
⋮----
def compute_from_docs(self, docs: List[Document], calc: str) -> str
⋮----
"""Compute a result on a set of documents,
        using a dataframe calc string like `df.groupby('state')['income'].mean()`.

        If full_eval is False (default), the input expression is sanitized to prevent
        most common code injection attack vectors.
        If full_eval is True, sanitization is bypassed - use only with trusted input!
        """
# convert each doc to a dict, using dotted paths for nested fields
dicts = [flatten_dict(doc.model_dump(by_alias=True)) for doc in docs]
df = pd.DataFrame(dicts)
⋮----
# SECURITY MITIGATION: Eval input is sanitized to prevent most common
# code injection attack vectors when full_eval is False.
vars = {"df": df}
⋮----
calc = sanitize_command(calc)
code = compile(calc, "<calc>", "eval")
result = eval(code, vars, {})
⋮----
# return error message so LLM can fix the calc string if needed
err = f"""
⋮----
# Pd.eval sometimes fails on a perfectly valid exprn like
# df.loc[..., 'column'] with a KeyError.
⋮----
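# --- Illustrative usage sketch (not part of the repository source) ---
# Hedged example of the dataframe-calc interface described above: documents are
# flattened into a pandas DataFrame (nested fields become dotted columns such
# as "metadata.source"), and the `calc` expression is evaluated against it,
# sanitized unless full_eval=True. `vecdb` is a hypothetical concrete store,
# and the field values are made up.
docs = [
    Document(content="CA income row", metadata=DocMetaData(source="ca.txt")),
    Document(content="NY income row", metadata=DocMetaData(source="ny.txt")),
]
answer = vecdb.compute_from_docs(docs, "df.groupby('metadata.source').size()")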
def maybe_add_ids(self, documents: Sequence[Document]) -> None
⋮----
"""Add ids to metadata if absent, since some
        vecdbs don't like having blank ids."""
⋮----
"""
        Find k most similar texts to the given text, in terms of vector distance metric
        (e.g., cosine similarity).

        Args:
            text (str): The text to find similar texts for.
            k (int, optional): Number of similar texts to retrieve. Defaults to 1.
            where (Optional[str], optional): Where clause to filter the search.

        Returns:
            List[Tuple[Document,float]]: List of (Document, score) tuples.

        """
⋮----
"""
        In each doc's metadata, there may be a window_ids field indicating
        the ids of the chunks around the current chunk.
        These window_ids may overlap, so we
        - coalesce each group of overlapping windows into a single window (maintaining ordering),
        - create a new document for each part, preserving metadata,

        We may have stored a longer set of window_ids than we need during chunking.
        Now, we just want `neighbors` on each side of the center of the window_ids list.

        Args:
            docs_scores (List[Tuple[Document, float]]): List of (document, score)
                pairs to which context windows should be added.
            neighbors (int, optional): Number of neighbors on "each side" of match to
                retrieve. Defaults to 0.
                "Each side" here means before and after the match,
                in the original text.

        Returns:
            List[Tuple[Document, float]]: List of (Document, score) tuples.
        """
# We return a larger context around each match, i.e.
# a window of `neighbors` on each side of the match.
docs = [d for d, s in docs_scores]
scores = [s for d, s in docs_scores]
⋮----
doc_chunks = [d for d in docs if d.metadata.is_chunk]
⋮----
window_ids_list = []
id2metadata = {}
# id -> highest score of a doc it appears in
id2max_score: Dict[int | str, float] = {}
⋮----
window_ids = d.metadata.window_ids
⋮----
window_ids = [d.id()]
⋮----
n = len(window_ids)
chunk_idx = window_ids.index(d.id())
neighbor_ids = window_ids[
⋮----
# window_ids could be from different docs,
# and they may overlap, so we coalesce overlapping groups into
# separate windows.
window_ids_list = self.remove_overlaps(window_ids_list)
final_docs = []
final_scores = []
⋮----
metadata = copy.deepcopy(id2metadata[w[0]])
⋮----
document = Document(
# make a fresh id since content is in general different
⋮----
@staticmethod
    def remove_overlaps(windows: List[List[str]]) -> List[List[str]]
⋮----
"""
        Given a collection of windows, where each window is a sequence of ids,
        identify groups of overlapping windows, and for each overlapping group,
        order the chunk-ids using topological sort so they appear in the original
        order in the text.

        Args:
            windows (List[List[str]]): List of windows, where each window is a
                sequence of ids.

        Returns:
            List[List[str]]: List of windows, where each window is a sequence of ids,
                and no two windows overlap.
        """
ids = set(id for w in windows for id in w)
# id -> {window index -> position of id within that window}
id2win2pos: Dict[str, Dict[int, int]] = {id: {} for id in ids}
⋮----
n = len(windows)
# relation between windows:
order = np.zeros((n, n), dtype=np.int8)
⋮----
id = list(set(w).intersection(x))[0]  # any common id
⋮----
order[i, j] = -1  # win i is before win j
⋮----
order[i, j] = 1  # win i is after win j
⋮----
# find groups of windows that overlap, like connected components in a graph
groups = components(np.abs(order))
⋮----
# order the chunk-ids in each group using topological sort
new_windows = []
⋮----
# find total ordering among windows in group based on order matrix
# (this is a topological sort)
_g = np.array(g)
order_matrix = order[_g][:, _g]
ordered_window_indices = topological_sort(order_matrix)
ordered_window_ids = [windows[i] for i in _g[ordered_window_indices]]
flattened = [id for w in ordered_window_ids for id in w]
flattened_deduped = list(dict.fromkeys(flattened))
# Note we are not going to split these, and instead we'll return
# larger windows from concatenating the connected groups.
# This ensures context is retained for LLM q/a
⋮----
@abstractmethod
    def get_all_documents(self, where: str = "") -> List[Document]
⋮----
"""
        Get all documents in the current collection, possibly filtered by `where`.
        """
⋮----
@abstractmethod
    def get_documents_by_ids(self, ids: List[str]) -> List[Document]
⋮----
"""
        Get documents by their ids.
        Args:
            ids (List[str]): List of document ids.

        Returns:
            List[Document]: List of documents
        """
⋮----
@abstractmethod
    def delete_collection(self, collection_name: str) -> None
⋮----
def show_if_debug(self, doc_score_pairs: List[Tuple[Document, float]]) -> None
</file>

<file path="plugins/langroid/skills/patterns/quiet-mode.md">
# Quiet Mode - Suppressing Verbose Agent Output

Suppress Langroid's verbose agent output while showing your own custom progress.

## Key Imports

```python
from langroid.utils.configuration import quiet_mode, settings
```

## Context Manager (Recommended)

```python
from langroid.utils.configuration import quiet_mode

# Wrap agent runs in quiet_mode context
print("Starting writer...")

with quiet_mode():
    result = writer_task.run("Write the proposal")

print(f"Done! {len(result)} chars")
```

## Global Setting

```python
from langroid.utils.configuration import settings

settings.quiet = True   # Enable globally
result = task.run(...)
settings.quiet = False  # Disable
```

## What Gets Suppressed

- Agent streaming output
- Intermediate messages and tool outputs
- Rich console spinners/status messages
- Response statistics (show_stats)
- Debug information

## Pattern: Multi-Step Workflow with Progress

```python
from langroid.utils.configuration import quiet_mode

def run_workflow():
    print("Phase 1: Writing proposal...")
    with quiet_mode():
        proposal = writer_task.run("Write proposal")
    print(f"  ✓ Proposal written ({len(proposal)} chars)")

    print("Phase 2: Reviewing...")
    with quiet_mode():
        edits = reviewer_task.run(f"Review:\n{proposal}")
    print(f"  ✓ Found {len(edits)} issues")

    for i, edit in enumerate(edits, 1):
        print(f"  Applying edit {i}/{len(edits)}...")
        with quiet_mode():
            result = editor_task.run(edit)
        print(f"    ✓ Applied")

    print("Done!")
```

## Thread Safety

- Uses thread-local storage
- Supports nesting (once quiet, stays quiet in nested contexts)
- Exception-safe (reverts even on error)

```python
with quiet_mode():
    with quiet_mode(quiet=False):
        # Still quiet - once enabled, stays enabled in nesting
        assert settings.quiet
```

## Key Files in Langroid Repo

- `langroid/utils/configuration.py` - Main implementation (lines 111-128)
- `langroid/utils/output/status.py` - Status output helper
- `langroid/agent/batch.py` - Real-world usage example
- `tests/main/test_quiet_mode.py` - Test examples
</file>

<file path="plugins/langroid/skills/patterns/SKILL.md">
---
name: patterns
description: Design patterns for the Langroid multi-agent LLM framework. Covers
  agent configuration, tools, task control, and integrations.
---

# Langroid Patterns

## Instructions

Below is an INDEX of design patterns organized by category. Each item describes
WHAT you might want to implement, followed by a REFERENCE to a document with
a complete code example.

Scan this index to find patterns matching your needs, then consult the
corresponding document.

---

## Agent & Task Basics

1. **Task Returns Tool Directly**

   Create a Langroid Agent equipped with a single Tool (a ToolMessage), and wrap
   it in a Task so that running the task returns that ToolMessage directly. Use
   this pattern when you want a simple LLM agent that returns a structured
   response.

   - Reference: `./task-return-tool.md`
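
   A minimal sketch, under assumed import paths and the typed-task `[Tool]`
   syntax (see the reference doc for the complete example):

   ```python
   import langroid as lr

   class CityTool(lr.ToolMessage):
       request: str = "city_tool"
       purpose: str = "Report the <city> that answers the question"
       city: str

   agent = lr.ChatAgent(lr.ChatAgentConfig(name="Geo"))
   agent.enable_message(CityTool)
   task = lr.Task(
       agent,
       interactive=False,
       config=lr.TaskConfig(done_sequences=["T[CityTool]"]),  # exit on the tool
   )[CityTool]  # typed task: run() returns the CityTool instance itself
   answer = task.run("What is the capital of France?")
   ```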

---

## Tool Handlers

2. **Stateful Handler on Agent**

   Define a STATEFUL tool handler as a METHOD on the agent (not inside the
   ToolMessage). Use this pattern when: (a) the tool handler needs to execute
   external operations (API calls, database queries, file I/O), (b) you need to
   track state across retries (e.g., failure counter), (c) the handler needs
   access to agent-level resources (connections, configs), or (d) you want
   Langroid to automatically loop errors back to the LLM for self-correction.
   The method name must match the `request` field of the ToolMessage. Return a
   string for errors (LLM sees it and can retry), or DoneTool(content=result)
   to terminate successfully.

   - Reference: `./agent-tool-handler-with-state.md`
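
   A minimal sketch (import paths assumed; see the reference doc for the
   complete example):

   ```python
   import langroid as lr
   from langroid.agent.tools.orchestration import DoneTool  # path assumed

   class LookupTool(lr.ToolMessage):
       request: str = "lookup"
       purpose: str = "Look up <key> in the external store"
       key: str

   class LookupAgent(lr.ChatAgent):
       def __init__(self, config: lr.ChatAgentConfig):
           super().__init__(config)
           self.failures = 0  # state tracked across retries

       def lookup(self, msg: LookupTool) -> str | DoneTool:
           # handler name matches LookupTool.request
           value = {"alpha": "42"}.get(msg.key)  # stand-in for an external call
           if value is None:
               self.failures += 1
               # returning a string loops the error back to the LLM to retry
               return f"No value for key '{msg.key}'; try a different key."
           return DoneTool(content=value)  # terminate successfully
   ```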

3. **Handler with Validation**

   Validate tool output against agent state before accepting it. Use this
   pattern when: (a) the LLM's tool output must preserve certain content from
   the input (e.g., placeholders, required fields), (b) you want automatic
   retry if validation fails, (c) you need to compare tool output against
   context the LLM received. Define a handler method on a custom agent class
   that stores the input context as state, validates the tool output, and
   returns an error string for retry or AgentDoneTool for success (note: use
   AgentDoneTool, NOT DoneTool). Use `done_sequences=["T[ToolName], A"]` so the
   handler runs before task termination.

   - Reference: `./agent-handler-validation-with-state.md`
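
   A minimal sketch (import path assumed; pair it with
   `done_sequences=["T[RewriteTool], A"]` as noted above):

   ```python
   import langroid as lr
   from langroid.agent.tools.orchestration import AgentDoneTool  # path assumed

   class RewriteTool(lr.ToolMessage):
       request: str = "rewrite"
       purpose: str = "Return the rewritten <text>"
       text: str

   class RewriteAgent(lr.ChatAgent):
       def __init__(self, config: lr.ChatAgentConfig):
           super().__init__(config)
           self.required_placeholder = "{NAME}"  # input context kept as state

       def rewrite(self, msg: RewriteTool) -> str | AgentDoneTool:
           # validate the LLM's tool output against stored state
           if self.required_placeholder not in msg.text:
               return "The rewrite must keep the {NAME} placeholder; try again."
           return AgentDoneTool(content=msg.text)  # AgentDoneTool, NOT DoneTool
   ```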

---

## Task Control

4. **Terminate on Specific Tool**

   Terminate a Task only when a SPECIFIC tool is called. Use
   `TaskConfig(done_sequences=["T[ToolName]"])` to exit immediately when that
   tool is emitted, or `TaskConfig(done_sequences=["T[ToolName], A"])` to exit
   after the tool is emitted AND handled by the agent. Use this when an agent
   has multiple tools but you only want one specific tool to trigger task
   termination.

   - Reference: `./done-sequences-specific-tool.md`
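
   A minimal sketch (`FinalAnswerTool` is a hypothetical ToolMessage already
   enabled on `agent`; import style assumed):

   ```python
   import langroid as lr

   task = lr.Task(
       agent,
       interactive=False,
       # exit only after FinalAnswerTool is emitted AND handled by the agent;
       # use "T[FinalAnswerTool]" alone to exit as soon as it is emitted
       config=lr.TaskConfig(done_sequences=["T[FinalAnswerTool], A"]),
   )
   result = task.run("...")
   ```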

5. **Batch Processing**

   Run the SAME task on MULTIPLE inputs concurrently using `run_batch_tasks()`.
   Use this pattern when: (a) you need to process many items with the same
   agent/task logic, (b) you want parallelism without manual asyncio/threading,
   (c) you need state isolation between items (each gets a cloned agent with
   fresh message history), (d) you want to avoid connection exhaustion from
   creating too many agents manually. Each item gets a cloned task+agent, runs
   independently, results collected in order. Supports batch_size for
   concurrency limiting.

   - Reference: `./run-batch-tasks.md`
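
   A minimal sketch (import path assumed; `task` is any configured `lr.Task`):

   ```python
   from langroid.agent.batch import run_batch_tasks  # path assumed

   questions = ["What is 2+3?", "What is 10*4?", "What is 7-2?"]
   results = run_batch_tasks(
       task,                      # each input runs on a cloned task+agent
       questions,
       input_map=lambda q: q,     # item -> task input
       output_map=lambda r: r,    # task result -> collected output
       sequential=False,          # run concurrently
       batch_size=2,              # at most 2 tasks in flight at once
   )
   ```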

---

## Integration & Output

6. **MCP Tools Integration**

   Enable a Langroid agent to use MCP (Model Context Protocol) tools from an
   external MCP server like Claude Code. Use this pattern when: (a) you want
   your agent to use file editing tools (Read, Edit, Write) from Claude Code,
   (b) you need to connect to any MCP server via stdio transport, (c) you want
   to enable ALL tools from an MCP server or just SPECIFIC tools selectively,
   (d) you want to customize/post-process MCP tool results before returning to
   the LLM. Uses `@mcp_tool` decorator for specific tools or `get_tools_async()`
   for all tools.

   - Reference: `./mcp-tool-integration.md`

7. **Quiet Mode**

   Suppress verbose Langroid agent output (streaming, tool JSON, intermediate
   messages) while showing your own custom progress messages. Use this pattern
   when: (a) you want clean CLI output showing only milestone events, (b) you're
   running a multi-step workflow and want to show progress without agent noise,
   (c) you need thread-safe output control. Use `quiet_mode()` context manager
   to wrap agent task.run() calls, then print your own messages outside the
   context.

   - Reference: `./quiet-mode.md`
</file>

<file path="issues/20251010-concurrent-rag.md">
# Concurrent DocChatAgent Batch Execution

**Date:** 2025-10-10  
**Status:** Resolved  
**Priority:** Medium

## Summary
Batch DocChatAgent runs submitted via `run_batch_tasks(..., sequential=False)` were completing one-by-one because `DocChatAgent.llm_response_async` awaited the fully synchronous `answer_from_docs`, blocking the event loop. Cloned tasks therefore serialized on retrieval/LLM work even though `asyncio.gather` was used.

## Fix
- Wrap `answer_from_docs` with `asyncio.to_thread` inside `DocChatAgent.llm_response_async`, letting each request execute on the default thread pool while the event loop schedules other tasks (see the sketch after this list).
- Generalize vector-store cloning: `ChatAgent.clone()` now delegates to `vecdb.clone()`, the base `VectorStore` deep-copies config and instantiates a fresh store, and `QdrantDB.clone()` simply relies on the base behaviour to spin up independent clients for cloud deployments while keeping local instances shared for file-lock safety.
- Rework `examples/docqa/rag-concurrent.py` to drive task clones with `asyncio.as_completed`, capture per-question START/WORKER/COMPLETE events (including thread IDs and timings), add a `--log-only` mode plus filtering instructions for clean concurrency proof, and expose a `--use-builtin-batch` flag to exercise the original `run_batch_tasks` harness.
- Update the debug script to pass through `query_proxies`, keeping its instrumentation compatible with the main agent, and add a DocChat `run_batch_tasks` regression test covering multiple vector stores.
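
A minimal sketch of the first fix above (simplified from
`DocChatAgent.llm_response_async`; error handling and the non-RAG code paths
omitted):

```python
import asyncio

async def llm_response_async(self, message=None):
    query_str = message if isinstance(message, str) else message.content
    # Offload the fully-synchronous RAG pipeline to the default thread pool,
    # so the event loop can interleave the other batched task clones.
    return await asyncio.to_thread(self.answer_from_docs, query_str)
```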

## Verification
- `uv run python examples/docqa/rag-concurrent.py --num-questions=3`
- `uv run python examples/docqa/rag-concurrent.py --sequential --num-questions=3`
- `uv run python examples/docqa/rag-concurrent.py --num-questions=3 --log-only`
- `uv run python examples/docqa/rag-concurrent.py --use-builtin-batch --num-questions=3 --log-only`
- `uv run python examples/docqa/rag-concurrent-debug.py --num_questions=3`

Concurrent runs now finish ~2× faster than the sequential baseline, the log summary shows overlapping worker threads, and the new regression test (`pytest tests/main/test_vector_stores.py::test_doc_chat_batch_with_vecdb_cloning[...]`) passes across supported vector stores, confirming both concurrency and cloned-store isolation.

<!--AGENT -- look at this new error:-->
## Update 2025-10-11: error involving cross-encoding re-ranker


tests/test_concurrent_rag_simple.py:193:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:354: in run_batch_task_gen
    return run_batched_tasks(
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:265: in run_batched_tasks
    return asyncio.run(run_all_batched_tasks(inputs, batch_size))
../../.pyenv/versions/3.11.0/lib/python3.11/asyncio/runners.py:190: in run
    return runner.run(main)
../../.pyenv/versions/3.11.0/lib/python3.11/asyncio/runners.py:118: in run
    return self._loop.run_until_complete(task)
../../.pyenv/versions/3.11.0/lib/python3.11/asyncio/base_events.py:650: in run_until_complete
    return future.result()
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:231: in run_all_batched_tasks
    results = await _process_batch_async(
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:179: in _process_batch_async
    results = [handle_error(e) for _ in inputs]
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:179: in <listcomp>
    results = [handle_error(e) for _ in inputs]
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:102: in handle_error
    raise e
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:162: in _process_batch_async
    await asyncio.gather(
.venv/lib/python3.11/site-packages/langroid/agent/batch.py:330: in _do_task
    result = await task_i.run_async(
.venv/lib/python3.11/site-packages/langroid/agent/task.py:1020: in run_async
    await self.step_async()
.venv/lib/python3.11/site-packages/langroid/agent/task.py:1352: in step_async
    result = await self.response_async(r, turns)
.venv/lib/python3.11/site-packages/langroid/agent/task.py:1711: in response_async
    result = await response_fn(self.pending_message)
.venv/lib/python3.11/site-packages/langroid/agent/special/doc_chat_agent.py:864: in llm_response_async
    response = await asyncio.to_thread(self.answer_from_docs, query_str)
../../.pyenv/versions/3.11.0/lib/python3.11/asyncio/threads.py:25: in to_thread
    return await loop.run_in_executor(None, func_call)
../../.pyenv/versions/3.11.0/lib/python3.11/asyncio/futures.py:287: in __await__
    yield self  # This tells Task to wait for completion.
../../.pyenv/versions/3.11.0/lib/python3.11/asyncio/futures.py:203: in result
    raise self._exception.with_traceback(self._exception_tb)
../../.pyenv/versions/3.11.0/lib/python3.11/concurrent/futures/thread.py:58: in run
    result = self.fn(*self.args, **self.kwargs)
.venv/lib/python3.11/site-packages/langroid/agent/special/doc_chat_agent.py:1605: in answer_from_docs
    query, extracts = self.get_relevant_extracts(query)
.venv/lib/python3.11/site-packages/langroid/agent/special/doc_chat_agent.py:1495: in get_relevant_extracts
    passages = self.get_relevant_chunks(query, proxies)  # no LLM involved
.venv/lib/python3.11/site-packages/langroid/agent/special/doc_chat_agent.py:1433: in get_relevant_chunks
    passages = self.rerank_with_cross_encoder(query, passages)
.venv/lib/python3.11/site-packages/langroid/agent/special/doc_chat_agent.py:1115: in rerank_with_cross_encoder
    scores = model.predict([(query, p.content) for p in passages])
.venv/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:336: in predict
    self.model.to(self._target_device)
.venv/lib/python3.11/site-packages/transformers/modeling_utils.py:4110: in to
    return super().to(*args, **kwargs)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1355: in to
    return self._apply(convert)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:915: in _apply
    module._apply(fn)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:915: in _apply
    module._apply(fn)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:915: in _apply
    module._apply(fn)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:915: in _apply
    module._apply(fn)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:915: in _apply
    module._apply(fn)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:915: in _apply
    module._apply(fn)
.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:942: in _apply
    param_applied = fn(param)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

t = Parameter containing:
tensor(..., device='meta', size=(1536, 384), requires_grad=True)

    def convert(t):
        try:
            if convert_to_format is not None and t.dim() in (4, 5):
                return t.to(
                    device,
                    dtype if t.is_floating_point() or t.is_complex() else None,
                    non_blocking,
                    memory_format=convert_to_format,
                )
            return t.to(
                device,
                dtype if t.is_floating_point() or t.is_complex() else None,
                non_blocking,
            )
        except NotImplementedError as e:
            if str(e) == "Cannot copy out of meta tensor; no data!":
>               raise NotImplementedError(
                    f"{e} Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() "
                    f"when moving module from meta to a different device."
                ) from None
E               NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

.venv/lib/python3.11/site-packages/torch/nn/modules/module.py:1348: NotImplementedError
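
A plausible direction (a sketch, not a confirmed fix): the meta-tensor failure
can arise when concurrent worker threads construct and `.to()`-move the same
`CrossEncoder`. Sharing one instance per model+device behind a lock, as the
`_CrossEncoderCacheEntry` cache in `langroid/agent/special/doc_chat_agent.py`
does, avoids that:

```python
import threading
from typing import Dict, Tuple

from sentence_transformers import CrossEncoder

_CACHE: Dict[str, Tuple[CrossEncoder, threading.RLock]] = {}
_CACHE_LOCK = threading.Lock()

def get_cross_encoder(
    model_name: str, device: str = "cpu"
) -> Tuple[CrossEncoder, threading.RLock]:
    """Load each (model, device) pair once, process-wide."""
    key = f"{model_name}::{device}"
    with _CACHE_LOCK:
        entry = _CACHE.get(key)
        if entry is None:
            entry = (CrossEncoder(model_name, device=device), threading.RLock())
            _CACHE[key] = entry
    return entry

# Callers hold the per-model lock around predict(), e.g.:
#   model, lock = get_cross_encoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
#   with lock:
#       scores = model.predict(pairs, show_progress_bar=False)
```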
</file>

<file path="langroid/agent/chat_document.py">
class ChatDocAttachment(BaseModel)
⋮----
# any additional data that should be attached to the document
model_config = ConfigDict(extra="allow")
⋮----
class StatusCode(str, Enum)
⋮----
"""Codes meant to be returned by task.run(). Some are not used yet."""
⋮----
OK = "OK"
ERROR = "ERROR"
DONE = "DONE"
STALLED = "STALLED"
INF_LOOP = "INF_LOOP"
KILL = "KILL"
FIXED_TURNS = "FIXED_TURNS"  # reached intended number of turns
MAX_TURNS = "MAX_TURNS"  # hit max-turns limit
MAX_COST = "MAX_COST"
MAX_TOKENS = "MAX_TOKENS"
TIMEOUT = "TIMEOUT"
NO_ANSWER = "NO_ANSWER"
USER_QUIT = "USER_QUIT"
⋮----
class ChatDocMetaData(DocMetaData)
⋮----
parent_id: str = ""  # msg (ChatDocument) to which this is a response
child_id: str = ""  # ChatDocument that has response to this message
agent_id: str = ""  # ChatAgent that generated this message
msg_idx: int = -1  # index of this message in the agent `message_history`
sender: Entity  # sender of the message
# tool_id corresponding to single tool result in ChatDocument.content
oai_tool_id: str | None = None
tool_ids: List[str] = []  # stack of tool_ids; used by OpenAIAssistant
block: None | Entity = None
sender_name: str = ""
recipient: str = ""
usage: Optional[LLMTokenUsage] = None
cached: bool = False
displayed: bool = False
has_citation: bool = False
status: Optional[StatusCode] = None
⋮----
@property
    def parent(self) -> Optional["ChatDocument"]
⋮----
@property
    def child(self) -> Optional["ChatDocument"]
⋮----
class ChatDocLoggerFields(BaseModel)
⋮----
sender_entity: Entity = Entity.USER
⋮----
block: Entity | None = None
tool_type: str = ""
tool: str = ""
content: str = ""
⋮----
@classmethod
    def tsv_header(cls) -> str
⋮----
field_names = cls().model_dump().keys()
⋮----
class ChatDocument(Document)
⋮----
"""
    Represents a message in a conversation among agents. All responders of an agent
    have signature ChatDocument -> ChatDocument (modulo None, str, etc),
    and so does the Task.run() method.

    Attributes:
        oai_tool_calls (Optional[List[OpenAIToolCall]]):
            Tool-calls from an OpenAI-compatible API
        oai_tool_id2results (Optional[OrderedDict[str, str]]):
            Results of tool-calls from OpenAI (dict is a map of tool_id -> result)
        oai_tool_choice: ToolChoiceTypes | Dict[str, str]: Param controlling how the
            LLM should choose tool-use in its response
            (auto, none, required, or a specific tool)
        function_call (Optional[LLMFunctionCall]):
            Function-call from an OpenAI-compatible API
                (deprecated by OpenAI, in favor of tool-calls)
        tool_messages (List[ToolMessage]): Langroid ToolMessages extracted from
            - `content` field (via JSON parsing),
            - `oai_tool_calls`, or
            - `function_call`
        metadata (ChatDocMetaData): Metadata for the message, e.g. sender, recipient.
        attachment (None | ChatDocAttachment): Any additional data attached.
    """
⋮----
reasoning: str = ""  # reasoning produced by a reasoning LLM
content_any: Any = None  # to hold arbitrary data returned by responders
# Original LLM response text including inline thought signatures
# (e.g. <thinking>...</thinking>). Only populated when reasoning was
# extracted from inline tags in the message text. Used by to_LLMMessage()
# to preserve thought signatures in message history, which is critical
# for models like Gemini 3 Flash and Amazon Nova that rely on seeing
# their own thought tags in context to maintain reasoning ability.
content_with_reasoning: Optional[str] = None
files: List[FileAttachment] = []  # list of file attachments
oai_tool_calls: Optional[List[OpenAIToolCall]] = None
oai_tool_id2result: Optional[OrderedDict[str, str]] = None
oai_tool_choice: ToolChoiceTypes | Dict[str, Dict[str, str] | str] = "auto"
function_call: Optional[LLMFunctionCall] = None
# tools that are explicitly added by agent response/handler,
# or tools recognized in the ChatDocument as handle-able tools
tool_messages: List[ToolMessage] = []
# all known tools in the msg that are in an agent's llm_tools_known list,
# even if non-used/handled
# (the list is populated by Agent.has_tool_message_attempt())
all_tool_messages: Optional[List[ToolMessage]] = None
# ID of the agent that populated all_tool_messages (for cache validity)
all_tool_messages_agent_id: Optional[str] = None
⋮----
metadata: ChatDocMetaData
attachment: None | ChatDocAttachment = None
⋮----
def __init__(self, **data: Any)
⋮----
@staticmethod
    def deepcopy(doc: ChatDocument) -> ChatDocument
⋮----
new_doc = copy.deepcopy(doc)
⋮----
@staticmethod
    def from_id(id: str) -> Optional["ChatDocument"]
⋮----
@staticmethod
    def delete_id(id: str) -> None
⋮----
"""Remove ChatDocument with given id from ObjectRegistry,
        and all its descendants.
        """
chat_doc = ChatDocument.from_id(id)
# first delete all descendants
⋮----
next_chat_doc = chat_doc.child
⋮----
chat_doc = next_chat_doc
⋮----
def __str__(self) -> str
⋮----
fields = self.log_fields()
tool_str = ""
⋮----
tool_str = f"{fields.tool_type}[{fields.tool}]: "
recipient_str = ""
⋮----
recipient_str = f"=>{fields.recipient}: "
⋮----
def get_tool_names(self) -> List[str]
⋮----
"""
        Get names of attempted tool usages (JSON or non-JSON) in the content
            of the message.
        Returns:
            List[str]: list of *attempted* tool names
            (We say "attempted" since we ONLY look at the `request` component of the
            tool-call representation, and we're not fully parsing it into the
            corresponding tool message class)

        """
tool_candidates = XMLToolMessage.find_candidates(self.content)
⋮----
tool_candidates = extract_top_level_json(self.content)
⋮----
tools = [json.loads(tc).get("request") for tc in tool_candidates]
⋮----
tool_dicts = [
tools = [td.get("request") for td in tool_dicts if td is not None]
⋮----
def log_fields(self) -> ChatDocLoggerFields
⋮----
"""
        Fields for logging in csv/tsv logger
        Returns:
            ChatDocLoggerFields: field values used by the csv/tsv logger
        """
tool_type = ""  # FUNC or TOOL
tool = ""  # tool name or function name
⋮----
# Skip tool detection for system messages - they contain tool instructions,
# not actual tool calls
⋮----
oai_tools = (
⋮----
tool_type = "FUNC"
tool = self.function_call.name
⋮----
tool_type = "OAI_TOOL"
tool = ",".join(t.function.name for t in oai_tools)  # type: ignore
⋮----
json_tools = self.get_tool_names()
⋮----
json_tools = []
⋮----
tool_type = "TOOL"
tool = json_tools[0]
recipient = self.metadata.recipient
content = self.content
sender_entity = self.metadata.sender
sender_name = self.metadata.sender_name
⋮----
def tsv_str(self) -> str
⋮----
field_values = fields.model_dump().values()
⋮----
def pop_tool_ids(self) -> None
⋮----
"""
        Pop the last tool_id from the stack of tool_ids.
        """
⋮----
@staticmethod
    def _clean_fn_call(fc: LLMFunctionCall | None) -> None
⋮----
# Sometimes an OpenAI LLM (esp gpt-4o) may generate a function-call
# with oddities:
# (a) the `name` is set, as well as `arguments.request` is set,
#  and in langroid we use the `request` value as the `name`.
#  In this case we override the `name` with the `request` value.
# (b) the `name` looks like "functions blah" or just "functions"
#   In this case we strip the "functions" part.
⋮----
request = fc.arguments.get("request")
⋮----
"""
        Convert LLMResponse to ChatDocument.
        Args:
            response (LLMResponse): LLMResponse to convert.
            displayed (bool): Whether this response was displayed to the user.
            recognize_recipient_in_content (bool): Whether to parse message text
                for recipient routing (``TO[<recipient>]:`` and JSON
                ``{"recipient": ...}``). Default True.
        Returns:
            ChatDocument: ChatDocument representation of this LLMResponse.
        """
⋮----
message = message.strip()
⋮----
message = ""
⋮----
# there must be at least one if it's not None
⋮----
@staticmethod
    def from_str(msg: str) -> "ChatDocument"
⋮----
# first check whether msg is structured as TO <recipient>: <message>
⋮----
# check if any top level json specifies a 'recipient'
recipient = top_level_json_field(msg, "recipient")
message = msg  # retain the whole msg in this case
⋮----
"""
        Convert LLMMessage to ChatDocument.

        Args:
            message (LLMMessage): LLMMessage to convert.
            sender_name (str): Name of the sender. Defaults to "".
            recipient (str): Name of the recipient. Defaults to "".

        Returns:
            ChatDocument: ChatDocument representation of this LLMMessage.
        """
# Map LLMMessage Role to ChatDocument Entity
role_to_entity = {
⋮----
sender_entity = role_to_entity.get(message.role, Entity.USER)
⋮----
"""
        Convert to list of LLMMessage, to incorporate into msg-history sent to LLM API.
        Usually there will be just a single LLMMessage, but when the ChatDocument
        contains results from multiple OpenAI tool-calls, we would have a sequence of
        LLMMessages, one per tool-call result.

        Args:
            message (str|ChatDocument): Message to convert.
            oai_tools (Optional[List[OpenAIToolCall]]): Tool-calls currently awaiting
                response, from the ChatAgent's latest message.
        Returns:
            List[LLMMessage]: list of LLMMessages corresponding to this ChatDocument.
        """
⋮----
sender_role = Role.USER
⋮----
message = ChatDocument.from_str(message)
# Prefer content_with_reasoning when available — this preserves
# inline thought signatures (e.g. <thinking>...</thinking>) in
# message history, which certain models (Gemini 3 Flash, Amazon
# Nova) need to maintain reasoning across turns.
# content_with_reasoning is only set when inline tags were
# actually extracted, so this won't interfere with models that
# provide reasoning via a separate API field.
content = (
fun_call = message.function_call
oai_tool_calls = message.oai_tool_calls
⋮----
# This may happen when a (parent agent's) LLM generates a
# Function-call, and it ends up being sent to the current task's
# LLM (possibly because the function-call is mis-named or has other
# issues and couldn't be handled by handler methods).
# But a function-call can only be generated by an entity with
# Role.ASSISTANT, so we instead put the content of the function-call
# in the content of the message.
⋮----
fun_call = None
⋮----
# same reasoning as for function-call above
⋮----
oai_tool_calls = None
# some LLM APIs (e.g. gemini) don't like empty msg
content = content or " "
sender_name = message.metadata.sender_name
tool_ids = message.metadata.tool_ids
tool_id = tool_ids[-1] if len(tool_ids) > 0 else ""
chat_document_id = message.id()
⋮----
sender_role = Role.SYSTEM
⋮----
# This is a response to a function call, so set the role to FUNCTION.
sender_role = Role.FUNCTION
sender_name = message.metadata.parent.function_call.name
⋮----
pending_tool_ids = [tc.id for tc in oai_tools]
# The ChatAgent has pending OpenAI tool-call(s),
# so the current ChatDocument contains
# results for some/all/none of them.
⋮----
# Case 1:
# There was exactly 1 pending tool-call, and in this case
# the result would be a plain string in `content`
⋮----
# Case 2:
# ChatDocument.content has result of a single tool-call
⋮----
# There were > 1 tool-calls awaiting response,
⋮----
sender_role = Role.ASSISTANT
⋮----
tool_id=tool_id,  # for OpenAI Assistant
</file>

<file path="langroid/parsing/parse_json.py">
def is_valid_json(json_str: str) -> bool
⋮----
"""Check if the input string is a valid JSON.

    Args:
        json_str (str): The input string to check.

    Returns:
        bool: True if the input string is a valid JSON, False otherwise.
    """
⋮----
def flatten(nested_list) -> Iterator[str]:  # type: ignore
⋮----
"""Flatten a nested list into a single list of strings"""
⋮----
def get_json_candidates(s: str) -> List[str]
⋮----
"""Get top-level JSON candidates, i.e. strings between curly braces."""
# Define the grammar for matching curly braces
curly_braces = original_text_for(nested_expr("{", "}"))
⋮----
# Parse the string
⋮----
results = curly_braces.search_string(s)
# Properly convert nested lists to strings
⋮----
def parse_imperfect_json(json_string: str) -> Union[Dict[str, Any], List[Any]]
⋮----
# First, try parsing with ast.literal_eval
⋮----
result = ast.literal_eval(json_string)
⋮----
# If ast.literal_eval fails or returns non-dict/list, try repair_json
json_repaired_obj = repair_json(json_string, return_objects=True)
⋮----
# fallback on yaml
yaml_result = yaml.safe_load(json_string)
⋮----
# If all methods fail, raise ValueError
⋮----
def try_repair_json_yaml(s: str) -> str | None
⋮----
"""
    Attempt to load as json, and if it fails, try repairing the JSON.
    If that fails, replace any \n with space as a last resort.
    NOTE - replacing \n with space will result in format loss,
    which may matter in generated code (e.g. python, toml, etc)
    """
s_repaired_obj = repair_json(s, return_objects=True)
⋮----
s_repaired_obj = s_repaired_obj[0]
⋮----
s_repaired_obj = None
⋮----
return json.dumps(s_repaired_obj)  # type: ignore
⋮----
yaml_result = yaml.safe_load(s)
⋮----
# If it still fails, replace any \n with space as a last resort
s = s.replace("\n", " ")
⋮----
return None  # all failed
⋮----
def extract_top_level_json(s: str) -> List[str]
⋮----
"""Extract all top-level JSON-formatted substrings from a given string.

    Args:
        s (str): The input string to search for JSON substrings.

    Returns:
        List[str]: A list of top-level JSON-formatted substrings.
    """
# Find JSON object and array candidates
json_candidates = get_json_candidates(s)
maybe_repaired_jsons = map(try_repair_json_yaml, json_candidates)
⋮----
def top_level_json_field(s: str, f: str) -> Any
⋮----
"""
    Extract the value of a field f from a top-level JSON object.
    If there are multiple, just return the first.

    Args:
        s (str): The input string to search for JSON substrings.
        f (str): The field to extract from the JSON object.

    Returns:
        str: The value of the field f in the top-level JSON object, if any.
            Otherwise, return an empty string.

    Note:
        This function is designed to never crash. If any exception occurs during
        JSON parsing or field extraction, it gracefully returns an empty string.
    """
⋮----
jsons = extract_top_level_json(s)
⋮----
json_data = json.loads(j)
⋮----
# Some responses wrap candidate JSON objects in a list; scan them.
⋮----
# If this specific JSON fails to parse, continue to next candidate
⋮----
# Catch any unexpected errors to ensure we never crash
⋮----
def datetime_to_json(obj: Any) -> Any
⋮----
# Let json.dumps() handle the raising of TypeError for non-serializable objects
</file>

<file path="langroid/agent/special/doc_chat_agent.py">
# # langroid/agent/special/doc_chat_agent.py
"""
Agent that supports asking queries about a set of documents, using
retrieval-augmented generation (RAG).

Functionality includes:
- summarizing a document, with a custom instruction; see `summarize_docs`
- asking a question about a document; see `answer_from_docs`

Note: to use the sentence-transformer embeddings, you must install
langroid with the [hf-embeddings] extra, e.g.:

pip install "langroid[hf-embeddings]"

"""
⋮----
@cache
def apply_nest_asyncio() -> None
⋮----
logger = logging.getLogger(__name__)
⋮----
@dataclass
class _CrossEncoderCacheEntry
⋮----
model: "CrossEncoder"
lock: threading.RLock
⋮----
_CROSS_ENCODER_CACHE: Dict[str, _CrossEncoderCacheEntry] = {}
_CROSS_ENCODER_CACHE_LOCK = threading.Lock()
⋮----
def _auto_cross_encoder_device() -> str
⋮----
mps = getattr(torch.backends, "mps", None)
⋮----
actual_device = device or _auto_cross_encoder_device()
cache_key = f"{model_name}::{actual_device}"
entry = _CROSS_ENCODER_CACHE.get(cache_key)
⋮----
model = CrossEncoder(model_name, device=actual_device)
entry = _CrossEncoderCacheEntry(model=model, lock=threading.RLock())
⋮----
DEFAULT_DOC_CHAT_SYSTEM_MESSAGE = """
⋮----
CHUNK_ENRICHMENT_DELIMITER = "\n<##-##-##>\n"
⋮----
# Check if the sentence_transformers module exists in sys.path
spec = importlib.util.find_spec("sentence_transformers")
has_sentence_transformers = spec is not None
⋮----
has_sentence_transformers = False
⋮----
hf_embed_config = SentenceTransformerEmbeddingsConfig(
⋮----
oai_embed_config = OpenAIEmbeddingsConfig(
⋮----
class ChunkEnrichmentAgentConfig(ChatAgentConfig)
⋮----
batch_size: int = 50
delimiter: str = CHUNK_ENRICHMENT_DELIMITER
enrichment_prompt_fn: Callable[[str], str] = lambda x: x
⋮----
class DocChatAgentConfig(ChatAgentConfig)
⋮----
system_message: str = DEFAULT_DOC_CHAT_SYSTEM_MESSAGE
summarize_prompt: str = SUMMARY_ANSWER_PROMPT_GPT4
# extra fields to include in content as key=value pairs
# (helps retrieval for table-like data)
add_fields_to_content: List[str] = []
filter_fields: List[str] = []  # fields usable in filter
retrieve_only: bool = False  # only retrieve relevant extracts; don't generate summary answer
extraction_granularity: int = 1  # granularity (in sentences) for relevance extraction
filter: str | None = (
⋮----
None  # filter condition for various lexical/semantic search fns
⋮----
conversation_mode: bool = True  # accumulate message history?
# retain retrieved context? Setting to True increases token consumption, but
# helps LLM fix citation errors and improve accuracy of follow-up questions.
retain_context: bool = False
# In assistant mode, DocChatAgent receives questions from another Agent,
# and those will already be in stand-alone form, so in this mode
# there is no need to convert them to stand-alone form.
assistant_mode: bool = False
# Use LLM to generate hypothetical answer A to the query Q,
# and use the embed(A) to find similar chunks in vecdb.
# Referred to as HyDE in the paper:
# https://arxiv.org/pdf/2212.10496.pdf
# It is False by default; its benefits depend on the context.
hypothetical_answer: bool = False
# Optional config for chunk enrichment agent, e.g. to enrich
# chunks with hypothetical questions, or keywords to increase
# the "semantic surface area" of the chunks, which may help
# improve retrieval.
chunk_enrichment_config: Optional[ChunkEnrichmentAgentConfig] = None
⋮----
n_relevant_chunks: int = 3  # how many relevant chunks to retrieve finally
n_similar_chunks: int = 3  # how many similar chunks to retrieve, by each method
n_query_rephrases: int = 0
n_neighbor_chunks: int = 0  # how many neighbors on either side of match to retrieve
n_fuzzy_neighbor_words: int = 100  # num neighbor words to retrieve for fuzzy match
use_fuzzy_match: bool = True
use_bm25_search: bool = True
use_reciprocal_rank_fusion: bool = False
cross_encoder_reranking_model: str = (  # ignored if use_reciprocal_rank_fusion=True
cross_encoder_device: Optional[str] = None  # auto-detected when None (see _auto_cross_encoder_device)
rerank_diversity: bool = True  # rerank to maximize diversity?
rerank_periphery: bool = True  # rerank to avoid Lost In the Middle effect?
rerank_after_adding_context: bool = True  # rerank after adding context window?
# RRF (Reciprocal Rank Fusion) score = 1/(rank + reciprocal_rank_fusion_constant)
# see https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking#how-rrf-ranking-works
reciprocal_rank_fusion_constant: float = 60.0
cache: bool = True  # cache results
debug: bool = False
stream: bool = True  # allow streaming where needed
split: bool = True  # use chunking
relevance_extractor_config: None | RelevanceExtractorAgentConfig = (
⋮----
llm=None  # use the parent's llm unless explicitly set here
⋮----
doc_paths: List[str | bytes] = []
default_paths: List[str] = [
parsing: ParsingConfig = ParsingConfig(  # modify as needed
⋮----
chunk_size=1000,  # aim for this many tokens per chunk
overlap=100,  # overlap between chunks
⋮----
# aim to have at least this many chars per chunk when
# truncating due to punctuation
⋮----
discard_chunk_chars=5,  # discard chunks with fewer than this many chars
# set deprecated n_similar_docs to None; use n_similar_chunks above instead
⋮----
n_neighbor_ids=0,  # num chunk IDs to store on either side of each chunk
⋮----
# NOTE: PDF parsing is extremely challenging, and each library
# has its own strengths and weaknesses.
# Try one that works for your use case.
# or "unstructured", "fitz", "pymupdf4llm", "pypdf"
⋮----
crawler_config: Optional[BaseCrawlerConfig] = TrafilaturaConfig()
⋮----
# Allow vecdb to be None in case we want to explicitly set it later
vecdb: Optional[VectorStoreConfig] = QdrantDBConfig(
⋮----
llm: LLMConfig = OpenAIGPTConfig(
prompts: PromptsConfig = PromptsConfig(
⋮----
def _append_metadata_source(orig_source: str, source: str) -> str
⋮----
class DocChatAgent(ChatAgent)
⋮----
"""
    Agent for chatting with a collection of documents.
    """
⋮----
# Handle backward compatibility for deprecated n_similar_docs
⋮----
# Use the deprecated value for both parameters
⋮----
def _clone_extra_state(self, new_agent: "ChatAgent") -> None
⋮----
def clear(self) -> None
⋮----
"""Clear the document collection and the specific collection in vecdb"""
⋮----
collection_name = self.vecdb.config.collection_name
⋮----
# Note we may have used a vecdb with a config.collection_name
# different from the agent's config.vecdb.collection_name!!
⋮----
# Close the old vecdb before creating a new one
old_vecdb = self.vecdb
⋮----
def ingest(self) -> None
⋮----
"""
        Chunk + embed + store docs specified by self.config.doc_paths
        """
⋮----
# we must be using a previously defined collection
# But let's get all the chunked docs so we can
# do keyword and other non-vector searches
⋮----
self.ingest_doc_paths(self.config.doc_paths)  # type: ignore
⋮----
"""Split, ingest docs from specified paths,
        do not add these to config.doc_paths.

        Args:
            paths: document paths, urls or byte-content of docs.
                The bytes option is intended to support cases where a document
                has already been read in as bytes (e.g. from an API or a database),
                and we want to avoid having to write it to a temporary file
                just to read it back in.
            metadata: List of metadata dicts, one for each path.
                If a single dict is passed in, it is used for all paths.
            doc_type: DocumentType to use for parsing, if known.
                MUST apply to all docs if specified.
                This is especially useful when the `paths` are of bytes type,
                to help with document type detection.
        Returns:
            List of Document objects
        """
⋮----
paths = [paths]
all_paths = paths
paths_meta: Dict[int, Any] = {}
urls_meta: Dict[int, Any] = {}
idxs = range(len(all_paths))
⋮----
urls = [all_paths[i] for i in url_idxs]
paths = [all_paths[i] for i in path_idxs]
bytes_list = [all_paths[i] for i in bytes_idxs]
⋮----
idx2meta = {
⋮----
)  # appease mypy
⋮----
idx2meta = {p: metadata for p in idxs}
⋮----
idx2meta = {p: metadata.model_dump() for p in idxs}
urls_meta = {u: idx2meta[u] for u in url_idxs}
paths_meta = {p: idx2meta[p] for p in path_idxs}
docs: List[Document] = []
parser: Parser = Parser(self.config.parsing)
⋮----
meta = urls_meta.get(ui, {})
loader = URLLoader(
⋮----
)  # type: ignore
url_docs = loader.load()
# update metadata of each doc with meta
⋮----
orig_source = d.metadata.source
⋮----
if len(paths) > 0:  # paths OR bytes are handled similarly
⋮----
meta = paths_meta.get(pi, {})
p = all_paths[pi]
path_docs = RepoLoader.get_documents(
⋮----
n_docs = len(docs)
n_splits = self.ingest_docs(docs, split=self.config.split)
⋮----
n_urls = len(urls)
n_paths = len(paths)
⋮----
path_reps = [p if isinstance(p, str) else "bytes" for p in paths]
print("\n".join([u for u in urls if isinstance(u, str)]))  # appease mypy
⋮----
"""
        Chunk docs into pieces, map each chunk to vec-embedding, store in vec-db

        Args:
            docs: List of Document objects
            split: Whether to split docs into chunks. Default is True.
                If False, docs are treated as "chunks" and are not split.
            metadata: List of metadata dicts, one for each doc, to augment
                whatever metadata is already in the doc.
                [ASSUME no conflicting keys between the two metadata dicts.]
                If a single dict is passed in, it is used for all docs.
        """
⋮----
m_dict = m if isinstance(m, dict) else m.model_dump()  # type: ignore
d.metadata = d.metadata.model_copy(update=m_dict)  # type: ignore
⋮----
docs = self.parser.split(docs)
⋮----
# we're not splitting, so we mark each doc as a chunk
⋮----
docs = self.enrich_chunks(docs)
⋮----
# If any additional fields need to be added to content,
# add them as key=value pairs for all docs, before batching.
# This helps retrieval for table-like data.
# Note we need to do this at this stage so that the embeddings
# are computed on the full content with these additional fields.
⋮----
fields = [
⋮----
key_vals = extract_fields(d, fields)
⋮----
docs = docs[: self.config.parsing.max_chunks]
# vecdb should take care of adding docs in batches;
# batching can be controlled via vecdb.config.batch_size
⋮----
return 0  # Return 0 since no documents were added
⋮----
def retrieval_tool(self, msg: RetrievalTool) -> str
⋮----
"""Handle the RetrievalTool message"""
⋮----
content_doc = self.answer_from_docs(msg.query)
⋮----
"""
        Convert dataframe so it is compatible with Document class:
        - has "content" column
        - has an "id" column to be used as Document.metadata.id

        Args:
            df: dataframe to convert
            content: name of content column
            metadata: list of metadata column names

        Returns:
            Tuple[pd.DataFrame, List[str]]: dataframe, metadata
                - dataframe: dataframe with "content" column and "id" column
                - metadata: list of metadata column names, including "id"
        """
⋮----
# rename content column to "content", leave existing column intact
df = df.rename(columns={content: "content"}, inplace=False)
⋮----
actual_metadata = metadata.copy()
⋮----
docs = dataframe_to_documents(df, content="content", metadata=metadata)
ids = [str(d.id()) for d in docs]
⋮----
"""
        Ingest a dataframe into vecdb.
        """
⋮----
# When ingesting a dataframe we will no longer do any chunking,
# so we mark each doc as a chunk.
# TODO - revisit this since we may still want to chunk large text columns
⋮----
def set_filter(self, filter: str) -> None
⋮----
"""
        Setup `self.chunked_docs` and `self.chunked_docs_clean`
        based on possible filter.
        These will be used in various non-vector-based search functions,
        e.g. self.get_similar_chunks_bm25(), self.get_fuzzy_matches(), etc.

        Args:
            docs: List of Document objects. This is empty when we are calling this
                method after initial doc ingestion.
            filter: Filter condition for various lexical/semantic search fns.
        """
⋮----
# no filter, so just use the docs passed in
⋮----
def get_field_values(self, fields: list[str]) -> Dict[str, str]
⋮----
"""Get string-listing of possible values of each field,
        e.g.
        {
            "genre": "crime, drama, mystery, ... (10 more)",
            "certificate": "R, PG-13, PG, R",
        }
        The field names may have "metadata." prefix, e.g. "metadata.genre".
        """
field_values: Dict[str, Set[str]] = {}
# make empty set for each field
⋮----
# get all documents and accumulate possible values of each field until 10
docs = self.vecdb.get_all_documents()  # only works for vecdbs that support this
⋮----
# extract fields from d
doc_field_vals = extract_fields(d, fields)
# the `field` returned by extract_fields may contain only the last
# part of the field name, e.g. "genre" instead of "metadata.genre",
# so we use the orig_field name to fill in the values
⋮----
# For each field make a string showing list of possible values,
# truncate to 20 values, and if there are more, indicate how many
# more there are, e.g. Genre: crime, drama, mystery, ... (20 more)
field_values_list = {}
⋮----
vals = list(field_values[f])
n = len(vals)
remaining = n - 20
vals = vals[:20]
⋮----
# make a string of the values, ensure they are strings
⋮----
def doc_length(self, docs: List[Document]) -> int
⋮----
"""
        Calc token-length of a list of docs
        Args:
            docs: list of Document objects
        Returns:
            int: number of tokens
        """
⋮----
def user_docs_ingest_dialog(self) -> None
⋮----
"""
        Ask user to select doc-collection, enter filenames/urls, and ingest into vecdb.
        """
⋮----
n_deletes = self.vecdb.clear_empty_collections()
collections = self.vecdb.list_collections()
collection_name = "NEW"
is_new_collection = False
replace_collection = False
⋮----
n = len(collections)
delete_str = (
⋮----
choice = Prompt.ask(
⋮----
confirm = Prompt.ask(
⋮----
collection_name = collections[int(choice) - 1]
⋮----
replace_collection = choice == "y"
⋮----
is_new_collection = True
collection_name = Prompt.ask(
⋮----
default_urls_str = (
⋮----
inputs = get_list_from_user()
⋮----
inputs = self.config.default_paths
self.config.doc_paths = inputs  # type: ignore
⋮----
query_str: str | None
⋮----
query_str = message.content
⋮----
query_str = message
⋮----
# direct query to LLM
query_str = query_str[1:] if query_str is not None else None
⋮----
response = super().llm_response(query_str)
⋮----
response = self.answer_from_docs(query_str)
# Citation details (if any) are NOT generated by LLM
# (We extract these from LLM's numerical citations),
# so render them here
⋮----
response = await super().llm_response_async(query_str)
⋮----
# Offload blocking retrieval/LLM work to default thread pool so
# asyncio batch runners can make progress concurrently.
response = await asyncio.to_thread(self.answer_from_docs, query_str)
⋮----
@staticmethod
    def doc_string(docs: List[Document]) -> str
⋮----
"""
        Generate a string representation of a list of docs.
        Args:
            docs: list of Document objects
        Returns:
            str: string representation
        """
contents = [d.content for d in docs]
sources = [d.metadata.source for d in docs]
sources = [f"SOURCE: {s}" if s is not None else "" for s in sources]
⋮----
"""
        Given a question and a list of (possibly) doc snippets,
        generate an answer if possible
        Args:
            question: question to answer
            passages: list of `Document` objects each containing a possibly relevant
                snippet, and metadata
        Returns:
            a `Document` object containing the answer,
            and metadata containing source citations

        """
⋮----
passages_str = self.doc_string(passages)
# Substitute Q and P into the templatized prompt
⋮----
final_prompt = self.config.summarize_prompt.format(
⋮----
# Generate the final verbatim extract based on the final prompt.
# Note this will send entire message history, plus this final_prompt
# to the LLM, and self.message_history will be updated to include
# 2 new LLMMessage objects:
# one for `final_prompt`, and one for the LLM response
⋮----
answer_doc = super().llm_response(final_prompt)
⋮----
# respond with temporary context
answer_doc = super()._llm_response_temp_context(question, final_prompt)
⋮----
answer_doc = super().llm_response_forget(final_prompt)
⋮----
final_answer = answer_doc.content.strip()
⋮----
# extract references like [^2], [^3], etc. from the final answer
citations = extract_markdown_references(final_answer)
# format the cited references as a string suitable for markdown footnote
⋮----
content=final_answer,  # does not contain citations
⋮----
source=citations_str,  # only the reference headers
source_content=full_citations_str,  # reference + content
⋮----
def llm_hypothetical_answer(self, query: str) -> str
⋮----
# TODO: provide an easy way to
# Adjust this prompt depending on context.
answer = self.llm_response_forget(
⋮----
def enrich_chunks(self, docs: List[Document]) -> List[Document]
⋮----
"""
        Enrich chunks using Agent configured with self.config.chunk_enrichment_config.

        We assume that the system message of the agent is set in such a way
        that when we run
        ```
        prompt = self.config.chunk_enrichment_config.enrichment_prompt_fn(text)
        result = await agent.llm_response_forget_async(prompt)
        ```

        then `result.content` will contain the augmentation to the text.

        Args:
            docs: List of document chunks to enrich

        Returns:
            List[Document]: Documents (chunks) enriched with additional text,
                separated by a delimiter.
        """
⋮----
enrichment_config = self.config.chunk_enrichment_config
agent = ChatAgent(enrichment_config)
⋮----
# Process chunks in parallel using run_batch_agent_method
questions_batch = run_batch_agent_method(
⋮----
# Combine original content with generated questions
augmented_docs = []
⋮----
# Combine original content with questions in a structured way
combined_content = (
⋮----
new_doc = doc.model_copy(
⋮----
def llm_rephrase_query(self, query: str) -> List[str]
⋮----
rephrases = self.llm_response_forget(
⋮----
# find similar docs using bm25 similarity:
# these may sometimes be more likely to contain a relevant verbatim extract
⋮----
docs_scores = find_closest_matches_with_bm25(
⋮----
self.chunked_docs_clean,  # already pre-processed!
⋮----
# find similar docs using fuzzy matching:
⋮----
fuzzy_match_docs = find_fuzzy_matches_in_docs(
⋮----
device = self.config.cross_encoder_device
entry = _get_cross_encoder_entry(
pair_inputs = [(query, p.content) for p in passages]
⋮----
scores = entry.model.predict(pair_inputs, show_progress_bar=False)
# Convert to [0,1] so we could use a cutoff later.
scores = 1.0 / (1 + np.exp(-np.array(scores)))
# get top k scoring passages
sorted_pairs = sorted(
passages = [d for _, d in sorted_pairs]
⋮----
def rerank_with_diversity(self, passages: List[Document]) -> List[Document]
⋮----
"""
        Rerank a list of items in such a way that each successive item is least similar
        (on average) to the earlier items.

        Args:
            passages (List[Document]): A list of Documents to be reranked.

        Returns:
            List[Document]: A reranked list of Documents.
        """
⋮----
emb_model = self.vecdb.embedding_model
emb_fn = emb_model.embedding_fn()
embs = emb_fn([p.content for p in passages])
embs_arr = [np.array(e) for e in embs]
indices = list(range(len(passages)))
⋮----
# Helper function to compute average similarity to
# items in the current result list.
def avg_similarity_to_result(i: int, result: List[int]) -> float
⋮----
return sum(  # type: ignore
⋮----
# copy passages to items
result = [indices.pop(0)]  # Start with the first item.
⋮----
# Find the item that has the least average similarity
# to items in the result list.
least_similar_item = min(
⋮----
# return passages in order of result list
⋮----
def rerank_to_periphery(self, passages: List[Document]) -> List[Document]
⋮----
"""
        Rerank to avoid Lost In the Middle (LIM) problem,
        where LLMs pay more attention to items at the ends of a list,
        rather than the middle. So we re-rank to make the best passages
        appear at the periphery of the list.
        https://arxiv.org/abs/2307.03172

        Example reranking:
        1 2 3 4 5 6 7 8 9 ==> 1 3 5 7 9 8 6 4 2

        Args:
            passages (List[Document]): A list of Documents to be reranked.

        Returns:
            List[Document]: A reranked list of Documents.

        """
# Splitting items into odds and evens based on index, not value
odds = passages[::2]
evens = passages[1::2][::-1]
⋮----
# Merging them back together
⋮----
"""
        In each doc's metadata, there may be a window_ids field indicating
        the ids of the chunks around the current chunk. We use these stored
        window_ids to retrieve the desired number
        (self.config.n_neighbor_chunks) of neighbors
        on either side of the current chunk.

        Args:
            docs_scores (List[Tuple[Document, float]]): List of (document, score)
                pairs to which context windows should be added.

        Returns:
            List[Tuple[Document, float]]: List of (Document, score) tuples.
        """
⋮----
# Do not add context window when there are other fields besides just
# content and metadata, since we do not know how to set those other fields
# for newly created docs with combined content.
⋮----
"""
        Get semantic search results from vecdb.
        Args:
            query (str): query to search for
            k (int): number of results to return
        Returns:
            List[Tuple[Document, float]]: List of (Document, score) tuples.
        """
⋮----
# Note: for dynamic filtering based on a query, users can
# use the `temp_update` context-manager to pass in a `filter` to self.config,
# e.g.:
# with temp_update(self.config, {"filter": "metadata.source=='source1'"}):
#     docs_scores = self.get_semantic_search_results(query, k=k)
# This avoids having pass the `filter` argument to every function call
# upstream of this one.
# The `temp_update` context manager is defined in
# `langroid/utils/pydantic_utils.py`
⋮----
"""
        The retrieval stage in RAG: get doc-chunks that are most "relevant"
        to the query (and possibly any proxy queries), from the document-store,
        which currently is the vector store,
        but in theory could be any document store, or even web-search.
        This stage does NOT involve an LLM, and the retrieved chunks
        could either be pre-chunked text (from the initial pre-processing stage
        where chunks were stored in the vector store), or they could be
        dynamically retrieved based on a window around a lexical match.

        These are the steps (some optional based on config):
        - semantic search based on vector-embedding distance, from vecdb
        - lexical search using bm25-ranking (keyword similarity)
        - fuzzy matching (keyword similarity)
        - re-ranking of doc-chunks by relevance to query, using cross-encoder,
           and pick top k

        Args:
            query: original query (assumed to be in stand-alone form)
            query_proxies: possible rephrases, or hypothetical answer to query
                    (e.g. for HyDE-type retrieval)

        Returns:
            List[Document]: list of relevant doc-chunks.
        """
⋮----
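# [Editor's note] High-level retrieval flow, using method names from this
# class (some calls are elided below):
#   1. self.get_semantic_search_results(query, k)   # vector search
#   2. self.get_similar_chunks_bm25(query, ...)      # lexical (bm25) search
#   3. self.get_fuzzy_matches(query, ...)            # fuzzy keyword match
#   4. RRF rank-fusion, or self.rerank_with_cross_encoder(query, passages)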
# if we are using cross-encoder reranking or reciprocal rank fusion (RRF),
# we can retrieve more docs during retrieval, and leave it to the cross-encoder
# or RRF reranking to whittle down to self.config.n_similar_chunks
retrieval_multiple = (
⋮----
docs_and_scores: List[Tuple[Document, float]] = []
⋮----
# sort by score descending
docs_and_scores = sorted(
⋮----
# keep only docs with unique d.id()
id2_rank_semantic = {d.id(): i for i, (d, _) in enumerate(docs_and_scores)}
id2doc = {d.id(): d for d, _ in docs_and_scores}
# make sure we get unique docs
passages = [id2doc[id] for id in id2_rank_semantic.keys()]
⋮----
id2_rank_bm25 = {}
⋮----
# TODO: Add score threshold in config
docs_scores = self.get_similar_chunks_bm25(query, retrieval_multiple)
⋮----
# if we're not re-ranking with a cross-encoder, and have RRF enabled,
# instead of accumulating the bm25 results into passages,
# we collect these ranks for Reciprocal Rank Fusion down below.
docs_scores = sorted(docs_scores, key=lambda x: x[1], reverse=True)
id2_rank_bm25 = {d.id(): i for i, (d, _) in enumerate(docs_scores)}
⋮----
# eliminate duplicate ids
passages = [id2doc[id] for id in id2doc.keys()]
⋮----
id2_rank_fuzzy = {}
⋮----
fuzzy_match_doc_scores = self.get_fuzzy_matches(query, retrieval_multiple)
⋮----
# if we're not re-ranking with a cross-encoder,
# instead of accumulating the fuzzy match results into passages,
⋮----
fuzzy_match_doc_scores = sorted(
id2_rank_fuzzy = {
⋮----
# Since we're not using cross-encoder re-ranking,
# we need to re-order the retrieved chunks from potentially three
# different retrieval methods (semantic, bm25, fuzzy), where the
# similarity scores are on different scales.
# We order the retrieved chunks using Reciprocal Rank Fusion (RRF) score.
# Combine the ranks from each id2doc_rank_* dict into a single dict,
# where the reciprocal rank score is the sum of
# 1/(rank + self.config.reciprocal_rank_fusion_constant).
# See https://learn.microsoft.com/en-us/azure/search/hybrid-search-ranking
#
# Note: diversity/periphery-reranking below may modify the final ranking.
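# [Editor's note] Concretely, with constant c and per-method rank r_m(d):
#     RRF(d) = sum over methods m of 1 / (r_m(d) + c)
# where a doc not retrieved by method m gets the fallback rank max_rank + 1.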
id2_reciprocal_score = {}
⋮----
# Use max_rank instead of infinity to avoid bias against
# single-method docs
max_rank = self.config.n_similar_chunks * retrieval_multiple
rank_semantic = id2_rank_semantic.get(id_, max_rank + 1)
rank_bm25 = id2_rank_bm25.get(id_, max_rank + 1)
rank_fuzzy = id2_rank_fuzzy.get(id_, max_rank + 1)
c = self.config.reciprocal_rank_fusion_constant
reciprocal_fusion_score = (
⋮----
# sort the docs by the reciprocal score, in descending order
id2_reciprocal_score = OrderedDict(
# each method retrieved up to retrieval_multiple * n_similar_chunks,
# so we need to take the top n_similar_chunks from the combined list
passages = [
# passages must have distinct ids
⋮----
passages_scores = [(p, 0.0) for p in passages]
passages_scores = self.add_context_window(passages_scores)
passages = [p for p, _ in passages_scores]
# now passages can potentially have a lot of doc chunks,
# so we re-rank them using a cross-encoder scoring model
# (provided that `reciprocal_rank_fusion` is not enabled),
# and pick top k where k = config.n_similar_chunks
# https://www.sbert.net/examples/applications/retrieve_rerank
⋮----
passages = self.rerank_with_cross_encoder(query, passages)
⋮----
# reorder to increase diversity among top docs
passages = self.rerank_with_diversity(passages)
⋮----
# reorder so most important docs are at periphery
# (see Lost In the Middle issue).
passages = self.rerank_to_periphery(passages)
⋮----
@no_type_check
    def get_relevant_extracts(self, query: str) -> Tuple[str, List[Document]]
⋮----
"""
        Get list of (verbatim) extracts from doc-chunks relevant to answering a query.

        These are the stages (some optional based on config):
        - use LLM to convert query to stand-alone query
        - optionally use LLM to rephrase query to use below
        - optionally use LLM to generate hypothetical answer (HyDE) to use below.
        - get_relevant_chunks(): get doc-chunks relevant to query and proxies
        - use LLM to get relevant extracts from doc-chunks

        Args:
            query (str): query to search for

        Returns:
            query (str): stand-alone version of input query
            List[Document]: list of relevant extracts

        """
collection_name = (
has_vecdb_collection = (
⋮----
# Regardless of whether we are in conversation mode or not,
# for relevant doc/chunk extraction, we must convert the query
# to a standalone query to get more relevant results.
⋮----
query = self.llm.followup_to_standalone(self.dialog, query)
⋮----
proxies = []
⋮----
answer = self.llm_hypothetical_answer(query)
proxies = [answer]
⋮----
rephrases = self.llm_rephrase_query(query)
⋮----
passages = self.get_relevant_chunks(query, proxies)  # no LLM involved
⋮----
passages = self.chunked_docs
⋮----
extracts = passages
⋮----
# these are async calls, one per passage; turn off streaming
extracts = self.get_verbatim_extracts(query, passages)
extracts = [e for e in extracts if e.content != NO_ANSWER]
⋮----
def remove_chunk_enrichments(self, passages: List[Document]) -> List[Document]
⋮----
"""Remove any enrichments (like hypothetical questions, or keywords)
        from documents.
        Only cleans if enrichment was enabled in config.

        Args:
            passages: List of documents to clean

        Returns:
            List of documents with only original content
        """
⋮----
delimiter = self.config.chunk_enrichment_config.delimiter
⋮----
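# [Editor's note] Presumably each enriched chunk is stored as
# "<original content><delimiter><enrichment>", so cleanup keeps only the
# text before the delimiter (body elided above; illustrative).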
"""
        Run RelevanceExtractorAgent in async/concurrent mode on passages,
        to extract portions relevant to answering query, from each passage.
        Args:
            query (str): query to answer
            passages (List[Document]): list of passages to extract from

        Returns:
            List[Document]: list of Documents containing extracts and metadata.
        """
passages = self.remove_chunk_enrichments(passages)
⋮----
agent_cfg = self.config.relevance_extractor_config
⋮----
# no relevance extraction: simply return passages
⋮----
# Use main DocChatAgent's LLM if not provided explicitly:
# this reduces setup burden on the user
⋮----
agent_cfg.llm.stream = False  # disable streaming for concurrent calls
⋮----
agent = RelevanceExtractorAgent(agent_cfg)
task = Task(
⋮----
extracts: list[str] = run_batch_tasks(
⋮----
)  # type: ignore
⋮----
# Caution: Retain ALL other fields in the Documents (which could be
# other than just `content` and `metadata`), while simply replacing
# `content` with the extracted portions
passage_extracts = []
⋮----
p_copy = p.model_copy()
⋮----
def answer_from_docs(self, query: str) -> ChatDocument
⋮----
"""
        Answer query based on relevant docs from the VecDB

        Args:
            query (str): query to answer

        Returns:
            Document: answer
        """
response = ChatDocument(
# query may be updated to a stand-alone version
⋮----
# only return extracts, skip LLM-based summary answer
meta = dict(
# copy metadata from first doc, unclear what to do here.
⋮----
metadata=ChatDocMetaData(**meta),  # type: ignore
⋮----
response = self.get_summary_answer(query, extracts)
⋮----
self.response = response  # save last response
⋮----
"""Summarize all docs"""
⋮----
full_text = "\n\n".join([d.content for d in self.original_docs])
⋮----
tot_tokens = self.parser.num_tokens(full_text)
MAX_INPUT_TOKENS = (
⋮----
# truncate
full_text = self.parser.tokenizer.decode(
⋮----
prompt = f"""
⋮----
summary = ChatAgent.llm_response(self, prompt)
⋮----
def justify_response(self) -> ChatDocument | None
⋮----
"""Show evidence for last response"""
⋮----
source = self.response.metadata.source
</file>

<file path="mkdocs.yml">
site_name: "langroid"
repo_name: langroid/langroid
site_description: "Langroid LLM App Development Framework"
repo_url: https://github.com/langroid/langroid
site_url: https://langroid.github.io/langroid

edit_uri: ""
extra_css:
  - stylesheets/extra.css

theme:
  logo: assets/orange-logo-lambda-563.png
  favicon: assets/orange-logo-lambda-563.png
  features:
    - navigation.tabs
#    - navigation.tracking
#    - navigation.sections
#    - navigation.indexes
    - toc
    - content.code.copy
    - content.code.select
    - content.code.annotate
  icon:
      repo: fontawesome/brands/github
  name: material
  custom_dir: docs/overrides
  palette:
    # Palette toggle for light mode
    - scheme: default
      primary: indigo
      accent: indigo
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode

    # Palette toggle for dark mode
    - scheme: slate
      primary: indigo
      accent: indigo
      toggle:
        icon: material/brightness-4
        name: Switch to light mode

plugins:
  - blog:
      archive: false
      blog_toc: true
      categories: false
      blog_dir: blog

  - rss:
      enabled: true
      match_path: blog/posts/.*
      image: https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Feed-icon.svg/128px-Feed-icon.svg.png
      date_from_meta:
        as_creation: date
      categories:
        - categories
        - tags
  - search
  - autorefs
  #- awesome-pages
  - gen-files:
      scripts:
      - docs/auto_docstring.py
      #- docs/gen_ref_pages.py
  - literate-nav:
      nav_file: SUMMARY.md
  - mkdocstrings:
      default_handler: python
      handlers:
        python:
          paths: [.]
          options:
            members_order: source
            separate_signature: false
            filters: ["!^_"]
            docstring_options:
              ignore_init_summary: true
            merge_init_into_class: true
  - section-index

watch:
  - langroid

nav:
  - Home: index.md
  - Blog: blog/index.md
  - Getting Started:
    - quick-start/index.md
    - Setup: quick-start/setup.md
    - LLM interaction: quick-start/llm-interaction.md
    - Simple Chat Agent: quick-start/chat-agent.md
    - Task Delegation: quick-start/multi-agent-task-delegation.md
    - Two Agent Chat: quick-start/two-agent-chat-num.md
    - Three Agent Chat: quick-start/three-agent-chat-num.md
    - Agent with Tools/Functions: quick-start/chat-agent-tool.md
    - Three Agents, with Routing: quick-start/three-agent-chat-num-router.md
    - Agent with Retrieval: quick-start/chat-agent-docs.md
  # defer to gen-files + literate-nav
  - FAQ: FAQ.md
  - Notes-Updates:
      - Overview: notes/overview.md
      - XML-based Tools: notes/xml-tools.md
      - Async Streaming: notes/async-streaming.md
      - Knowledge Graphs: notes/knowledge-graphs.md
      - Gemini LLMs, Embeddings, Vertex AI: notes/gemini.md
      - LLM-based Pdf Parsing: notes/llm-pdf-parser.md
      - Large Tool Results: notes/large-tool-results.md
      - GLHF.chat Support: notes/glhf-chat.md
      - Structured Output: notes/structured-output.md
      - Tool Handlers: notes/tool-message-handler.md
      - Task Termination: notes/task-termination.md
      - Message Routing: notes/message-routing.md
      - Llama.cpp Embeddings: notes/llama-cpp-embeddings.md
      - Azure OpenAI models: notes/azure-openai-models.md
      - Custom Azure OpenAI client: notes/custom-azure-client.md
      - Enriching Chunks for Retrieval: notes/enriching-for-retrieval.md
      - Reasoning Content: notes/reasoning-content.md
      - Weaviate: notes/weaviate.md
      - Handling LLM Non-Tool Messages: notes/handle-llm-no-tool.md
      - PGVector: notes/pgvector.md
      - Pinecone: notes/pinecone.md
      - Tavily Search Tool: notes/tavily_search.md
      - Seltz Search Tool: notes/seltz_search.md
      - Marker Pdf Parser: notes/marker-pdf.md
      - URLLoader: notes/url_loader.md
      - Crawl4AI Crawler: notes/crawl4ai.md
      - LangDB AI Gateway: notes/langdb.md
      - Portkey AI Gateway: notes/portkey.md
      - Markitdown Parsers: notes/markitdown.md
      - LiteLLM Proxy: notes/litellm-proxy.md
      - Chunking: notes/chunking.md
      - Image, PDF Input: notes/file-input.md
      - MCP Tools: notes/mcp-tools.md
      - Code-Injection Protection: notes/code-injection-protection.md
      - TaskTool: notes/task-tool.md
      - Local Qdrant VectorDB Cleanup: notes/qdrant-resource-cleanup.md
      - OpenAI HTTP Client Configuration: notes/openai-http-client.md
      - OpenAI Client Caching: notes/openai-client-caching.md
      - Cross-Encoder Reranking: notes/cross-encoder.md
      - Task Logs: notes/html-logger.md
      - Pydantic v2 Migration: notes/pydantic-v2-migration.md

  - Examples:
    - Guide: examples/guide.md
    - Hierarchical Agent Computation: examples/agent-tree.md
    - Demos:
      - Audience Targeting: demos/targeting/audience-targeting.md
  - Tutorials:
    - Langroid Tour: tutorials/langroid-tour.md
    - Supported LLMs: tutorials/supported-models.md
    - Local LLM Setup: tutorials/local-llm-setup.md
    - Non-OpenAI LLMs: tutorials/non-openai-llms.md
    - SQLChatAgent: tutorials/postgresql-agent.md
    - LLM Usage Options: tutorials/llm-usage-options.md
  - Code/API Docs: reference/
#  - API Documentation:
#    - language_models: api/language_models_base.md


markdown_extensions:
  - footnotes
  - toc:
      permalink: true
  - attr_list
  - md_in_html
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - admonition
  - pymdownx.details
  - pymdownx.superfences
  - pymdownx.highlight:
      anchor_linenums: true
      line_spans: __span
      pygments_lang_class: true
      use_pygments: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - pymdownx.arithmatex:
      generic: true
  - markdown.extensions.attr_list:
extra_javascript:
  - javascripts/mathjax.js
  - https://polyfill.io/v3/polyfill.min.js?features=es6
  - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js
</file>

<file path="README.md">
<div align="center">
  <img src="https://raw.githubusercontent.com/langroid/langroid/main/docs/assets/langroid-card-lambda-ossem-rust-1200-630.png" alt="Logo"
        width="400" align="center">
</div>

<div align="center">

[![PyPI - Version](https://img.shields.io/pypi/v/langroid)](https://pypi.org/project/langroid/)
[![Downloads](https://img.shields.io/pypi/dm/langroid)](https://pypi.org/project/langroid/)
[![Pytest](https://github.com/langroid/langroid/actions/workflows/pytest.yml/badge.svg)](https://github.com/langroid/langroid/actions/workflows/pytest.yml)
[![codecov](https://codecov.io/gh/langroid/langroid/graph/badge.svg)](https://codecov.io/gh/langroid/langroid)
[![Multi-Architecture DockerHub](https://github.com/langroid/langroid/actions/workflows/docker-publish.yml/badge.svg)](https://github.com/langroid/langroid/actions/workflows/docker-publish.yml)

[![Static Badge](https://img.shields.io/badge/Documentation-blue?link=https%3A%2F%2Flangroid.github.io%2Flangroid%2F&link=https%3A%2F%2Flangroid.github.io%2Flangroid%2F)](https://langroid.github.io/langroid)
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb)
[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?style=flat&logo=discord&logoColor=white)](https://discord.gg/ZU36McDgDs)
[![Substack](https://img.shields.io/badge/Substack-%23006f5c.svg?style=flat&logo=substack&logoColor=FF6719)](https://langroid.substack.com/p/langroid-harness-llms-with-multi-agent-programming)

</div>

<h3 align="center">
  <a target="_blank" 
    href="https://langroid.github.io/langroid/" rel="dofollow">
      <strong>Documentation</strong></a>
  &middot;
  <a target="_blank" href="https://github.com/langroid/langroid-examples" rel="dofollow">
      <strong>Examples Repo</strong></a>
  &middot;
  <a target="_blank" href="https://discord.gg/ZU36McDgDs" rel="dofollow">
      <strong>Discord</strong></a>
  &middot;
  <a target="_blank" href="https://github.com/langroid/langroid/blob/main/CONTRIBUTING.md" rel="dofollow">
      <strong>Contributing</strong></a>

  <br />
</h3>

`Langroid` is an intuitive, lightweight, extensible and principled
Python framework to easily build LLM-powered applications, from CMU and UW-Madison researchers. 
You set up Agents, equip them with optional components (LLM, 
vector-store and tools/functions), assign them tasks, and have them 
collaboratively solve a problem by exchanging messages. 
This Multi-Agent paradigm is inspired by the
[Actor Framework](https://en.wikipedia.org/wiki/Actor_model)
(but you do not need to know anything about this!). 

`Langroid` is a fresh take on LLM app-development, where considerable thought has gone 
into simplifying the developer experience; 
it does not use `Langchain`, or any other LLM framework, 
and works with [practically any LLM](https://langroid.github.io/langroid/tutorials/supported-models/).

🔥 ✨ A Claude Code [plugin](#claude-code-plugin-optional) is available to
accelerate Langroid development with built-in patterns and best practices.


🔥 Read the (WIP) [overview of the langroid architecture](https://langroid.github.io/langroid/blog/2024/08/15/overview-of-langroids-multi-agent-architecture-prelim/), 
 and a [quick tour of Langroid](https://langroid.github.io/langroid/tutorials/langroid-tour/).

🔥 MCP Support: Allow any LLM-Agent to leverage MCP Servers via Langroid's simple
[MCP tool adapter](https://langroid.github.io/langroid/notes/mcp-tools/) that converts 
the server's tools into Langroid's `ToolMessage` instances.

📢 Companies are using/adapting Langroid in **production**. Here is a quote:

>[Nullify](https://www.nullify.ai) uses AI Agents for secure software development. 
> It finds, prioritizes and fixes vulnerabilities. We have internally adapted Langroid's multi-agent orchestration framework in production, after evaluating CrewAI, Autogen, LangChain, Langflow, etc. We found Langroid to be far superior to those frameworks in terms of ease of setup and flexibility. Langroid's Agent and Task abstractions are intuitive, well thought out, and provide a great developer  experience. We wanted the quickest way to get something in production. With other frameworks it would have taken us weeks, but with Langroid we got to good results in minutes. Highly recommended! <br> -- Jacky Wong, Head of AI at Nullify.


🔥 See this [Intro to Langroid](https://lancedb.substack.com/p/langoid-multi-agent-programming-framework)
blog post from the LanceDB team

🔥 Just published in ML for Healthcare (2024): a Langroid-based Multi-Agent RAG system for 
pharmacovigilance, see [blog post](https://langroid.github.io/langroid/blog/2024/08/12/malade-multi-agent-architecture-for-pharmacovigilance/)


We welcome contributions: See the [contributions](https://github.com/langroid/langroid/blob/main/CONTRIBUTING.md) document
for ideas on what to contribute.

Are you building LLM Applications, or want help with Langroid for your company, 
or want to prioritize Langroid features for your company use-cases? 
[Prasad Chalasani](https://www.linkedin.com/in/pchalasani/) is available for consulting
(advisory/development): pchalasani at gmail dot com.

Sponsorship is also accepted via [GitHub Sponsors](https://github.com/sponsors/langroid)

**Questions, Feedback, Ideas? Join us on [Discord](https://discord.gg/ZU36McDgDs)!**

# Quick glimpse of coding with Langroid
This is just a teaser; there's much more, like function-calling/tools, 
Multi-Agent Collaboration, Structured Information Extraction, DocChatAgent 
(RAG), SQLChatAgent, non-OpenAI local/remote LLMs, etc. Scroll down or see docs for more.
See the Langroid Quick-Start [Colab](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb)
that builds up to a 2-agent information-extraction example using the OpenAI ChatCompletion API. 
See also this [version](https://colab.research.google.com/drive/190Tk7t4AdY1P9F_NlZ33-YEoGnHweQQ0) that uses the OpenAI Assistants API instead.

🔥 just released! [Example](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat-multi-extract-local.py) 
script showing how you can use Langroid multi-agents and tools
to extract structured information from a document using **only a local LLM**
(Mistral-7b-instruct-v0.2).

```python
import langroid as lr
import langroid.language_models as lm

# set up LLM
llm_cfg = lm.OpenAIGPTConfig( # or OpenAIAssistant to use Assistant API 
  # any model served via an OpenAI-compatible API
  chat_model=lm.OpenAIChatModel.GPT4o, # or, e.g., "ollama/mistral"
)
# use LLM directly
mdl = lm.OpenAIGPT(llm_cfg)
response = mdl.chat("What is the capital of Ontario?", max_tokens=10)

# use LLM in an Agent
agent_cfg = lr.ChatAgentConfig(llm=llm_cfg)
agent = lr.ChatAgent(agent_cfg)
agent.llm_response("What is the capital of China?") 
response = agent.llm_response("And India?") # maintains conversation state 

# wrap Agent in a Task to run interactive loop with user (or other agents)
task = lr.Task(agent, name="Bot", system_message="You are a helpful assistant")
task.run("Hello") # kick off with user saying "Hello"

# 2-Agent chat loop: Teacher Agent asks questions to Student Agent
teacher_agent = lr.ChatAgent(agent_cfg)
teacher_task = lr.Task(
  teacher_agent, name="Teacher",
  system_message="""
    Ask your student concise numbers questions, and give feedback. 
    Start with a question.
    """
)
student_agent = lr.ChatAgent(agent_cfg)
student_task = lr.Task(
  student_agent, name="Student",
  system_message="Concisely answer the teacher's questions.",
  single_round=True,
)

teacher_task.add_sub_task(student_task)
teacher_task.run()
```

# 🔥 Updates/Releases

<details>
<summary> <b>Click to expand</b></summary>

- **Aug 2025:**
  - [0.59.0](https://github.com/langroid/langroid/releases/tag/0.59.0) Complete Pydantic V2 Migration - 
    5-50x faster validation, modern Python patterns, 100% backward compatible.
- **Jul 2025:**
  - [0.58.0](https://github.com/langroid/langroid/releases/tag/0.58.0) Crawl4AI integration - 
    browser-based web crawling with Playwright for JavaScript-heavy sites, no API key required (thank you @abab-dev!).
  - [0.57.0](https://github.com/langroid/langroid/releases/tag/0.57.0) HTML Logger for interactive task visualization - 
    self-contained HTML logs with collapsible entries, auto-refresh, and persistent UI state.
- **Jun 2025:**
  - [0.56.0](https://github.com/langroid/langroid/releases/tag/0.56.0) `TaskTool` for delegating tasks to sub-agents - 
    enables agents to spawn sub-agents with specific tools and configurations.
  - [0.55.0](https://github.com/langroid/langroid/releases/tag/0.55.0) Event-based task termination with `done_sequences` - 
    declarative task completion using event patterns.
  - [0.54.0](https://github.com/langroid/langroid/releases/tag/0.54.0) Portkey AI Gateway support - access 200+ models 
    across providers through unified API with caching, retries, observability.
- **Mar-Apr 2025:**
  - [0.53.0](https://github.com/langroid/langroid/releases/tag/0.53.0) MCP Tools Support.
  - [0.52.0](https://github.com/langroid/langroid/releases/tag/0.52.0) Multimodal support, i.e. allow PDF, image 
    inputs to LLM.
  - [0.51.0](https://github.com/langroid/langroid/releases/tag/0.51.0) `LLMPdfParser`, generalizing 
    `GeminiPdfParser` to parse documents directly with LLM.
  - [0.50.0](https://github.com/langroid/langroid/releases/tag/0.50.0) Structure-aware Markdown chunking with chunks 
    enriched by section headers.
  - [0.49.0](https://github.com/langroid/langroid/releases/tag/0.49.0) Enable easy switch to LiteLLM Proxy-server 
  - [0.48.0](https://github.com/langroid/langroid/releases/tag/0.48.0) Exa Crawler, Markitdown Parser
  - [0.47.0](https://github.com/langroid/langroid/releases/tag/0.47.0) Support Firecrawl URL scraper/crawler - 
    thanks @abab-dev
  - [0.46.0](https://github.com/langroid/langroid/releases/tag/0.46.0) Support LangDB LLM Gateway - thanks @MrunmayS.
  - [0.45.0](https://github.com/langroid/langroid/releases/tag/0.45.0) Markdown parsing with `Marker` - thanks @abab-dev
  - [0.44.0](https://github.com/langroid/langroid/releases/tag/0.44.0) Late imports to reduce startup time. Thanks 
    @abab-dev
- **Feb 2025:**
  - [0.43.0](https://github.com/langroid/langroid/releases/tag/0.43.0): `GeminiPdfParser` for parsing PDF using 
    Gemini LLMs - Thanks @abab-dev.
  - [0.42.0](https://github.com/langroid/langroid/releases/tag/0.42.0): `markitdown` parser for `pptx,xlsx,xls` files 
    Thanks @abab-dev.
  - [0.41.0](https://github.com/langroid/langroid/releases/tag/0.41.0): `pinecone` vector-db (Thanks @coretado), 
    `Tavily` web-search (Thanks @Sozhan308), `Exa` web-search (Thanks @MuddyHope).
  - [0.40.0](https://github.com/langroid/langroid/releases/tag/0.40.0): `pgvector` vector-db. Thanks @abab-dev.
  - [0.39.0](https://github.com/langroid/langroid/releases/tag/0.39.0): `ChatAgentConfig.handle_llm_no_tool` for 
    handling LLM "forgetting" to use a tool.
  - [0.38.0](https://github.com/langroid/langroid/releases/tag/0.38.0): Gemini embeddings (thanks @abab-dev).
  - [0.37.0](https://github.com/langroid/langroid/releases/tag/0.37.0): New PDF Parsers: `docling`, `pymupdf4llm`
- **Jan 2025:**
  - [0.36.0](https://github.com/langroid/langroid/releases/tag/0.36.0): Weaviate vector-db support (thanks @abab-dev).
  - [0.35.0](https://github.com/langroid/langroid/releases/tag/0.35.0): Capture/Stream reasoning content from 
    Reasoning LLMs (e.g. DeepSeek-R1, OpenAI o1) in addition to final answer.
  - [0.34.0](https://github.com/langroid/langroid/releases/tag/0.34.0): DocChatAgent 
    chunk enrichment to improve retrieval. (collaboration with @dfm88). 
  - [0.33.0](https://github.com/langroid/langroid/releases/tag/0.33.3) Move from Poetry to uv! (thanks @abab-dev).
  - [0.32.0](https://github.com/langroid/langroid/releases/tag/0.32.0) DeepSeek v3 support.
- **Dec 2024:**
  - [0.31.0](https://github.com/langroid/langroid/releases/tag/0.31.0) Azure OpenAI Embeddings
  - [0.30.0](https://github.com/langroid/langroid/releases/tag/0.30.0) Llama-cpp embeddings (thanks @Kwigg).
  - [0.29.0](https://github.com/langroid/langroid/releases/tag/0.29.0) Custom Azure OpenAI Client (thanks 
    @johannestang).
  - [0.28.0](https://github.com/langroid/langroid/releases/tag/0.28.0) `ToolMessage`: `_handler` field to override 
default handler method name in `request` field (thanks @alexagr).
  - [0.27.0](https://github.com/langroid/langroid/releases/tag/0.27.0) OpenRouter Support.
  - [0.26.0](https://github.com/langroid/langroid/releases/tag/0.26.0) Update to latest Chainlit.
  - [0.25.0](https://github.com/langroid/langroid/releases/tag/0.25.0) True Async Methods for agent and 
    user-response (thanks @alexagr).
- **Nov 2024:**
  - **[0.24.0](https://langroid.github.io/langroid/notes/structured-output/)**: 
     Enables support for `Agent`s with strict JSON schema output format on compatible LLMs and strict mode for the OpenAI tools API.
    (thanks @nilspalumbo).
  - **[0.23.0](https://langroid.github.io/langroid/tutorials/local-llm-setup/#local-llms-hosted-on-glhfchat)**: 
      support for LLMs (e.g. `Qwen2.5-Coder-32b-Instruct`) hosted on glhf.chat 
  - **[0.22.0](https://langroid.github.io/langroid/notes/large-tool-results/)**: 
     Optional parameters to truncate large tool results.
  - **[0.21.0](https://langroid.github.io/langroid/notes/gemini/)** Direct support for Gemini models via OpenAI client instead of using LiteLLM.
  - **[0.20.0](https://github.com/langroid/langroid/releases/tag/0.20.0)** Support for 
    ArangoDB Knowledge Graphs.
- **Oct 2024:**
  - **[0.18.0]** [LLMConfig.async_stream_quiet](https://langroid.github.io/langroid/notes/async-streaming/) flag to 
    turn off LLM output in async + stream mode.
  - **[0.17.0]** XML-based tools, see [docs](https://langroid.github.io/langroid/notes/xml-tools/).
- **Sep 2024:**
  - **[0.16.0](https://github.com/langroid/langroid/releases/tag/0.16.0)**  Support for OpenAI `o1-mini` and `o1-preview` models.
  - **[0.15.0](https://github.com/langroid/langroid/releases/tag/0.15.0)** Cerebras API support -- run llama-3.1 models hosted on Cerebras Cloud (very fast inference).
  - **[0.14.0](https://github.com/langroid/langroid/releases/tag/0.14.0)** `DocChatAgent` uses Reciprocal Rank Fusion (RRF) to rank chunks retrieved by different methods.
  - **[0.12.0](https://github.com/langroid/langroid/releases/tag/0.12.0)** `run_batch_task` new option -- `stop_on_first_result` - allows termination of batch as soon as any task returns a result.  
- **Aug 2024:**
  - **[0.11.0](https://github.com/langroid/langroid/releases/tag/0.11.0)** Polymorphic `Task.run(), Task.run_async`.
  - **[0.10.0](https://github.com/langroid/langroid/releases/tag/0.10.0)** Allow tool handlers to return arbitrary result type, including other tools.
  - **[0.9.0](https://github.com/langroid/langroid/releases/tag/0.9.0)** Orchestration Tools, to signal various task statuses, and to pass messages between agents.
  - **[0.7.0](https://github.com/langroid/langroid/releases/tag/0.7.0)** OpenAI tools API support, including multi-tools.
- **Jul 2024:**
  - **[0.3.0](https://github.com/langroid/langroid/releases/tag/0.3.0)**: Added [FastEmbed](https://qdrant.github.io/fastembed/qdrant/Usage_With_Qdrant/) embeddings from Qdrant
- **Jun 2024:**
  - **0.2.0:** Improved lineage tracking, granular sub-task configs, and a new tool, `RewindTool`, 
    that lets an agent "rewind and redo" a past message (and all dependent messages are cleared out 
    thanks to the lineage tracking). Read notes [here](https://github.com/langroid/langroid/releases/tag/0.2.0).
- **May 2024:** 
  - **Slimmer langroid**: All document-parsers (i.e. pdf, doc, docx) and most 
    vector-databases (except qdrant) 
    are now optional/extra dependencies, which helps reduce build size, script 
    start-up time, and install time. For convenience, various groupings of "extras" are 
    provided, e.g. `doc-chat`, `db` (for database-related dependencies). See updated 
    install instructions below and in the docs.
  - **Few-shot examples** for tools: when defining a [ToolMessage](https://langroid.github.io/langroid/quick-start/chat-agent-tool/#example-find-the-smallest-number-in-a-list), previously you were able to include a classmethod named `examples`,
    and a random example from this list would be used to generate a 1-shot example 
    for the LLM. This has been improved so you can now supply a list of examples 
    where each example is either a tool instance, or a tuple of (description, 
    tool instance), where the description is a "thought" that leads the LLM to use 
    the tool (see example in the [docs](https://langroid.github.io/langroid/quick-start/chat-agent-tool/#example-find-the-smallest-number-in-a-list)). In some scenarios this can improve LLM tool 
    generation accuracy. Also, now instead of a random example, ALL examples are used to generate few-shot 
    examples.     
  - [Infinite loop detection](https://github.com/langroid/langroid/blob/0ed30eb467b00d5eaf2933b577a4b2cc37de1aa1/langroid/agent/task.py#L1121) for task loops of cycle-length <= 10 (configurable 
    in [`TaskConfig`](https://langroid.github.io/langroid/reference/agent/task/#langroid.agent.task.TaskConfig). Only detects _exact_ loops, rather than _approximate_ loops where the entities are saying essentially similar (but not exactly the same) things repeatedly.
  - "@"-addressing: any entity can address any other by name, which can be the name 
    of an agent's responder ("llm", "user", "agent") or a sub-task name. This is a 
    simpler alternative to the `RecipientTool` mechanism, with the tradeoff that 
    since it's not a tool, there's no way to enforce/remind the LLM to explicitly 
    specify an addressee (in scenarios where this is important).
  - [Much-Improved Citation](https://github.com/langroid/langroid/issues/477) 
    generation and display when using `DocChatAgent`.
  - `gpt-4o` is now the default LLM throughout; Update tests and examples to work 
    with this LLM; use tokenizer corresponding to the LLM.
  - `gemini 1.5 pro` support via `litellm`
  - `QdrantDB:` update to support learned sparse embeddings.
- **Apr 2024:**
  - **0.1.236**: Support for open LLMs hosted on Groq, e.g. specify 
    `chat_model="groq/llama3-8b-8192"`.
      See [tutorial](https://langroid.github.io/langroid/tutorials/local-llm-setup/).
  - **0.1.235**: `Task.run(), Task.run_async(), run_batch_tasks` have `max_cost` 
    and `max_tokens` params to exit when tokens or cost exceed a limit. The result 
    `ChatDocument.metadata` now includes a `status` field, a code indicating the 
     reason the task completed. Also `task.run()` etc. can be invoked with an explicit
     `session_id` field which is used as a key to look up various settings in Redis cache.
    Currently only used to look up "kill status" - this allows killing a running task, either by `task.kill()`
    or by the classmethod `Task.kill_session(session_id)`.
    For example usage, see the `test_task_kill` in [tests/main/test_task.py](https://github.com/langroid/langroid/blob/main/tests/main/test_task.py)
  
- **Mar 2024:**
  - **0.1.216:** Improvements to allow concurrent runs of `DocChatAgent`, see the
    [`test_doc_chat_agent.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_doc_chat_agent.py)
    in particular the `test_doc_chat_batch()`;
    New task run utility: [`run_batch_task_gen`](https://github.com/langroid/langroid/blob/main/langroid/agent/batch.py) 
    where a task generator can be specified, to generate one task per input. 
  - **0.1.212:** ImagePdfParser: support for extracting text from image-based PDFs.
    (this means `DocChatAgent` will now work with image-pdfs).
  - **0.1.194 - 0.1.211:** Misc fixes, improvements, and features:
    - Big enhancement in RAG performance (mainly, recall) due to a [fix in Relevance 
      Extractor](https://github.com/langroid/langroid/releases/tag/0.1.209)
    - `DocChatAgent` [context-window fixes](https://github.com/langroid/langroid/releases/tag/0.1.208)
    - Anthropic/Claude3 support via Litellm
    - `URLLoader`: detect the file type from the response header when a URL doesn't end with a 
      recognizable suffix like `.pdf`, `.docx`, etc.
    - Misc lancedb integration fixes
    - Auto-select embedding config based on whether `sentence_transformer` module is available.
    - Slim down dependencies, make some heavy ones optional, e.g. `unstructured`, 
      `haystack`, `chromadb`, `mkdocs`, `huggingface-hub`, `sentence-transformers`.
    - Easier top-level imports from `import langroid as lr`
    - Improve JSON detection, esp from weak LLMs
- **Feb 2024:** 
  - **0.1.193:** Support local LLMs using Ollama's new OpenAI-Compatible server: 
     simply specify `chat_model="ollama/mistral"`. See [release notes](https://github.com/langroid/langroid/releases/tag/0.1.193).
  - **0.1.183:** Added Chainlit support via [callbacks](https://github.com/langroid/langroid/blob/main/langroid/agent/callbacks/chainlit.py). 
   See [examples](https://github.com/langroid/langroid/tree/main/examples/chainlit).
- **Jan 2024:**
  - **0.1.175** 
    - [Neo4jChatAgent](https://github.com/langroid/langroid/tree/main/langroid/agent/special/neo4j) to chat with a neo4j knowledge-graph.
      (Thanks to [Mohannad](https://github.com/Mohannadcse)!). The agent uses tools to query the Neo4j schema and translate user queries to Cypher queries,
      and the tool handler executes these queries, returning the results to the LLM to compose
      a natural language response (analogous to how `SQLChatAgent` works).
      See example [script](https://github.com/langroid/langroid/tree/main/examples/kg-chat) using this Agent to answer questions about Python pkg dependencies.
    - Support for `.doc` file parsing (in addition to `.docx`)
    - Specify optional [`formatter` param](https://github.com/langroid/langroid/releases/tag/0.1.171) 
      in `OpenAIGPTConfig` to ensure accurate chat formatting for local LLMs. 
  - **[0.1.157](https://github.com/langroid/langroid/releases/tag/0.1.157):** `DocChatAgentConfig` 
     has a new param: `add_fields_to_content`, to specify additional document fields to insert into 
     the main `content` field, to help improve retrieval.
  - **[0.1.156](https://github.com/langroid/langroid/releases/tag/0.1.156):** New Task control signals
     PASS_TO, SEND_TO; VectorStore: Compute Pandas expression on documents; LanceRAGTaskCreator creates 3-agent RAG system with Query Planner, Critic and RAG Agent.
- **Dec 2023:**
  - **0.1.154:** (For details see release notes of [0.1.149](https://github.com/langroid/langroid/releases/tag/0.1.149)
      and [0.1.154](https://github.com/langroid/langroid/releases/tag/0.1.154)). 
    - `DocChatAgent`: Ingest Pandas dataframes and filtering.
    - `LanceDocChatAgent` leverages `LanceDB` vector-db for efficient vector search
     and full-text search and filtering.
    - Improved task and multi-agent control mechanisms
    - `LanceRAGTaskCreator` to create a 2-agent system consisting of a `LanceFilterAgent` that
      decides a filter and rephrase query to send to a RAG agent.
  - **[0.1.141](https://github.com/langroid/langroid/releases/tag/0.1.141):**
    API Simplifications to reduce boilerplate:
    auto-select an available OpenAI model (preferring gpt-4o), simplifies defaults.
    Simpler `Task` initialization with default `ChatAgent`.
- **Nov 2023:**
  - **[0.1.126](https://github.com/langroid/langroid/releases/tag/0.1.126):**
     OpenAIAssistant agent: Caching Support. 
  - **0.1.117:** Support for OpenAI Assistant API tools: Function-calling, 
    Code-interpreter, Retriever (RAG), and file uploads. These work seamlessly 
    with Langroid's task-orchestration.
    Until docs are ready, it's best to see these usage examples:
    
    - **Tests:**
      - [test_openai_assistant.py](https://github.com/langroid/langroid/blob/main/tests/main/test_openai_assistant.py)
      - [test_openai_assistant_async.py](https://github.com/langroid/langroid/blob/main/tests/main/test_openai_assistant_async.py)

    - **Example scripts:**
      - [The most basic chat app](https://github.com/langroid/langroid/blob/main/examples/basic/oai-asst-chat.py)
      - [Chat with code interpreter](https://github.com/langroid/langroid/blob/main/examples/basic/oai-code-chat.py)
      - [Chat with retrieval (RAG)](https://github.com/langroid/langroid/blob/main/examples/docqa/oai-retrieval-assistant.py)
      - [2-agent RAG chat](https://github.com/langroid/langroid/blob/main/examples/docqa/oai-retrieval-2.py)
  - **0.1.112:** [`OpenAIAssistant`](https://github.com/langroid/langroid/blob/main/langroid/agent/openai_assistant.py) is a subclass of `ChatAgent` that 
    leverages the new OpenAI Assistant API. It can be used as a drop-in 
    replacement for `ChatAgent`, and relies on the Assistant API to
    maintain conversation state, and leverages persistent threads and 
    assistants to reconnect to them if needed. Examples: 
    [`test_openai_assistant.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_openai_assistant.py),
    [`test_openai_assistant_async.py`](https://github.com/langroid/langroid/blob/main/tests/main/test_openai_assistant_async.py)
  - **0.1.111:** Support latest OpenAI model: `GPT4_TURBO`
(see [test_llm.py](https://github.com/langroid/langroid/blob/main/tests/main/test_llm.py) for example usage)
  - **0.1.110:** Upgrade from OpenAI v0.x to v1.1.1 (in preparation for 
    Assistants API and more); (`litellm` temporarily disabled due to OpenAI 
    version conflict).
- **Oct 2023:**
  - **0.1.107:** `DocChatAgent` re-rankers: `rank_with_diversity`, `rank_to_periphery` (lost in middle).
  - **0.1.102:** `DocChatAgentConfig.n_neighbor_chunks > 0` allows returning context chunks around match.
  - **0.1.101:** `DocChatAgent` uses `RelevanceExtractorAgent` to have 
    the LLM extract relevant portions of a chunk using 
    sentence-numbering, resulting in huge speed up and cost reduction 
    compared to the naive "sentence-parroting" approach (writing out the 
    relevant sentences in full) which `LangChain` uses in their 
    `LLMChainExtractor`.
  - **0.1.100:** API update: all of Langroid is accessible with a single import, i.e. `import langroid as lr`. See the [documentation]("https://langroid.github.io/langroid/") for usage.
  - **0.1.99:** Convenience batch functions to run tasks, agent methods on a list of inputs concurrently in async mode. See examples in [test_batch.py](https://github.com/langroid/langroid/blob/main/tests/main/test_batch.py).
  - **0.1.95:** Added support for [Momento Serverless Vector Index](https://docs.momentohq.com/vector-index)
  - **0.1.94:** Added support for [LanceDB](https://lancedb.github.io/lancedb/) vector-store -- allows vector, Full-text, SQL search.
  - **0.1.84:** Added [LiteLLM](https://docs.litellm.ai/docs/providers), so now Langroid can be used with over 100 LLM providers (remote or local)! 
     See guide [here](https://langroid.github.io/langroid/tutorials/non-openai-llms/).
- **Sep 2023:**
  - **0.1.78:** Async versions of several Task, Agent and LLM methods; 
      Nested Pydantic classes are now supported for LLM Function-calling, Tools, Structured Output.    
  - **0.1.76:** DocChatAgent: support for loading `docx` files (preliminary).
  - **0.1.72:** Many improvements to DocChatAgent: better embedding model, 
          hybrid search to improve retrieval, better pdf parsing, re-ranking retrieved results with cross-encoders. 
  - **Use with local LLama Models:** see tutorial [here](https://langroid.github.io/langroid/blog/2023/09/14/using-langroid-with-local-llms/)
  - **Langroid Blog/Newsletter Launched!**: First post is [here](https://substack.com/notes/post/p-136704592) -- Please subscribe to stay updated. 
  - **0.1.56:** Support Azure OpenAI. 
  - **0.1.55:** Improved [`SQLChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/sql/sql_chat_agent.py) that efficiently retrieves relevant schema info when translating natural language to SQL.  
- **Aug 2023:**
  - **[Hierarchical computation](https://langroid.github.io/langroid/examples/agent-tree/)** example using Langroid agents and task orchestration.
  - **0.1.51:** Support for global state, see [test_global_state.py](https://github.com/langroid/langroid/blob/main/tests/main/test_global_state.py).
  - **🐳 Langroid Docker image**, available, see instructions below.
  - [**RecipientTool**](https://github.com/langroid/langroid/blob/main/langroid/agent/tools/recipient_tool.py) enables (+ enforces) LLM to 
specify an intended recipient when talking to 2 or more agents. 
See [this test](https://github.com/langroid/langroid/blob/main/tests/main/test_recipient_tool.py) for example usage.
  - **Example:** [Answer questions](https://github.com/langroid/langroid/blob/main/examples/docqa/chat-search.py) using Google Search + vecdb-retrieval from URL contents. 
  - **0.1.39:** [`GoogleSearchTool`](https://github.com/langroid/langroid/blob/main/langroid/agent/tools/google_search_tool.py) to enable Agents (their LLM) to do Google searches via function-calling/tools.
    See [this chat example](https://github.com/langroid/langroid/blob/main/examples/basic/chat-search.py) for how easy it is to add this tool to an agent.
  - **Colab notebook** to try the quick-start examples: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb) 
  - **0.1.37:** Added [`SQLChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/sql_chat_agent.py) -- thanks to our latest contributor [Rithwik Babu](https://github.com/rithwikbabu)!
  - Multi-agent Example: [Autocorrect chat](https://github.com/langroid/langroid/blob/main/examples/basic/autocorrect.py)
- **July 2023:** 
  - **0.1.30:** Added [`TableChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/table_chat_agent.py) to 
    [chat](https://github.com/langroid/langroid/blob/main/examples/data-qa/table_chat.py) with tabular datasets (dataframes, files, URLs): LLM generates Pandas code,
    and code is executed using Langroid's tool/function-call mechanism. 
  - **Demo:** 3-agent system for Audience [Targeting](https://langroid.github.io/langroid/demos/targeting/audience-targeting/).
  - **0.1.27**: Added [support](https://github.com/langroid/langroid/blob/main/langroid/cachedb/momento_cachedb.py) 
    for [Momento Serverless Cache](https://www.gomomento.com/) as an alternative to Redis.
  - **0.1.24**: [`DocChatAgent`](https://github.com/langroid/langroid/blob/main/langroid/agent/special/doc_chat_agent.py) 
    now [accepts](https://github.com/langroid/langroid/blob/main/langroid/parsing/document_parser.py) PDF files or URLs.

</details>

# 🚀 Demo
Suppose you want to extract structured information about the key terms 
of a commercial lease document. You can easily do this with Langroid using a two-agent system,
as we show in the [langroid-examples](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py) repo.
(See [this script](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat-multi-extract-local.py)
for a version with the same functionality using a local Mistral-7b model.)
The demo showcases just a few of the many features of Langroid, such as:
- Multi-agent collaboration: `LeaseExtractor` is in charge of the task, and its LLM (GPT4) generates questions 
to be answered by the `DocAgent`.
- Retrieval augmented question-answering, with **source-citation**: `DocAgent` LLM (GPT4) uses retrieval from a vector-store to 
answer the `LeaseExtractor`'s questions, citing the specific excerpt supporting each answer. 
- Function-calling (also known as tool/plugin): When it has all the information it 
needs, the `LeaseExtractor` LLM presents the information in a structured 
format using a Function-call. 

Here is what it looks like in action 
(a pausable mp4 video is [here](https://vimeo.com/871429249)).

![Demo](https://raw.githubusercontent.com/langroid/langroid/main/docs/assets/demos/lease-extractor-demo.gif)


# ⚡ Highlights
(For a more up-to-date list see the 
[Updates/Releases](https://github.com/langroid/langroid?tab=readme-ov-file#-updatesreleases) 
section above)
- **Agents as first-class citizens:** The [Agent](https://langroid.github.io/langroid/reference/agent/base/#langroid.agent.base.Agent) class encapsulates LLM conversation state,
  and optionally a vector-store and tools. Agents are a core abstraction in Langroid;
  Agents act as _message transformers_, and by default provide 3 _responder_ methods, one corresponding to each entity: LLM, Agent, User.
- **Tasks:** A [Task](https://langroid.github.io/langroid/reference/agent/task/) class wraps an Agent, and gives the agent instructions (or roles, or goals), 
  manages iteration over an Agent's responder methods, 
  and orchestrates multi-agent interactions via hierarchical, recursive
  task-delegation. The `Task.run()` method has the same 
  type-signature as an Agent's responder's methods, and this is key to how 
  a task of an agent can delegate to other sub-tasks: from the point of view of a Task,
  sub-tasks are simply additional responders, to be used in a round-robin fashion 
  after the agent's own responders.
- **Modularity, Reusability, Loose coupling:** The `Agent` and `Task` abstractions allow users to design
  Agents with specific skills, wrap them in Tasks, and combine tasks in a flexible way.
- **LLM Support**: Langroid supports OpenAI LLMs as well as LLMs from hundreds of 
providers ([local/open](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial](https://langroid.github.io/langroid/tutorials/non-openai-llms/)) via proxy libraries and local model servers
such as [ollama](https://github.com/ollama), [oobabooga](https://github.com/oobabooga/text-generation-webui), 
  [LiteLLM](https://docs.litellm.ai/docs/providers) that in effect mimic the OpenAI API. See the [supported LLMs](https://langroid.github.io/langroid/tutorials/supported-models/). 
- **Caching of LLM responses:** Langroid supports [Redis](https://redis.com/try-free/) to cache LLM responses.
- **Vector-stores**: [Qdrant](https://qdrant.tech/), [Chroma](https://www.trychroma.com/), LanceDB, Pinecone, PostgresDB (PGVector), Weaviate are currently supported.
  Vector stores allow for Retrieval-Augmented-Generation (RAG).
- **Grounding and source-citation:** Access to external documents via vector-stores 
   allows for grounding and source-citation.
- **Observability, Logging, Lineage:** Langroid generates detailed logs of multi-agent interactions and
  maintains provenance/lineage of messages, so that you can trace back
  the origin of a message.
- **[Tools/Plugins/Function-calling](https://langroid.github.io/langroid/quick-start/chat-agent-tool/)**:
  Langroid supports OpenAI's [function calling](https://platform.openai.com/docs/guides/gpt/function-calling), as
  well as an equivalent `ToolMessage` mechanism which works with
  any LLM, not just OpenAI's.
  Function calling and tools have the same developer-facing interface, implemented
  using [Pydantic](https://docs.pydantic.dev/latest/),
  which makes it very easy to define tools/functions and enable agents
  to use them. Benefits of using Pydantic are that you never have to write
  complex JSON specs for function calling, and when the LLM
  hallucinates malformed JSON, the Pydantic error message is sent back to
  the LLM so it can fix it.
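
  Here is a minimal sketch of defining and enabling such a tool (the tool
  name, fields, and handler below are illustrative, not part of Langroid's API):

  ```python
  import langroid as lr

  class SquareTool(lr.ToolMessage):
      request: str = "square"  # name the LLM uses to invoke the tool
      purpose: str = "To compute the square of a given <number>."
      number: int

      def handle(self) -> str:
          # stateless handler, called when the LLM generates this tool
          return str(self.number**2)

  agent = lr.ChatAgent(lr.ChatAgentConfig(name="Calculator"))
  agent.enable_message(SquareTool)  # let the agent's LLM use this tool
  ```

  Because the tool is a Pydantic model, a malformed generation triggers a
  validation error that is sent back to the LLM to self-correct.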

--- 

# ⚙️ Installation and Setup

### Install `langroid`
Langroid requires Python 3.11+. We recommend using a virtual environment.
Use `pip` to install a bare-bones slim version of `langroid` (from PyPI) into your virtual 
environment:
```bash
pip install langroid
```
The core Langroid package lets you use OpenAI Embeddings models via their API. 
If you instead want to use the `sentence-transformers` embedding models from HuggingFace, 
install Langroid like this: 
```bash
pip install "langroid[hf-embeddings]"
```
For many practical scenarios, you may need additional optional dependencies:
- To use various document-parsers, install langroid with the `doc-chat` extra:
    ```bash
    pip install "langroid[doc-chat]"
    ```
- For "chat with databases", use the `db` extra:
    ```bash
    pip install "langroid[db]"
    ```
- You can specify multiple extras by separating them with commas, e.g.:
    ```bash
    pip install "langroid[doc-chat,db]"
    ```
- To simply install _all_ optional dependencies, use the `all` extra (but note that this will result in longer load/startup times and a larger install size):
    ```bash
    pip install "langroid[all]"
    ```
<details>
<summary><b>Optional Installs for using SQL Chat with a PostgreSQL DB </b></summary>

If you are using `SQLChatAgent` 
(e.g. the script [`examples/data-qa/sql-chat/sql_chat.py`](https://github.com/langroid/langroid/blob/main/examples/data-qa/sql-chat/sql_chat.py)),
with a postgres db, you will need to:

- Install PostgreSQL dev libraries for your platform, e.g.
  - `sudo apt-get install libpq-dev` on Ubuntu,
  - `brew install postgresql` on Mac, etc.
- Install langroid with the postgres extra, e.g. `pip install langroid[postgres]`
  or `poetry add "langroid[postgres]"` or `poetry install -E postgres`,
  (or the corresponding `uv` versions, e.g. `uv add "langroid[postgres]"`
  or `uv pip install langroid[postgres]`).
  If this gives you an error, try `pip install psycopg2-binary` in your virtualenv.
</details>

📝 If you get strange errors involving `mysqlclient`, try doing `pip uninstall mysqlclient` followed by `pip install mysqlclient`.

### Claude Code Plugin (Optional)

This plugin provides two skills:

- `langroid:patterns` - Your Claude Code agent can leverage this skill to produce
  Langroid multi-agent code using proper design patterns.
- `langroid:add-pattern` - The agent can use this skill to record new patterns it
  learns, for future reference, either autonomously or when prompted by the user.

**Step 1: Add the Langroid marketplace**

From terminal:
```bash
claude plugin marketplace add langroid/langroid
```

Or within Claude Code:
```
/plugin marketplace add langroid/langroid
```

**Step 2: Install the Langroid plugin**

From terminal:
```bash
claude plugin install langroid@langroid
```

Or within Claude Code:
```
/plugin install langroid@langroid
```

Once installed, simply ask your Claude Code agent to implement Langroid patterns in
natural language, e.g.,

> set up a Langroid agent so it uses the EditTool, and wrap it in a task that ends as soon as the tool is generated

and it will automatically use the `langroid:patterns` skill to follow the right design pattern.

You can also ask Claude Code to record a new pattern when you discover one, e.g.,

> record this as a new Langroid pattern for setting up MCP tools


### Set up environment variables (API keys, etc)

To get started, all you need is an OpenAI API Key.
If you don't have one, see [this OpenAI Page](https://platform.openai.com/docs/quickstart).
(Note that while this is the simplest way to get started, Langroid works with practically any LLM, not just those from OpenAI. 
See the guides to using [Open/Local LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/), 
and other [non-OpenAI](https://langroid.github.io/langroid/tutorials/non-openai-llms/) proprietary LLMs.) 

In the root of the repo, copy the `.env-template` file to a new file `.env`: 
```bash
cp .env-template .env
```
Then insert your OpenAI API Key. 
Your `.env` file should look like this (the organization is optional 
but may be required in some scenarios).
```bash
OPENAI_API_KEY=your-key-here-without-quotes
OPENAI_ORGANIZATION=optionally-your-organization-id
```

Alternatively, you can set this as an environment variable in your shell
(you will need to do this every time you open a new shell):
```bash
export OPENAI_API_KEY=your-key-here-without-quotes
```


<details>
<summary><b>Optional Setup Instructions (click to expand) </b></summary>

All of the following environment variable settings are optional, and some are only needed 
to use specific features (as noted below).

- **Qdrant** Vector Store API Key, URL. This is only required if you want to use Qdrant cloud.
  Alternatively [Chroma](https://docs.trychroma.com/) or [LanceDB](https://lancedb.com/) are also currently supported. 
  We use the local-storage version of Chroma, so there is no need for an API key.
- **Redis** Password, host, port: This is optional, and only needed to cache LLM API responses
  using Redis Cloud. Redis [offers](https://redis.com/try-free/) a free 30MB Redis account
  which is more than sufficient to try out Langroid and even beyond.
  If you don't set up these, Langroid will use a pure-python 
  Redis in-memory cache via the [Fakeredis](https://fakeredis.readthedocs.io/en/latest/) library.
- **Momento** Serverless Caching of LLM API responses (as an alternative to Redis). 
   To use Momento instead of Redis:
  - enter your Momento Token in the `.env` file, as the value of `MOMENTO_AUTH_TOKEN` (see example file below),
  - in the `.env` file set `CACHE_TYPE=momento` (instead of `CACHE_TYPE=redis` which is the default).
- **GitHub** Personal Access Token (required for apps that need to analyze git
  repos; token-based API calls are less rate-limited). See this
  [GitHub page](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens).
- **Google Custom Search API Credentials:** Only needed to enable an Agent to use the `GoogleSearchTool`.
  To use Google Search as an LLM Tool/Plugin/function-call, 
  you'll need to set up 
  [a Google API key](https://developers.google.com/custom-search/v1/introduction#identify_your_application_to_google_with_api_key),
  then [setup a Google Custom Search Engine (CSE) and get the CSE ID](https://developers.google.com/custom-search/docs/tutorial/creatingcse).
  (The documentation for these can be challenging; we suggest asking GPT-4 for a step-by-step guide.)
  After obtaining these credentials, store them as values of 
  `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` in your `.env` file. 
  Full documentation on using this (and other such "stateless" tools) is coming soon, but 
  in the meantime take a peek at this [chat example](https://github.com/langroid/langroid/blob/main/examples/basic/chat-search.py), which 
  shows how you can easily equip an Agent with a `GoogleSearchTool`.
  


If you add all of these optional variables, your `.env` file should look like this:
```bash
OPENAI_API_KEY=your-key-here-without-quotes
GITHUB_ACCESS_TOKEN=your-personal-access-token-no-quotes
CACHE_TYPE=redis # or momento
REDIS_PASSWORD=your-redis-password-no-quotes
REDIS_HOST=your-redis-hostname-no-quotes
REDIS_PORT=your-redis-port-no-quotes
MOMENTO_AUTH_TOKEN=your-momento-token-no-quotes # instead of REDIS* variables
QDRANT_API_KEY=your-key
QDRANT_API_URL=https://your.url.here:6333 # note port number must be included
GOOGLE_API_KEY=your-key
GOOGLE_CSE_ID=your-cse-id
```
</details>

<details>
<summary><b>Optional setup instructions for Microsoft Azure OpenAI (click to expand)</b></summary>

When using Azure OpenAI, additional environment variables are required in the 
`.env` file.
This page [Microsoft Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line&pivots=programming-language-python#environment-variables)
provides more information, and you can set each environment variable as follows:

- `AZURE_OPENAI_API_KEY`, from the value of `API_KEY`
- `AZURE_OPENAI_API_BASE`, from the value of `ENDPOINT`; typically looks like `https://your.domain.azure.com`.
- For `AZURE_OPENAI_API_VERSION`, you can use the default value in `.env-template`; the latest version can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/whats-new#azure-openai-chat-completion-general-availability-ga).
- `AZURE_OPENAI_DEPLOYMENT_NAME` is the name of the deployed model, which you define when setting up the deployment.
- `AZURE_OPENAI_MODEL_NAME` is precisely the model name that was selected for your deployment, e.g. `gpt-4` or `gpt-4-32k`.
- `AZURE_OPENAI_MODEL_VERSION` is required if `AZURE_OPENAI_MODEL_NAME` is `gpt-4`; it helps Langroid determine the cost of the model.
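
Putting these together, the Azure-related entries in your `.env` file might look like this
(illustrative values only; substitute the names and versions from your own Azure deployment):

```bash
AZURE_OPENAI_API_KEY=your-azure-key-no-quotes
AZURE_OPENAI_API_BASE=https://your.domain.azure.com
AZURE_OPENAI_API_VERSION=2023-05-15
AZURE_OPENAI_DEPLOYMENT_NAME=your-deployment-name
AZURE_OPENAI_MODEL_NAME=gpt-4
AZURE_OPENAI_MODEL_VERSION=0613
```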
</details>

---

# 🐳 Docker Instructions

We provide a containerized version of the [`langroid-examples`](https://github.com/langroid/langroid-examples) 
repository via this [Docker Image](https://hub.docker.com/r/langroid/langroid).
All you need to do is set up environment variables in the `.env` file.
Please follow these steps to set up the container:

```bash
# get the .env file template from `langroid` repo
wget -O .env https://raw.githubusercontent.com/langroid/langroid/main/.env-template

# Edit the .env file with your favorite editor (here nano), and remove any unused settings. E.g. there are "dummy" values like "your-redis-port" etc -- if you are not using them, you MUST remove them.
nano .env

# launch the container (the appropriate image for your architecture will be pulled automatically)
docker run -it --rm  -v ./.env:/langroid/.env langroid/langroid:latest

# Use this command to run any of the scripts in the `examples` directory
python examples/<Path/To/Example.py> 
``` 



# 🎉 Usage Examples

These are quick teasers to give a glimpse of what you can do with Langroid
and how your code would look. 

⚠️ The code snippets below are intended to give a flavor of the code
and they are **not** complete runnable examples! For that we encourage you to 
consult the [`langroid-examples`](https://github.com/langroid/langroid-examples) 
repository.

ℹ️
The various LLM prompts and instructions in Langroid
have been tested to work well with GPT-4 (and to some extent GPT-4o).
Switching to other LLMs (local/open and proprietary) is easy (see guides mentioned above),
and may suffice for some applications, but in general you may see inferior results
unless you adjust the prompts and/or the multi-agent setup.


📖 Also see the
[`Getting Started Guide`](https://langroid.github.io/langroid/quick-start/)
for a detailed tutorial.



Click to expand any of the code examples below.
All of these can be run in a Colab notebook:
[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langroid/langroid/blob/main/examples/Langroid_quick_start.ipynb)

<details>
<summary> <b> Direct interaction with LLM </b> </summary>

```python
import langroid.language_models as lm

mdl = lm.OpenAIGPT(
    lm.OpenAIGPTConfig(
        chat_model=lm.OpenAIChatModel.GPT4o, # or, e.g.  "ollama/qwen2.5"
    ),
)

messages = [
  lm.LLMMessage(content="You are a helpful assistant",  role=lm.Role.SYSTEM), 
  lm.LLMMessage(content="What is the capital of Ontario?",  role=lm.Role.USER),
]

response = mdl.chat(messages, max_tokens=200)
print(response.message)
```
See the guides to use
([local/open LLMs](https://langroid.github.io/langroid/tutorials/local-llm-setup/) or [remote/commercial LLMs](https://langroid.github.io/langroid/tutorials/non-openai-llms/)).
</details>

<details>
<summary> <b> Interaction with non-OpenAI LLM (local or remote) </b> </summary>
Local model: if the model is served at `http://localhost:8000`:

```python
import langroid.language_models as lm

cfg = lm.OpenAIGPTConfig(
  chat_model="local/localhost:8000", 
  chat_context_length=4096
)
mdl = lm.OpenAIGPT(cfg)
# now interact with it as above, or create an Agent + Task as shown below.
```
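
Remote (non-OpenAI) model: in many cases you can simply set `chat_model` to a
provider-prefixed model name, with the provider's API key in your environment.
A minimal sketch (the model name and the `GEMINI_API_KEY` variable below are
illustrative; see the non-OpenAI LLM guide linked above for the exact provider
prefixes and key names):

```python
# assumes GEMINI_API_KEY (or the relevant provider's key) is set in your environment/.env
cfg = lm.OpenAIGPTConfig(
    chat_model="gemini/gemini-2.0-flash",  # illustrative provider-prefixed model name
)
mdl = lm.OpenAIGPT(cfg)
```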
</details>

<details>
<summary> <b> Define an agent, set up a task, and run it </b> </summary>

```python
import langroid as lr

agent = lr.ChatAgent()

# get response from agent's LLM, and put this in an interactive loop...
# answer = agent.llm_response("What is the capital of Ontario?")
# ... OR instead, set up a task (which has a built-in loop) and run it
task = lr.Task(agent, name="Bot") 
task.run() # ... a loop seeking response from LLM or User at each turn
```
</details>

<details>
<summary><b> Three communicating agents </b></summary>

A toy numbers game, where when given a number `n`:
- `repeater_task`'s LLM simply returns `n`,
- `even_task`'s LLM returns `n/2` if `n` is even, else says "DO-NOT-KNOW"
- `odd_task`'s LLM returns `3*n+1` if `n` is odd, else says "DO-NOT-KNOW"

Each of these `Task`s automatically configures a default `ChatAgent`.

```python
import langroid as lr
from langroid.utils.constants import NO_ANSWER

repeater_task = lr.Task(
    name = "Repeater",
    system_message="""
    Your job is to repeat whatever number you receive.
    """,
    llm_delegate=True, # LLM takes charge of task
    single_round=False, 
)

even_task = lr.Task(
    name = "EvenHandler",
    system_message=f"""
    You will be given a number. 
    If it is even, divide by 2 and say the result, nothing else.
    If it is odd, say {NO_ANSWER}
    """,
    single_round=True,  # task done after 1 step() with valid response
)

odd_task = lr.Task(
    name = "OddHandler",
    system_message=f"""
    You will be given a number n. 
    If it is odd, return (n*3+1), say nothing else. 
    If it is even, say {NO_ANSWER}
    """,
    single_round=True,  # task done after 1 step() with valid response
)
```
Then add the `even_task` and `odd_task` as sub-tasks of `repeater_task`, 
and run the `repeater_task`, kicking it off with a number as input:
```python
repeater_task.add_sub_task([even_task, odd_task])
repeater_task.run("3")
```

</details>

<details>
<summary><b> Simple Tool/Function-calling example </b></summary>

Langroid leverages Pydantic to support OpenAI's
[Function-calling API](https://platform.openai.com/docs/guides/gpt/function-calling)
as well as its own native tools. The benefits are that you don't have to write
any JSON to specify the schema, and also if the LLM hallucinates a malformed
tool syntax, Langroid sends the Pydantic validation error (suitably sanitized) 
to the LLM so it can fix it!

Simple example: Say the agent has a secret list of numbers, 
and we want the LLM to find the smallest number in the list. 
We want to give the LLM a `probe` tool/function which takes a
single number `n` as argument. The tool handler method in the agent
returns how many numbers in its list are at most `n`.

First define the tool using Langroid's `ToolMessage` class:


```python
import langroid as lr

class ProbeTool(lr.agent.ToolMessage):
  request: str = "probe" # specifies which agent method handles this tool
  purpose: str = """
        To find how many numbers in my list are less than or equal to  
        the <number> you specify.
        """ # description used to instruct the LLM on when/how to use the tool
  number: int  # required argument to the tool
```

Then define a `SpyGameAgent` as a subclass of `ChatAgent`, 
with a method `probe` that handles this tool:

```python
class SpyGameAgent(lr.ChatAgent):
  def __init__(self, config: lr.ChatAgentConfig):
    super().__init__(config)
    self.numbers = [3, 4, 8, 11, 15, 25, 40, 80, 90]

  def probe(self, msg: ProbeTool) -> str:
    # return how many numbers in self.numbers are less than or equal to msg.number
    return str(len([n for n in self.numbers if n <= msg.number]))
```

We then instantiate the agent and enable it to use and respond to the tool:

```python
spy_game_agent = SpyGameAgent(
    lr.ChatAgentConfig(
        name="Spy",
        vecdb=None,
        use_tools=False, #  don't use Langroid native tool
        use_functions_api=True, # use OpenAI function-call API
    )
)
spy_game_agent.enable_message(ProbeTool)
```
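
To actually play the game, you can wrap the agent in a `Task` and run it, following
the same pattern as the earlier examples. A minimal sketch (the `system_message`
wording here is illustrative, not necessarily what the full script uses):

```python
task = lr.Task(
    spy_game_agent,
    name="Spy",
    system_message="""
    I have a secret list of numbers. Your job is to find the smallest one,
    using ONLY the `probe` tool/function to learn how many of my numbers
    are at most a number you specify.
    """,
)
task.run()
```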

For a full working example see the
[chat-agent-tool.py](https://github.com/langroid/langroid-examples/blob/main/examples/quick-start/chat-agent-tool.py)
script in the `langroid-examples` repo.
</details>

<details>
<summary> <b>Tool/Function-calling to extract structured information from text </b> </summary>

Suppose you want an agent to extract 
the key terms of a lease, from a lease document, as a nested JSON structure.
First define the desired structure via Pydantic models:

```python
from pydantic import BaseModel
class LeasePeriod(BaseModel):
    start_date: str
    end_date: str


class LeaseFinancials(BaseModel):
    monthly_rent: str
    deposit: str

class Lease(BaseModel):
    period: LeasePeriod
    financials: LeaseFinancials
    address: str
```

Then define the `LeaseMessage` tool as a subclass of Langroid's `ToolMessage`.
Note the tool has a required argument `terms` of type `Lease`:

```python
import langroid as lr

class LeaseMessage(lr.agent.ToolMessage):
    request: str = "lease_info"
    purpose: str = """
        Collect information about a Commercial Lease.
        """
    terms: Lease
```

Then define a `LeaseExtractorAgent` with a method `lease_info` that handles this tool,
instantiate the agent, and enable it to use and respond to this tool:

```python
import json

class LeaseExtractorAgent(lr.ChatAgent):
    def lease_info(self, message: LeaseMessage) -> str:
        print(
            f"""
        DONE! Successfully extracted Lease Info:
        {message.terms}
        """
        )
        return json.dumps(message.terms.dict())
    
lease_extractor_agent = LeaseExtractorAgent()
lease_extractor_agent.enable_message(LeaseMessage)
```
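
To drive the extraction, you would typically wrap the agent in a `Task` and run it on
the lease text, again following the earlier Task pattern. A minimal sketch (the
`system_message` wording and the `lease_contents` variable are illustrative):

```python
task = lr.Task(
    lease_extractor_agent,
    name="LeaseExtractor",
    system_message="""
    Extract the terms of the Commercial Lease from the text you receive,
    and present them using the `lease_info` tool/function.
    """,
)
task.run(lease_contents)  # lease_contents: the lease document text (assumed to be defined)
```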

See the [`chat_multi_extract.py`](https://github.com/langroid/langroid-examples/blob/main/examples/docqa/chat_multi_extract.py)
script in the `langroid-examples` repo for a full working example.
</details>

<details>
<summary><b> Chat with documents (file paths, URLs, etc) </b></summary>

Langroid provides a specialized agent class `DocChatAgent` for this purpose.
It incorporates document sharding, embedding, storage in a vector-DB, 
and retrieval-augmented query-answer generation.
Using this class to chat with a collection of documents is easy.
First create a `DocChatAgentConfig` instance, with a 
`doc_paths` field that specifies the documents to chat with.

```python
import langroid as lr
from langroid.agent.special import DocChatAgentConfig, DocChatAgent

config = DocChatAgentConfig(
  doc_paths = [
    "https://en.wikipedia.org/wiki/Language_model",
    "https://en.wikipedia.org/wiki/N-gram_language_model",
    "/path/to/my/notes-on-language-models.txt",
  ],
  vecdb=lr.vector_store.QdrantDBConfig(),
)
```

Then instantiate the `DocChatAgent` (this ingests the docs into the vector-store):

```python
agent = DocChatAgent(config)
```
Then we can either ask the agent one-off questions,
```python
agent.llm_response("What is a language model?")
```
or wrap it in a `Task` and run an interactive loop with the user:
```python
task = lr.Task(agent)
task.run()
```

See full working scripts in the 
[`docqa`](https://github.com/langroid/langroid-examples/tree/main/examples/docqa)
folder of the `langroid-examples` repo.
</details>

<details>
<summary><b> 🔥 Chat with tabular data (file paths, URLs, dataframes) </b></summary>

Using Langroid you can set up a `TableChatAgent` with a dataset (file path, URL or dataframe),
and query it. The Agent's LLM generates Pandas code to answer the query, 
via function-calling (or tool/plugin), and the Agent's function-handling method
executes the code and returns the answer.

Here is how you can do this:

```python
import langroid as lr
from langroid.agent.special import TableChatAgent, TableChatAgentConfig
```

Set up a `TableChatAgent` for a data file, URL or dataframe
(Ensure the data table has a header row; the delimiter/separator is auto-detected):
```python
dataset =  "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
# or dataset = "/path/to/my/data.csv"
# or dataset = pd.read_csv("/path/to/my/data.csv")
agent = TableChatAgent(
    config=TableChatAgentConfig(
        data=dataset,
    )
)
```
Set up a task, and ask one-off questions like this: 

```python
task = lr.Task(
  agent, 
  name = "DataAssistant",
  default_human_response="", # to avoid waiting for user input
)
result = task.run(
  "What is the average alcohol content of wines with a quality rating above 7?",
  turns=2 # return after user question, LLM fn-call/tool response, Agent code-exec result
) 
print(result.content)
```
Or alternatively, set up a task and run it in an interactive loop with the user:

```python
task = lr.Task(agent, name="DataAssistant")
task.run()
``` 

For a full working example see the 
[`table_chat.py`](https://github.com/langroid/langroid-examples/tree/main/examples/data-qa/table_chat.py)
script in the `langroid-examples` repo.


</details>

---

# ❤️ Thank you to our [supporters](https://github.com/langroid/langroid/stargazers)

If you like this project, please give it a star ⭐ and 📢 spread the word in your network or social media:

[![Share on Twitter](https://img.shields.io/twitter/url?style=social&url=https://github.com/langroid/langroid)](https://twitter.com/intent/tweet?text=Langroid%20is%20a%20powerful,%20elegant%20new%20framework%20to%20easily%20build%20%23LLM%20applications.%20You%20set%20up%20LLM-powered%20Agents%20with%20vector-stores,%20assign%20tasks,%20and%20have%20them%20collaboratively%20solve%20problems%20via%20message-transformations.%20https://github.com/langroid/langroid)
[![Share on LinkedIn](https://img.shields.io/badge/Share%20on-LinkedIn-blue)](https://www.linkedin.com/shareArticle?mini=true&url=https://github.com/langroid/langroid&title=Langroid:%20A%20Powerful,%20Elegant%20Framework&summary=Langroid%20is%20a%20powerful,%20elegant%20new%20framework%20to%20easily%20build%20%23LLM%20applications.%20You%20set%20up%20LLM-powered%20Agents%20with%20vector-stores,%20assign%20tasks,%20and%20have%20them%20collaboratively%20solve%20problems%20via%20message-transformations.)
[![Share on Hacker News](https://img.shields.io/badge/-Share%20on%20Hacker%20News-orange)](https://news.ycombinator.com/submitlink?u=https%3A%2F%2Fgithub.com%2Flangroid%2Flangroid&t=Harness%20LLMs%20with%20Multi-Agent%20Programming)
[![Share on Reddit](https://img.shields.io/badge/-Share%20on%20Reddit-blue)](https://www.reddit.com/submit?url=https%3A%2F%2Fgithub.com%2Flangroid%2Flangroid&title=Harness%20LLMs%20with%20Multi-Agent%20Programming)

Your support will help build Langroid's momentum and community.

# Langroid Co-Founders

- [Prasad Chalasani](https://www.linkedin.com/in/pchalasani/) (IIT BTech/CS, CMU PhD/ML; Independent ML Consultant)
- [Somesh Jha](https://www.linkedin.com/in/somesh-jha-80208015/) (IIT BTech/CS, CMU PhD/CS; Professor of CS, U Wisc at Madison)
</file>

<file path="langroid/language_models/base.py">
logger = logging.getLogger(__name__)
⋮----
def noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None
⋮----
async def async_noop_fn(*args: List[Any], **kwargs: Dict[str, Any]) -> None
⋮----
FunctionCallTypes = Literal["none", "auto"]
ToolChoiceTypes = Literal["none", "auto", "required"]
ToolTypes = Literal["function"]
⋮----
DEFAULT_CONTEXT_LENGTH = 16_000
⋮----
class StreamEventType(Enum)
⋮----
TEXT = 1
FUNC_NAME = 2
FUNC_ARGS = 3
TOOL_NAME = 4
TOOL_ARGS = 5
REASONING = 6
⋮----
class RetryParams(BaseSettings)
⋮----
max_retries: int = 5
initial_delay: float = 1.0
exponential_base: float = 1.3
jitter: bool = True
⋮----
class LLMConfig(BaseSettings)
⋮----
"""
    Common configuration for all language models.
    """
⋮----
type: str = "openai"
streamer: Optional[Callable[[Any], None]] = noop_fn
streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
api_base: str | None = None
formatter: None | str = None
# specify None if you want to use the full max output tokens of the model
max_output_tokens: int | None = 8192
timeout: int = 20  # timeout for API requests
chat_model: str = ""
completion_model: str = ""
temperature: float = 0.0
chat_context_length: int | None = None
async_stream_quiet: bool = False  # suppress streaming output in async mode?
completion_context_length: int | None = None
# if input length + max_output_tokens > context length of model,
# we will try shortening requested output
min_output_tokens: int = 64
use_completion_for_chat: bool = False  # use completion model for chat?
# use chat model for completion? For OpenAI models, this MUST be set to True!
use_chat_for_completion: bool = True
stream: bool = True  # stream output from API?
# TODO: we could have a `stream_reasoning` flag here to control whether to show
# reasoning output from reasoning models
cache_config: None | CacheDBConfig = RedisCacheConfig()
thought_delimiters: Tuple[str, str] = ("<think>", "</think>")
retry_params: RetryParams = RetryParams()
⋮----
@property
    def model_max_output_tokens(self) -> int
⋮----
class LLMFunctionCall(BaseModel)
⋮----
"""
    Structure of LLM response indicating it "wants" to call a function.
    Modeled after OpenAI spec for `function_call` field in ChatCompletion API.
    """
⋮----
name: str  # name of function to call
arguments: Optional[Dict[str, Any]] = None
⋮----
@staticmethod
    def from_dict(message: Dict[str, Any]) -> "LLMFunctionCall"
⋮----
"""
        Initialize from dictionary.
        Args:
            message: dictionary containing fields to initialize
        """
fun_call = LLMFunctionCall(name=message["name"])
fun_args_str = message["arguments"]
# sometimes may be malformed with invalid indents,
# so we try to be safe by removing newlines.
⋮----
fun_args_str = fun_args_str.replace("\n", "").strip()
dict_or_list = parse_imperfect_json(fun_args_str)
⋮----
fun_args = dict_or_list
⋮----
fun_args = None
⋮----
def __str__(self) -> str
⋮----
class LLMFunctionSpec(BaseModel)
⋮----
"""
    Description of a function available for the LLM to use.
    To be used when calling the LLM `chat()` method with the `functions` parameter.
    Modeled after OpenAI spec for `functions` fields in ChatCompletion API.
    """
⋮----
name: str
description: str
parameters: Dict[str, Any]
⋮----
class OpenAIToolCall(BaseModel)
⋮----
"""
    Represents a single tool call in a list of tool calls generated by OpenAI LLM API.
    See https://platform.openai.com/docs/api-reference/chat/create

    Attributes:
        id: The id of the tool call.
        type: The type of the tool call;
            only "function" is currently possible (7/26/24).
        function: The function call.
    """
⋮----
id: str | None = None
type: ToolTypes = "function"
function: LLMFunctionCall | None = None
extra_content: Dict[str, Any] | None = None
⋮----
@staticmethod
    def from_dict(message: Dict[str, Any]) -> "OpenAIToolCall"
⋮----
id = message["id"]
type = message["type"]
function = LLMFunctionCall.from_dict(message["function"])
extra_content = message.get("extra_content")
⋮----
class OpenAIToolSpec(BaseModel)
⋮----
type: ToolTypes
strict: Optional[bool] = None
function: LLMFunctionSpec
⋮----
class OpenAIJsonSchemaSpec(BaseModel)
⋮----
def to_dict(self) -> Dict[str, Any]
⋮----
json_schema: Dict[str, Any] = {
⋮----
class LLMTokenUsage(BaseModel)
⋮----
"""
    Usage of tokens by an LLM.
    """
⋮----
prompt_tokens: int = 0
cached_tokens: int = 0
completion_tokens: int = 0
cost: float = 0.0
calls: int = 0  # how many API calls - not used as of 2025-04-04
⋮----
def reset(self) -> None
⋮----
@property
    def total_tokens(self) -> int
⋮----
class Role(str, Enum)
⋮----
"""
    Possible roles for a message in a chat.
    """
⋮----
USER = "user"
SYSTEM = "system"
ASSISTANT = "assistant"
FUNCTION = "function"
TOOL = "tool"
⋮----
class LLMMessage(BaseModel)
⋮----
"""
    Class representing an entry in the msg-history sent to the LLM API.
    It could be one of these:
    - a user message
    - an LLM ("Assistant") response
    - a fn-call or tool-call-list from an OpenAI-compatible LLM API response
    - a result or results from executing a fn or tool-call(s)
    """
⋮----
role: Role
name: Optional[str] = None
tool_call_id: Optional[str] = None  # which OpenAI LLM tool this is a response to
tool_id: str = ""  # used by OpenAIAssistant
content: str
files: List[FileAttachment] = []
function_call: Optional[LLMFunctionCall] = None
tool_calls: Optional[List[OpenAIToolCall]] = None
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
# link to corresponding chat document, for provenance/rewind purposes
chat_document_id: str = ""
⋮----
def api_dict(self, model: str, has_system_role: bool = True) -> Dict[str, Any]
⋮----
"""
        Convert to dictionary for API request, keeping ONLY
        the fields that are expected in an API call!
        E.g., DROP the tool_id, since it is only for use in the Assistant API,
            not the completion API.

        Args:
            has_system_role: whether the message has a system role (if not,
                set to "user" role)
        Returns:
            dict: dictionary representation of LLM message
        """
d = self.model_dump()
files: List[FileAttachment] = d.pop("files")
⋮----
# If there are files, then content is an array of
# different content-parts
⋮----
# if there is a key k = "role" with value "system", change to "user"
# in case has_system_role is False
⋮----
# drop None values since API doesn't accept them
dict_no_none = {k: v for k, v in d.items() if v is not None}
⋮----
# OpenAI API does not like empty name
⋮----
# arguments must be a string
⋮----
# convert tool calls to API format
⋮----
# arguments must be a string
⋮----
# IMPORTANT! drop fields that are not expected in API call
⋮----
content = "FUNC: " + json.dumps(self.function_call)
⋮----
content = self.content
name_str = f" ({self.name})" if self.name else ""
⋮----
class LLMResponse(BaseModel)
⋮----
"""
    Class representing response from LLM.
    """
⋮----
message: str
reasoning: str = ""  # optional reasoning text from reasoning models
# Original message text including inline thought signatures (e.g.
# <thinking>...</thinking>). Only set when reasoning was extracted
# from the message text via get_reasoning_final(); NOT set when
# reasoning comes from a separate API field (e.g. reasoning_content),
# since in that case the message text never contained thought tags.
message_with_reasoning: Optional[str] = None
# TODO tool_id needs to generalize to multi-tool calls
⋮----
oai_tool_calls: Optional[List[OpenAIToolCall]] = None
⋮----
usage: Optional[LLMTokenUsage] = None
cached: bool = False
⋮----
def tools_content(self) -> str
⋮----
def to_LLMMessage(self) -> LLMMessage
⋮----
"""Convert LLM response to an LLMMessage, to be included in the
        message-list sent to the API.
        This is currently NOT used in any significant way in the library, and is only
        provided as a utility to construct a message list for the API when directly
        working with an LLM object.

        In a `ChatAgent`, an LLM response is first converted to a ChatDocument,
        which is in turn converted to an LLMMessage via `ChatDocument.to_LLMMessage()`
        See `ChatAgent._prep_llm_messages()` and `ChatAgent.llm_response_messages`
        """
⋮----
"""
        If `message` or `function_call` of an LLM response contains an explicit
        recipient name, return this recipient name and `message` stripped
        of the recipient name if specified.

        Two cases:
        (a) `message` contains addressing string ``TO[<name>]:<content>``, or
        (b) `message` is empty and function_call/tool_call with explicit `recipient`

        Args:
            recognize_recipient_in_content (bool): When True (default), parses
                message text for ``TO[<recipient>]:<content>`` patterns and
                top-level JSON ``{"recipient": "..."}`` fields. When False,
                only function_call/tool_call ``recipient`` fields are checked.

        Returns:
            (str): name of recipient, which may be empty string if no recipient
            (str): content of message

        """
⋮----
# in this case we ignore message, since all information is in function_call
msg = ""
args = self.function_call.arguments
recipient = ""
⋮----
recipient = args.get("recipient", "")
⋮----
msg = self.message
⋮----
# get the first tool that has a recipient field, if any
⋮----
recipient = tc.function.arguments.get(
⋮----
)  # type: ignore
⋮----
# It's not a function or tool call, so continue looking to see
# if a recipient is specified in the message.
⋮----
# First check if message contains "TO: <recipient> <content>"
⋮----
# check if there is a top level json that specifies 'recipient',
# and retain the entire message as content.
⋮----
recipient_name = top_level_json_field(msg, "recipient") if msg else ""
content = msg
⋮----
# Define an abstract base class for language models
class LanguageModel(ABC)
⋮----
"""
    Abstract base class for language models.
    """
⋮----
# usage cost by model, accumulates here
usage_cost_dict: Dict[str, LLMTokenUsage] = {}
⋮----
def __init__(self, config: LLMConfig = LLMConfig())
⋮----
@staticmethod
    def create(config: Optional[LLMConfig]) -> Optional["LanguageModel"]
⋮----
"""
        Create a language model.
        Args:
            config: configuration for language model
        Returns: instance of language model
        """
⋮----
openai: Union[Type[AzureGPT], Type[OpenAIGPT]]
⋮----
openai = AzureGPT
⋮----
openai = OpenAIGPT
cls = dict(
return cls(config)  # type: ignore
⋮----
@staticmethod
    def user_assistant_pairs(lst: List[str]) -> List[Tuple[str, str]]
⋮----
"""
        Given an even-length sequence of strings, split into a sequence of pairs

        Args:
            lst (List[str]): sequence of strings

        Returns:
            List[Tuple[str,str]]: sequence of pairs of strings
        """
evens = lst[::2]
odds = lst[1::2]
⋮----
"""
        From the chat history, extract system prompt, user-assistant turns, and
        final user msg.

        Args:
            messages (List[LLMMessage]): List of messages in the chat history

        Returns:
            Tuple[str, List[Tuple[str,str]], str]:
                system prompt, user-assistant turns, final user msg

        """
# Handle various degenerate cases
messages = [m for m in messages]  # copy
DUMMY_SYS_PROMPT = "You are a helpful assistant."
DUMMY_USER_PROMPT = "Follow the instructions above."
⋮----
system_prompt = messages[0].content
⋮----
# now we have messages = [Sys,...]
⋮----
# now we have messages = [Sys, msg, ...]
⋮----
# now we have messages = [Sys, user, ...]
⋮----
# now we have messages = [Sys, user, ..., user]
# so we omit the first and last elements and make pairs of user-asst messages
conversation = [m.content for m in messages[1:-1]]
user_prompt = messages[-1].content
pairs = LanguageModel.user_assistant_pairs(conversation)
⋮----
@abstractmethod
    def set_stream(self, stream: bool) -> bool
⋮----
"""Enable or disable streaming output from API.
        Return previous value of stream."""
⋮----
@abstractmethod
    def get_stream(self) -> bool
⋮----
"""Get streaming status"""
⋮----
@abstractmethod
    def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse
⋮----
@abstractmethod
    async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse
⋮----
"""
        Get chat-completion response from LLM.

        Args:
            messages: message-history to send to the LLM
            max_tokens: max tokens to generate
            tools: tools available for the LLM to use in its response
            tool_choice: tool call mode, one of "none", "auto", "required",
                or a dict specifying a specific tool.
            functions: functions available for LLM to call (deprecated)
            function_call: function calling mode, "auto", "none", or a specific fn
                    (deprecated)
        """
⋮----
"""Async version of `chat`. See `chat` for details."""
⋮----
def __call__(self, prompt: str, max_tokens: int) -> LLMResponse
⋮----
@staticmethod
    def _fallback_model_names(model: str) -> List[str]
⋮----
parts = model.split("/")
fallbacks = []
⋮----
def info(self) -> ModelInfo
⋮----
"""Info of relevant chat model"""
orig_model = (
⋮----
def completion_info(self) -> ModelInfo
⋮----
"""Info of relevant completion model"""
⋮----
def supports_functions_or_tools(self) -> bool
⋮----
"""
        Does this Model's API support "native" tool-calling, i.e.
        can we call the API with arguments that contain a list of available tools,
        and their schemas?
        Note that, given the plethora of LLM provider APIs this determination is
        imperfect at best, and leans towards returning True.
        When the API calls fails with an error indicating tools are not supported,
        then users are encouraged to use the Langroid-based prompt-based
        ToolMessage mechanism, which works with ANY LLM. To enable this,
        in your ChatAgentConfig, set `use_functions_api=False`, and `use_tools=True`.
        """
⋮----
def chat_context_length(self) -> int
⋮----
def completion_context_length(self) -> int
⋮----
def chat_cost(self) -> Tuple[float, float, float]
⋮----
"""
        Return the cost per 1000 tokens for chat completions.

        Returns:
            Tuple[float, float, float]: (input_cost, cached_cost, output_cost)
                per 1000 tokens
        """
⋮----
def reset_usage_cost(self) -> None
⋮----
counter = self.usage_cost_dict[mdl]
⋮----
"""
        Update usage cost for this LLM.
        Args:
            chat (bool): whether to update for chat or completion model
            prompts (int): number of tokens used for prompts
            completions (int): number of tokens used for completions
            cost (float): total token cost in USD
        """
mdl = self.config.chat_model if chat else self.config.completion_model
⋮----
@classmethod
    def usage_cost_summary(cls) -> str
⋮----
s = ""
⋮----
@classmethod
    def tot_tokens_cost(cls) -> Tuple[int, float]
⋮----
"""
        Return total tokens used and total cost across all models.
        """
total_tokens = 0
total_cost = 0.0
⋮----
def get_reasoning_final(self, message: str) -> Tuple[str, str]
⋮----
"""Extract "reasoning" and "final answer" from an LLM response, if the
        reasoning is found within configured delimiters, like <think>, </think>.
        E.g.,
        '<think> Okay, let's see, the user wants... </think> 2 + 3 = 5'

        Args:
            message (str): message from LLM

        Returns:
            Tuple[str, str]: reasoning, final answer
        """
⋮----
parts = message.split(start)
⋮----
"""
        Given a chat history and a question, convert it to a standalone question.
        Args:
            chat_history: list of tuples of (question, answer)
            query: follow-up question

        Returns: standalone version of the question
        """
history = collate_chat_history(chat_history)
⋮----
prompt = f"""
⋮----
follow_up_question = f"""
⋮----
standalone = self.chat(
⋮----
class StreamingIfAllowed
⋮----
"""Context to temporarily enable or disable streaming, if allowed globally via
    `settings.stream`"""
⋮----
def __init__(self, llm: LanguageModel, stream: bool = True)
⋮----
def __enter__(self) -> None
⋮----
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None
</file>

<file path="langroid/language_models/model_info.py">
logger = logging.getLogger(__name__)
⋮----
class ModelProvider(str, Enum)
⋮----
"""Enum for model providers"""
⋮----
OPENAI = "openai"
ANTHROPIC = "anthropic"
DEEPSEEK = "deepseek"
GOOGLE = "google"
UNKNOWN = "unknown"
⋮----
class ModelName(str, Enum)
⋮----
"""Parent class for all model name enums"""
⋮----
class OpenAIChatModel(ModelName)
⋮----
"""Enum for OpenAI Chat models"""
⋮----
GPT3_5_TURBO = "gpt-3.5-turbo"
GPT4 = "gpt-4o"  # avoid deprecated gpt-4
GPT4_TURBO = "gpt-4-turbo"
GPT4o = "gpt-4o"
GPT4o_MINI = "gpt-4o-mini"
O1 = "o1"
O1_MINI = "o1-mini"
O3_MINI = "o3-mini"
O3 = "o3"
O4_MINI = "o4-mini"
GPT4_1 = "gpt-4.1"
GPT4_1_MINI = "gpt-4.1-mini"
GPT4_1_NANO = "gpt-4.1-nano"
GPT5 = "gpt-5"
GPT5_MINI = "gpt-5-mini"
GPT5_NANO = "gpt-5-nano"
GPT5_PRO = "gpt-5-pro"
GPT5_1 = "gpt-5.1"
GPT5_1_CODEX = "gpt-5.1-codex"
GPT5_1_CODEX_MINI = "gpt-5.1-codex-mini"
GPT5_1_CHAT = "gpt-5.1-chat"
GPT5_2 = "gpt-5.2"
GPT5_2_PRO = "gpt-5.2-pro"
GPT5_2_CHAT = "gpt-5.2-chat"
GPT_OSS_120b = "gpt-oss-120b"
GPT_OSS_20b = "gpt-oss-20b"
⋮----
class OpenAICompletionModel(str, Enum)
⋮----
"""Enum for OpenAI Completion models"""
⋮----
DAVINCI = "davinci-002"
BABBAGE = "babbage-002"
⋮----
class AnthropicModel(ModelName)
⋮----
"""Enum for Anthropic models"""
⋮----
CLAUDE_3_OPUS = "claude-3-opus-latest"
CLAUDE_3_SONNET = "claude-3-sonnet-latest"
CLAUDE_3_HAIKU = "claude-3-haiku-latest"
CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
CLAUDE_4_OPUS = "claude-opus-4"
CLAUDE_4_SONNET = "claude-sonnet-4"
CLAUDE_4_HAIKU = "claude-haiku-4"
CLAUDE_4_5_OPUS = "claude-opus-4-5"
CLAUDE_4_5_SONNET = "claude-sonnet-4-5"
CLAUDE_4_5_HAIKU = "claude-haiku-4-5"
⋮----
class DeepSeekModel(ModelName)
⋮----
"""Enum for DeepSeek models direct from DeepSeek API"""
⋮----
DEEPSEEK = "deepseek/deepseek-chat"
DEEPSEEK_R1 = "deepseek/deepseek-reasoner"
OPENROUTER_DEEPSEEK_R1 = "openrouter/deepseek/deepseek-r1"
⋮----
class GeminiModel(ModelName)
⋮----
"""Enum for Gemini models"""
⋮----
GEMINI_1_5_FLASH = "gemini-1.5-flash"
GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
GEMINI_1_5_PRO = "gemini-1.5-pro"
GEMINI_2_FLASH = "gemini-2.0-flash"
GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite"
GEMINI_2_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
GEMINI_2_5_FLASH = "gemini-2.5-flash"
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite"
GEMINI_2_5_PRO = "gemini-2.5-pro"
GEMINI_3_FLASH = "gemini-3-flash"
GEMINI_3_PRO = "gemini-3-pro"
⋮----
class OpenAI_API_ParamInfo(BaseModel)
⋮----
"""
    Parameters exclusive to some models, when using OpenAI API
    """
⋮----
# model-specific params at top level
params: Dict[str, List[str]] = dict(
# model-specific params in extra_body
extra_parameters: Dict[str, List[str]] = dict(
⋮----
class ModelInfo(BaseModel)
⋮----
"""
    Consolidated information about LLM, related to capacity, cost and API
    idiosyncrasies. Reasonable defaults for all params in case there's no
    specific info available.
    """
⋮----
name: str = "unknown"
provider: ModelProvider = ModelProvider.UNKNOWN
context_length: int = 16_000
max_cot_tokens: int = 0  # max chain of thought (thinking) tokens where applicable
max_output_tokens: int = 8192  # Maximum number of output tokens - model dependent
input_cost_per_million: float = 0.0  # Cost in USD per million input tokens
cached_cost_per_million: float = 0.0  # Cost in USD per million cached tokens
output_cost_per_million: float = 0.0  # Cost in USD per million output tokens
allows_streaming: bool = True  # Whether model supports streaming output
allows_system_message: bool = True  # Whether model supports system messages
rename_params: Dict[str, str] = {}  # Rename parameters for OpenAI API
unsupported_params: List[str] = []
has_structured_output: bool = False  # Does model API support structured output?
has_tools: bool = True  # Does model API support tools/function-calling?
needs_first_user_message: bool = False  # Does API need first msg to be from user?
description: Optional[str] = None
⋮----
GEMINI_CANONICAL_MODEL_NAMES = {model.value for model in GeminiModel}
DEFAULT_MODEL_INFO = ModelInfo()
WARNED_UNKNOWN_MODELS: set[tuple[str, ...]] = set()
⋮----
# Model information registry
MODEL_INFO: Dict[str, ModelInfo] = {
⋮----
# OpenAI Models
⋮----
# Anthropic Models
⋮----
# DeepSeek Models
⋮----
# Gemini Models
⋮----
# Gemini 2.5 Models
⋮----
# Gemini 3 Models
⋮----
"""Get model information by name or enum value"""
# Sequence of models to try, starting with the primary model
models_to_try = [model] + fallback_models
⋮----
# Find the first model in the sequence that has info defined using next()
# on a generator expression that filters out None results from _get_model_info
found_info = next(
⋮----
None,  # Default value if the iterator is exhausted (no valid info found)
⋮----
normalized_models = _normalize_model_names(models_to_try)
⋮----
def _get_model_info(model: str | ModelName) -> ModelInfo | None
⋮----
def _normalize_model_names(models: List[str | ModelName]) -> List[str]
⋮----
normalized_models: List[str] = []
seen: set[str] = set()
⋮----
normalized_model = _normalize_gemini_model_name(_model_name(model))
⋮----
def _normalize_gemini_model_name(model: str) -> str | None
⋮----
base_model = model.rsplit("/", 1)[-1]
⋮----
preview_base = base_model.split("-preview", maxsplit=1)[0]
⋮----
def _warn_unknown_model(models: List[str | ModelName]) -> None
⋮----
model_names = tuple(_model_name(model) for model in models)
⋮----
def _model_name(model: str | ModelName) -> str
</file>

<file path="langroid/agent/chat_agent.py">
console = Console()
⋮----
logger = logging.getLogger(__name__)
⋮----
class ChatAgentConfig(AgentConfig)
⋮----
"""
    Configuration for ChatAgent

    Attributes:
        system_message: system message to include in message sequence
             (typically defines role and task of agent).
             Used only if `task` is not specified in the constructor.
        user_message: user message to include in message sequence.
             Used only if `task` is not specified in the constructor.
        use_tools: whether to use our own ToolMessages mechanism
        handle_llm_no_tool (Any): desired agent_response when
            LLM generates non-tool msg.
        use_functions_api: whether to use functions/tools native to the LLM API
                (e.g. OpenAI's `function_call` or `tool_call` mechanism)
        use_tools_api: When `use_functions_api` is True, if this is also True,
            the OpenAI tool-call API is used, rather than the older/deprecated
            function-call API. However the tool-call API has some tricky aspects,
            hence we set this to False by default.
        strict_recovery: whether to enable strict schema recovery when there
            is a tool-generation error.
        enable_orchestration_tool_handling: whether to enable handling of orchestration
            tools, e.g. ForwardTool, DoneTool, PassTool, etc.
        output_format: When supported by the LLM (certain OpenAI LLMs
            and local LLMs served by providers such as vLLM), ensures
            that the output is a JSON matching the corresponding
            schema via grammar-based decoding
        handle_output_format: When `output_format` is a `ToolMessage` T,
            controls whether T is "enabled for handling".
        use_output_format: When `output_format` is a `ToolMessage` T,
            controls whether T is "enabled for use" (by LLM) and
            instructions on using T are added to the system message.
        instructions_output_format: Controls whether we generate instructions for
            `output_format` in the system message.
        use_tools_on_output_format: Controls whether to automatically switch
            to the Langroid-native tools mechanism when `output_format` is set.
            Note that LLMs may generate tool calls which do not belong to
            `output_format` even when strict JSON mode is enabled, so this should be
            enabled when such tool calls are not desired.
        output_format_include_defaults: Whether to include fields with default arguments
            in the output schema
        full_citations: Whether to show source reference citation + content for each
            citation, or just the main reference citation.
        search_for_tools_everywhere: Whether to search for tools everywhere,
            or only in specific LLM response elements based on use_tools /
            use_functions_api / use_tools_api config settings.
        recognize_recipient_in_content: Whether to parse LLM response text content
            for recipient routing patterns, specifically:
            - ``TO[<recipient>]:<content>`` addressing format, and
            - JSON ``{"recipient": "<name>"}`` at the top level of the message.
            When False, only structured routing via function_call/tool_call
            ``recipient`` fields is recognized. Default is True.
            Note: this is distinct from ``TaskConfig.recognize_string_signals``,
            which controls Task-level signals like DONE, PASS, and SEND_TO.
            To fully disable all text-based routing, set both to False.
        context_overflow_strategy: Strategy for handling context overflow when
            message history exceeds model context length. Options:
            - "truncate": Truncate content of early messages (preserves all messages
              but with shortened content). This maintains the message sequence.
            - "drop_turns": Drop complete conversation turns (USER + all responses
              until next USER). More aggressive but cleaner for voice agents.
            Default is "truncate" for backward compatibility.
    """
⋮----
system_message: str = "You are a helpful assistant."
user_message: Optional[str] = None
handle_llm_no_tool: Any = None
use_tools: bool = True
use_functions_api: bool = False
use_tools_api: bool = True
strict_recovery: bool = True
enable_orchestration_tool_handling: bool = True
output_format: Optional[type] = None
handle_output_format: bool = True
use_output_format: bool = True
instructions_output_format: bool = True
output_format_include_defaults: bool = True
use_tools_on_output_format: bool = True
full_citations: bool = True  # show source + content for each citation?
search_for_tools_everywhere: bool = True
recognize_recipient_in_content: bool = True
context_overflow_strategy: Literal["truncate", "drop_turns"] = "truncate"
⋮----
def _set_fn_or_tools(self) -> None
⋮----
"""
        Enable Langroid Tool or OpenAI-like fn-calling,
        depending on config settings.
        """
⋮----
class ChatAgent(Agent)
⋮----
"""
    Chat Agent interacting with external env
    (could be human, or external tools).
    The agent (the LLM actually) is provided with an optional "Task Spec",
    which is a sequence of `LLMMessage`s. These are used to initialize
    the `task_messages` of the agent.
    In most applications we will use a `ChatAgent` rather than a bare `Agent`.
    The `Agent` class mainly exists to hold various common methods and attributes.
    One difference between `ChatAgent` and `Agent` is that `ChatAgent`'s
    `llm_response` method uses "chat mode" API (i.e. one that takes a
    message sequence rather than a single message),
    whereas the same method in the `Agent` class uses "completion mode" API (i.e. one
    that takes a single message).
    """
⋮----
"""
        Chat-mode agent initialized with task spec as the initial message sequence
        Args:
            config: settings for the agent

        """
⋮----
# An agent's "task" is defined by a system msg and an optional user msg;
# These are "priming" messages that kick off the agent's conversation.
⋮----
# if task contains a system msg, we override the config system msg
⋮----
# if task contains a user msg, we override the config user msg
⋮----
# system-level instructions for using tools/functions:
# We maintain these as tools/functions are enabled/disabled,
# and whenever an LLM response is sought, these are used to
# recreate the system message (via `_create_system_and_tools_message`)
# each time, so it reflects the current set of enabled tools/functions.
# (a) these are general instructions on using certain tools/functions,
#   if they are specified in a ToolMessage class as a classmethod `instructions`
⋮----
# (b) these are only for the builtin in Langroid TOOLS mechanism:
⋮----
# This variable is not None and equals a `ToolMessage` T, if and only if:
# (a) T has been set as the output_format of this agent, AND
# (b) T has been "enabled for use" ONLY for enforcing this output format, AND
# (c) T has NOT been explicitly "enabled for use" by this Agent.
⋮----
# As above but deals with "enabled for handling" instead of "enabled for use".
⋮----
# instructions specifically related to enforcing `output_format`
⋮----
# controls whether to disable strict schemas for this agent if
# strict mode causes exception
⋮----
# Tracks whether any strict tool is enabled; used to determine whether to set
# `self.disable_strict` on an exception
⋮----
# Tracks the set of tools on which we force-disable strict decoding
⋮----
# search for tools according to the agent configuration
⋮----
# Only enable HANDLING by `agent_response`, NOT LLM generation of these.
# This is useful where tool-handlers or agent_response generate these
# tools, and need to be handled.
# We don't want enable orch tool GENERATION by default, since that
# might clutter-up the LLM system message unnecessarily.
⋮----
def init_state(self) -> None
⋮----
"""
        Initialize the state of the agent. Just conversation state here,
        but subclasses can override this to initialize other state.
        """
⋮----
@staticmethod
    def from_id(id: str) -> "ChatAgent"
⋮----
"""
        Get an agent from its ID
        Args:
            agent_id (str): ID of the agent
        Returns:
            ChatAgent: The agent with the given ID
        """
⋮----
def clone(self, i: int = 0) -> "ChatAgent"
⋮----
"""Create i'th clone of this agent, ensuring tool use/handling is cloned.
        Important: We assume all member variables are in the __init__ method here
        and in the Agent class.
        TODO: We are attempting to clone an agent after its state has been
        changed in possibly many ways. Below is an imperfect solution. Caution advised.
        Revisit later.
        """
agent_cls = type(self)
# Use model_copy to preserve Pydantic subclass types (like MockLMConfig)
# instead of deepcopy which loses subclass information
config_copy = self.config.model_copy(deep=True)
⋮----
new_agent = agent_cls(config_copy)
⋮----
# Ensure each clone gets its own vecdb client when supported.
⋮----
def _clone_extra_state(self, new_agent: "ChatAgent") -> None
⋮----
"""Hook for subclasses to copy additional state into clones."""
⋮----
def _strict_mode_for_tool(self, tool: str | type[ToolMessage]) -> bool
⋮----
"""Should we enable strict mode for a given tool?"""
⋮----
tool_class = self.llm_tools_map[tool]
⋮----
tool_class = tool
name = tool_class.default_value("request")
⋮----
strict: Optional[bool] = tool_class.default_value("strict")
⋮----
strict = self._strict_tools_available()
⋮----
def _fn_call_available(self) -> bool
⋮----
"""Does this agent's LLM support function calling?"""
⋮----
def _strict_tools_available(self) -> bool
⋮----
"""Does this agent's LLM support strict tools?"""
⋮----
def _json_schema_available(self) -> bool
⋮----
"""Does this agent's LLM support strict JSON schema output format?"""
⋮----
def set_system_message(self, msg: str) -> None
⋮----
# if there is message history, update the system message in it
⋮----
def set_user_message(self, msg: str) -> None
⋮----
@property
    def task_messages(self) -> List[LLMMessage]
⋮----
"""
        The task messages are the initial messages that define the task
        of the agent. There will be at least a system message plus possibly a user msg.
        Returns:
            List[LLMMessage]: the task messages
        """
msgs = [self._create_system_and_tools_message()]
⋮----
def _drop_msg_update_tool_calls(self, msg: LLMMessage) -> None
⋮----
id2idx = {t.id: i for i, t in enumerate(self.oai_tool_calls)}
⋮----
# dropping tool result, so ADD the corresponding tool-call back
# to the list of pending calls!
id = msg.tool_call_id
⋮----
# dropping a msg with tool-calls, so DROP these from pending list
# as well as from id -> call map
⋮----
def clear_history(self, start: int = -2, end: int = -1) -> None
⋮----
"""
        Clear the message history, deleting  messages from index `start`,
        up to index `end`.

        Args:
            start (int): index of first message to delete; default = -2
                    (i.e. delete last 2 messages, typically these
                    are the last user and assistant messages)
            end (int): index of last message to delete; Default = -1
                    (i.e. delete all messages up to the last one)
        """
n = len(self.message_history)
⋮----
start = max(0, n + start)
end_ = n if end == -1 else end + 1
dropped = self.message_history[start:end_]
# consider the dropped msgs in REVERSE order, so we are
# carefully updating self.oai_tool_calls
⋮----
# clear out the chat document from the ObjectRegistry
⋮----
def update_history(self, message: str, response: str) -> None
⋮----
"""
        Update the message history with the latest user message and LLM response.
        Args:
            message (str): user message
            response: (str): LLM response
        """
⋮----
def tool_format_rules(self) -> str
⋮----
"""
        Specification of tool formatting rules
        (typically JSON-based but can be non-JSON, e.g. XMLToolMessage),
        based on the currently enabled usable `ToolMessage`s

        Returns:
            str: formatting rules
        """
# ONLY Usable tools (i.e. LLM-generation allowed),
usable_tool_classes: List[Type[ToolMessage]] = [
⋮----
format_instructions = "\n\n".join(
# if any of the enabled classes has json_group_instructions, then use that,
# else fall back to ToolMessage.json_group_instructions
⋮----
def tool_instructions(self) -> str
⋮----
"""
        Instructions for tools or function-calls, for enabled and usable Tools.
        These are inserted into system prompt regardless of whether we are using
        our own ToolMessage mechanism or the LLM's function-call mechanism.

        Returns:
            str: concatenation of instructions for all usable tools
        """
enabled_classes: List[Type[ToolMessage]] = list(self.llm_tools_map.values())
⋮----
instructions = []
⋮----
class_instructions = ""
⋮----
class_instructions = msg_cls.instructions()
⋮----
# example will be shown in tool_format_rules() when using TOOLs,
# so we don't need to show it here.
example = "" if self.config.use_tools else (msg_cls.usage_examples())
⋮----
example = "EXAMPLES:\n" + example
guidance = (
⋮----
instructions_str = "\n\n".join(instructions)
⋮----
def augment_system_message(self, message: str) -> None
⋮----
"""
        Augment the system message with the given message.
        Args:
            message (str): system message
        """
⋮----
def last_message_with_role(self, role: Role) -> LLMMessage | None
⋮----
"""from `message_history`, return the last message with role `role`"""
n_role_msgs = len([m for m in self.message_history if m.role == role])
⋮----
idx = self.nth_message_idx_with_role(role, n_role_msgs)
⋮----
def last_message_idx_with_role(self, role: Role) -> int
⋮----
"""Index of last message in message_history, with specified role.
        Return -1 if not found. Index = 0 is the first message in the history.
        """
indices_with_role = [
⋮----
def nth_message_idx_with_role(self, role: Role, n: int) -> int
⋮----
"""Index of `n`th message in message_history, with specified role.
        (n is assumed to be 1-based, i.e. 1 is the first message with that role).
        Return -1 if not found. Index = 0 is the first message in the history.
        """
⋮----
def update_last_message(self, message: str, role: str = Role.USER) -> None
⋮----
"""
        Update the last message that has role `role` in the message history.
        Useful when we want to replace a long user prompt, that may contain context
        documents plus a question, with just the question.
        Args:
            message (str): new message to replace with
            role (str): role of message to replace
        """
⋮----
# find last message in self.message_history with role `role`
⋮----
def delete_last_message(self, role: str = Role.USER) -> None
⋮----
"""
        Delete the last message that has role `role` from the message history.
        Args:
            role (str): role of message to delete
        """
⋮----
def _create_system_and_tools_message(self) -> LLMMessage
⋮----
"""
        (Re-)Create the system message for the LLM of the agent,
        taking into account any tool instructions that have been added
        after the agent was initialized.

        The system message will consist of:
        (a) the system message from the `task` arg in constructor, if any,
            otherwise the default system message from the config
        (b) the system tool instructions, if any
        (c) the system json tool instructions, if any

        Returns:
            LLMMessage object
        """
content = self.system_message
⋮----
# remove leading and trailing newlines and other whitespace
⋮----
def handle_message_fallback(self, msg: str | ChatDocument) -> Any
⋮----
"""
        Fallback method for the "no-tools" scenario, i.e., the current `msg`
        (presumably emitted by the LLM) does not have any tool that the agent
        can handle.
        NOTE: The `msg` may contain tools but either (a) the agent is not
        enabled to handle them, or (b) there's an explicit `recipient` field
        in the tool that doesn't match the agent's name.

        Uses the self.config.non_tool_routing to determine the action to take.

        This method can be overridden by subclasses, e.g.,
        to create a "reminder" message when a tool is expected but the LLM "forgot"
        to generate one.

        Args:
            msg (str | ChatDocument): The input msg to handle
        Returns:
            Any: The result of the handler method
        """
⋮----
# we ONLY use the `handle_llm_no_tool` config option when
# the msg is from LLM and does not contain ANY tools at all.
⋮----
no_tool_option = self.config.handle_llm_no_tool
⋮----
# in case the `no_tool_option` is one of the special NonToolAction vals
⋮----
# Otherwise just return `no_tool_option` as is:
# This can be any string, such as a specific nudge/reminder to the LLM,
# or even something like ResultTool etc.
⋮----
def unhandled_tools(self) -> set[str]
⋮----
"""The set of tools that are known but not handled.
        Useful in task flow: an agent can refuse to accept an incoming msg
        when it only has unhandled tools.
        """
⋮----
"""
        Add the tool (message class) to the agent, and enable either
        - tool USE (i.e. the LLM can generate JSON to use this tool),
        - tool HANDLING (i.e. the agent can handle JSON from this tool),

        Args:
            message_class: The ToolMessage class OR List of such classes to enable,
                for USE, or HANDLING, or both.
                If this is a list of ToolMessage classes, then the remaining args are
                applied to all classes.
                Optional; if None, then apply the enabling to all tools in the
                agent's toolset that have been enabled so far.
            use: IF True, allow the agent (LLM) to use this tool (or all tools),
                else disallow
            handle: if True, allow the agent (LLM) to handle (i.e. respond to) this
                tool (or all tools)
            force: whether to FORCE the agent (LLM) to USE the specific
                 tool represented by `message_class`.
                 `force` is ignored if `message_class` is None.
            require_recipient: whether to require that recipient be specified
                when using the tool message (only applies if `use` is True).
            include_defaults: whether to include fields that have default values,
                in the "properties" section of the JSON format instructions.
                (Normally the OpenAI completion API ignores these fields,
                but the Assistant fn-calling seems to pay attention to these,
                and if we don't want this, we should set this to False.)
        """
⋮----
# Validate that use/handle are booleans, not accidentally passed tool classes
⋮----
param = "use" if isclass(use) else "handle"
⋮----
message_class = message_class.require_recipient()
⋮----
# XMLToolMessage is not compatible with OpenAI's Tools/functions API,
# so we disable use of functions API, enable langroid-native Tools,
# which are prompt-based.
⋮----
super().enable_message_handling(message_class)  # enables handling only
tools = self._get_tool_list(message_class)
⋮----
request = message_class.default_value("request")
⋮----
llm_function = message_class.llm_function_schema(defaults=include_defaults)
⋮----
# `t` was designated as "enabled for handling" ONLY for
# output_format enforcement, but we are explicitly
# enabling it for handling here, so we set the variable to None.
⋮----
tool_class = self.llm_tools_map[t]
allow_llm_use = tool_class._allow_llm_use
⋮----
allow_llm_use = allow_llm_use.default
⋮----
# `t` was designated as "enabled for use" ONLY for output_format
# enforcement, but we are explicitly enabling it for use here,
# so we set the variable to None.
⋮----
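# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# Minimal example of `enable_message`: enable a custom ToolMessage for both USE
# (the LLM may generate it) and HANDLING (the agent responds to it). `CityTool`
# and its `handle` method are hypothetical.
import langroid as lr
from langroid.agent.tool_message import ToolMessage


class CityTool(ToolMessage):
    request: str = "city_info"
    purpose: str = "Get basic information about a <city>"
    city: str

    def handle(self) -> str:
        # stand-in for a real lookup
        return f"(no data available for {self.city})"


agent = lr.ChatAgent(lr.ChatAgentConfig())
agent.enable_message(CityTool, use=True, handle=True)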
def _update_tool_instructions(self) -> None
⋮----
# Set tool instructions and JSON format instructions,
# in case Tools have been enabled/disabled.
⋮----
def _requests_and_tool_settings(self) -> tuple[Optional[set[str]], bool, bool]
⋮----
"""
        Returns the current set of enabled requests for inference and tools configs.
        Used for restoring settings overridden by `set_output_format`.
        """
⋮----
@property
    def all_llm_tools_known(self) -> set[str]
⋮----
"""All known tools; we include `output_format` if it is a `ToolMessage`."""
known = self.llm_tools_known
⋮----
"""
        Sets `output_format` to `output_type` and, if `force_tools` is enabled,
        switches to the native Langroid tools mechanism to ensure that no tool
        calls not of `output_type` are generated. By default, `force_tools`
        follows the `use_tools_on_output_format` parameter in the config.

        If `output_type` is None, restores to the state prior to setting
        `output_format`.

        If `use`, we enable use of `output_type` when it is a subclass
        of `ToolMessage`. Note that this primarily controls instruction
        generation: the model will always generate `output_type` regardless
        of whether `use` is set. Defaults to the `use_output_format`
        parameter in the config. Similarly, handling of `output_type` is
        controlled by `handle`, which defaults to the
        `handle_output_format` parameter in the config.

        `instructions` controls whether we generate instructions specifying
        the output format schema. Defaults to the `instructions_output_format`
        parameter in the config.

        `is_copy` is set when called via `__getitem__`. In that case, we must
        copy certain fields to ensure that we do not overwrite the main agent's
        settings.
        """
# Disable usage of an output format which was not specifically enabled
# by `enable_message`
⋮----
# Disable handling of an output format which did not specifically have
# handling enabled via `enable_message`
⋮----
# Reset any previous instructions
⋮----
force_tools = self.config.use_tools_on_output_format
⋮----
output_type = get_pydantic_wrapper(output_type)
⋮----
name = output_type.default_value("request")
⋮----
use = self.config.use_output_format
⋮----
handle = self.config.handle_output_format
⋮----
is_usable = name in self.llm_tools_usable.union(
is_handled = name in self.llm_tools_handled.union(
⋮----
# We must copy `llm_tools_usable` so the base agent
# is unmodified
⋮----
# If handling the tool, do the same for `llm_tools_handled`
⋮----
# Enable `output_type`
⋮----
# Do not override existing settings
⋮----
# If the `output_type` ToolMessage was not already enabled for
# use, this means we are ONLY enabling it for use specifically
# for enforcing this output format, so we set
# `enabled_use_output_format` to this output_type, to
# record that it should be disabled when `output_format` is changed
⋮----
# (same reasoning as for use-enabling)
⋮----
generated_tool_instructions = name in self.llm_tools_usable.union(
⋮----
generated_tool_instructions = False
⋮----
instructions = self.config.instructions_output_format
⋮----
# Already generated tool instructions as part of "enabling for use",
# so only need to generate a reminder to use this tool.
name = cast(ToolMessage, output_type).default_value("request")
⋮----
output_format_schema = output_type.llm_function_schema(
⋮----
output_format_schema = output_type.model_json_schema()
⋮----
def __getitem__(self, output_type: type) -> Self
⋮----
"""
        Returns a (shallow) copy of `self` with a forced output type.
        """
clone = copy.copy(self)
⋮----
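# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# The `agent[output_type]` pattern from `__getitem__` above: a shallow copy whose
# responses are forced to match the given structure (a Pydantic model or ToolMessage).
# `Person` is hypothetical; exact behavior is governed by the `*_output_format`
# config flags documented in `set_output_format`.
import langroid as lr
from pydantic import BaseModel


class Person(BaseModel):
    name: str
    age: int


agent = lr.ChatAgent(lr.ChatAgentConfig())
structured = agent[Person]  # base agent's settings are left unmodified
result = structured.llm_response("Extract the person: Alice, aged 30")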
"""
        Disable this agent from RESPONDING to a `message_class` (Tool). If
            `message_class` is None, then disable this agent from responding to ALL.
        Args:
            message_class: The ToolMessage class to disable; Optional.
        """
⋮----
"""
        Disable this agent from USING a message class (Tool).
        If `message_class` is None, then disable this agent from USING ALL tools.
        Args:
            message_class: The ToolMessage class to disable.
                If None, disable all.
        """
⋮----
def disable_message_use_except(self, message_class: Type[ToolMessage]) -> None
⋮----
"""
        Disable this agent from USING ALL messages EXCEPT a message class (Tool)
        Args:
            message_class: The only ToolMessage class to allow
        """
request = message_class.model_fields["request"].default
to_remove = [r for r in self.llm_tools_usable if r != request]
⋮----
def _load_output_format(self, message: ChatDocument) -> None
⋮----
"""
        If set, attempts to parse a value of type `self.output_format` from the message
        contents or any tool/function call and assigns it to `content_any`.
        """
⋮----
any_succeeded = False
attempts: list[str | LLMFunctionCall] = [
⋮----
content = json.loads(attempt)
⋮----
content = attempt.arguments
⋮----
content_any = self.output_format.model_validate(content)
⋮----
message.content_any = content_any.value  # type: ignore
⋮----
any_succeeded = True
⋮----
"""
        Extracts messages and tracks whether any errors occurred. If strict mode
        was enabled, disables it for the tool, else triggers strict recovery.
        """
⋮----
most_recent_sent_by_llm = (
was_llm = most_recent_sent_by_llm or (
⋮----
tools = super().get_tool_messages(msg, all_tools)
⋮----
# Check if tool class was attached to the exception
⋮----
tool_class = ve.tool_class  # type: ignore
⋮----
was_strict = (
# If the result of strict output for a tool using the
# OpenAI tools API fails to parse, we infer that the
# schema edits necessary for compatibility prevented
# adherence to the underlying `ToolMessage` schema and
# disable strict output for the tool
⋮----
# We will trigger the strict recovery mechanism to force
# the LLM to correct its output, allowing us to parse
⋮----
def _get_any_tool_message(self, optional: bool = True) -> type[ToolMessage] | None
⋮----
"""
        Returns a `ToolMessage` which wraps all enabled tools, excluding those
        where strict recovery is disabled. Used in strict recovery.
        """
possible_tools = tuple(
⋮----
any_tool_type = Union.__getitem__(possible_tools)  # type: ignore
⋮----
maybe_optional_type = Optional[any_tool_type] if optional else any_tool_type
⋮----
class AnyTool(ToolMessage)
⋮----
purpose: str = "To call a tool/function."
request: str = "tool_or_function"
tool: maybe_optional_type  # type: ignore
⋮----
def response(self, agent: ChatAgent) -> None | str | ChatDocument
⋮----
# One-time use
⋮----
# As the ToolMessage schema accepts invalid
# `tool.request` values, reparse with the
# corresponding tool
request = self.tool.request
⋮----
tool = agent.llm_tools_map[request].model_validate_json(
⋮----
"""Returns instructions for strict recovery."""
optional_instructions = (
response_prefix = "If you intended to make such a call, r" if optional else "R"
instruction_prefix = "If you do so, b" if optional else "B"
⋮----
schema_instructions = (
⋮----
"""
        Truncate message at idx in msg history to `tokens` tokens.

        If inplace is True, the message is truncated in place, else
        it LEAVES the original message INTACT and returns a new message
        """
⋮----
llm_msg = self.message_history[idx]
⋮----
llm_msg = copy.deepcopy(self.message_history[idx])
orig_content = llm_msg.content
new_content = (
⋮----
else orig_content[: tokens * 4]  # approx truncation
⋮----
def _reduce_raw_tool_results(self, message: ChatDocument) -> None
⋮----
"""
        If message is the result of a ToolMessage that had
        a `_max_retained_tokens` set to a non-None value, then we replace contents
        with a placeholder message.
        """
parent_message: ChatDocument | None = message.parent
tools = [] if parent_message is None else parent_message.tool_messages
truncate_tools = []
⋮----
max_retained_tokens = t._max_retained_tokens
⋮----
max_retained_tokens = max_retained_tokens.default
⋮----
limiting_tool = truncate_tools[0] if len(truncate_tools) > 0 else None
⋮----
max_retained_tokens = limiting_tool._max_retained_tokens
⋮----
tool_name = limiting_tool.default_value("request")
max_tokens: int = max_retained_tokens
truncation_warning = f"""
⋮----
"""
        Respond to a single user message, appended to the message history,
        in "chat" mode
        Args:
            message (str|ChatDocument): message or ChatDocument object to respond to.
                If None, use the self.task_messages
        Returns:
            LLM response as a ChatDocument object
        """
⋮----
# If enabled and a tool error occurred, we recover by generating the tool in
# strict json mode
⋮----
AnyTool = self._get_any_tool_message()
⋮----
recovery_message = self._strict_recovery_instructions(AnyTool)
augmented_message = message
⋮----
augmented_message = recovery_message
⋮----
augmented_message = augmented_message + recovery_message
⋮----
# only use the augmented message for this one response...
result = self.llm_response(augmented_message)
# ... restore the original user message so that the AnyTool recovery
# instructions don't persist in the message history
# (this can cause the LLM to use the AnyTool directly as a tool)
⋮----
msg = message if isinstance(message, str) else message.content
⋮----
tool_choice = (
⋮----
response = self.llm_response_messages(hist, output_len, tool_choice)
⋮----
# Preserve trail of tool_ids for OpenAI Assistant fn-calls
⋮----
"""
        Async version of `llm_response`. See there for details.
        """
⋮----
response = await self.llm_response_messages_async(
⋮----
def init_message_history(self) -> None
⋮----
"""
        Initialize the message history with the system message and user message
        """
⋮----
"""
        Prepare messages to be sent to self.llm_response_messages,
            which is the main method that calls the LLM API to get a response.
            If desired output tokens + message history exceed the model context length,
            the max output tokens is first reduced to fit, and if that is not
            possible, older messages may be truncated to accommodate at least
            self.config.llm.min_output_tokens of output.

        Returns:
            Tuple[List[LLMMessage], int]: (messages, output_len)
                messages = Full list of messages to send
                output_len = max expected number of tokens in response
        """
⋮----
# this means agent has been used to get LLM response already,
# and so the last message is an "assistant" response.
# We delete this last assistant response and re-generate it.
⋮----
# initial messages have not yet been loaded, so load them
⋮----
# for debugging, show the initial message history
⋮----
# update the system message with the latest tool instructions
⋮----
# either the message is a str, or it is a fresh ChatDocument
# different from the last message in the history
llm_msgs = ChatDocument.to_LLMMessage(message, self.oai_tool_calls)
# LLM only responds to the content, so only those msgs with
# non-empty content should be kept
llm_msgs = [m for m in llm_msgs if m.content.strip() != ""]
⋮----
# process tools if any
done_tools = [m.tool_call_id for m in llm_msgs if m.role == Role.TOOL]
⋮----
hist = self.message_history
output_len = self.config.llm.model_max_output_tokens
⋮----
CHAT_HISTORY_BUFFER = 300
# chat + output > max context length,
# so first try to shorten requested output len to fit;
# use an extra margin of CHAT_HISTORY_BUFFER tokens
# in case our calcs are off (and to allow for some extra tokens)
output_len = (
⋮----
# unacceptably small output len, so compress early parts of
# conversation history based on the configured strategy
strategy = self.config.context_overflow_strategy
⋮----
# Truncate content of individual messages while preserving
# the message sequence (important for LLM APIs that require
# alternating USER/ASSISTANT messages)
msg_idx_to_compress = 1  # don't touch system msg
# we will try compressing msg indices up to but not including
# last user msg
last_msg_idx_to_compress = (
n_truncated = 0
⋮----
# We want to preserve the first message (typically
# system msg) and last message (user msg).
⋮----
# compress the msg at idx `msg_idx_to_compress`
⋮----
output_len = min(
⋮----
# we MUST have truncated at least one msg
msg_tokens = self.chat_num_tokens()
⋮----
else:  # strategy == "drop_turns"
# Drop complete conversation turns. A complete turn is defined
# as a USER message followed by all messages until the next
# USER message. This is more aggressive but cleaner for voice
# agents with limited context.
n_dropped_turns = 0
⋮----
# Find the last USER message index
last_user_idx = self.last_message_idx_with_role(role=Role.USER)
⋮----
# Find the first complete turn to drop (skip system message)
first_user_idx = -1
⋮----
first_user_idx = i
⋮----
# Find the end of this turn: last message before next USER
next_user_idx = -1
⋮----
next_user_idx = i
⋮----
# Drop the turn
⋮----
# we MUST have dropped at least one turn
⋮----
# record the position of the corresponding LLMMessage in
# the message_history
⋮----
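# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# Rough example of the overflow handling referenced above: the code reads
# `config.context_overflow_strategy` ("drop_turns" drops whole USER->... turns;
# the other branch truncates individual messages) and aims to retain at least
# `config.llm.min_output_tokens` of output. Field values shown are assumptions.
import langroid as lr
from langroid.language_models.openai_gpt import OpenAIGPTConfig

config = lr.ChatAgentConfig(
    llm=OpenAIGPTConfig(min_output_tokens=100),
    context_overflow_strategy="drop_turns",
)
agent = lr.ChatAgent(config)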
"""
        Get function/tool spec/output format arguments for
        OpenAI-compatible LLM API call
        """
functions: Optional[List[LLMFunctionSpec]] = None
fun_call: str | Dict[str, str] = "none"
tools: Optional[List[OpenAIToolSpec]] = None
force_tool: Optional[Dict[str, Dict[str, str] | str]] = None
⋮----
functions = [
fun_call = (
⋮----
def to_maybe_strict_spec(function: str) -> OpenAIToolSpec
⋮----
spec = self.llm_functions_map[function]
strict = self._strict_mode_for_tool(function)
⋮----
strict_spec = copy.deepcopy(spec)
⋮----
strict_spec = spec
⋮----
tools = [to_maybe_strict_spec(f) for f in self.llm_functions_usable]
force_tool = (
output_format = None
⋮----
spec = self.output_format.llm_function_schema(
⋮----
output_format = OpenAIJsonSchemaSpec(
⋮----
# We always require that outputs strictly match the schema
⋮----
param_spec = self.output_format.model_json_schema()
⋮----
"""
        Respond to a series of messages, e.g. with OpenAI ChatCompletion
        Args:
            messages: seq of messages (with role, content fields) sent to LLM
            output_len: max number of tokens expected in response.
                    If None, use the LLM's default model_max_output_tokens.
        Returns:
            Document (i.e. with fields "content", "metadata")
        """
⋮----
output_len = output_len or self.config.llm.model_max_output_tokens
streamer = noop_fn
⋮----
streamer = self.callbacks.start_llm_stream()
⋮----
with ExitStack() as stack:  # for conditionally using rich spinner
⋮----
# show rich spinner only if not streaming!
# (Why? b/c the intent of showing a spinner is to "show progress",
# and we don't need to do that when streaming, since
# streaming output already shows progress.)
cm = status(
⋮----
response = self.llm.chat(
⋮----
# Create temp ChatDocument for tool check, then clean up to avoid
# polluting ObjectRegistry (see PR #939 discussion)
temp_doc = ChatDocument.from_LLMResponse(
⋮----
response,  # .usage attrib is updated!
⋮----
chat_doc = ChatDocument.from_LLMResponse(
⋮----
# If using strict output format, parse the output JSON
⋮----
"""
        Async version of `llm_response_messages`. See there for details.
        """
⋮----
streamer_async = async_noop_fn
⋮----
streamer_async = await self.callbacks.start_llm_stream_async()
⋮----
response = await self.llm.achat(
⋮----
"""
        Call a callback method, only passing 'reasoning' if it accepts it.

        This provides backward compatibility for custom callbacks that don't
        have the 'reasoning' parameter in their signature.

        Args:
            callback_name: Name of the callback method (e.g., 'show_llm_response')
            reasoning: The reasoning content to pass if supported
            **kwargs: Other arguments to pass to the callback
        """
callback = getattr(self.callbacks, callback_name, None)
⋮----
# Check if callback accepts 'reasoning' param or **kwargs
⋮----
sig = inspect.signature(callback)
params = sig.parameters
accepts_reasoning = "reasoning" in params or any(
⋮----
# If we can't inspect the signature, assume it doesn't accept reasoning
accepts_reasoning = False
⋮----
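# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# The compatibility check above means a custom callback can opt in to reasoning
# either by declaring a `reasoning` parameter or by accepting **kwargs; older
# callbacks without either are still invoked, just without the reasoning argument.
# (The parameter list here is schematic, not the full callback signature.)
def show_llm_response(content: str, reasoning: str = "", **kwargs) -> None:
    if reasoning:
        print(f"[reasoning] {reasoning}")
    print(content)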
is_cached = (
⋮----
# We would have already displayed the msg "live" ONLY if
# streaming was enabled, AND we did not find a cached response.
# If we are here, it means the response has not yet been displayed.
cached = f"[red]{self.indent}(cached)[/red]" if is_cached else ""
# Track whether we created a temp ChatDocument for cleanup
is_temp_doc = isinstance(response, LLMResponse)
chat_doc = (
# TODO: prepend TOOL: or OAI-TOOL: if it's a tool-call
⋮----
content = response.message
tools_content = response.tools_content()
⋮----
content = response.content
tools_content = ""
reasoning = response.reasoning if isinstance(response, LLMResponse) else ""
⋮----
# Clean up temp ChatDocument to avoid polluting ObjectRegistry
⋮----
# we are in the context immediately after an LLM responded,
# we won't have citations yet, so we're done
⋮----
citation = (
⋮----
reasoning="",  # Citations don't have reasoning
⋮----
def _llm_response_temp_context(self, message: str, prompt: str) -> ChatDocument
⋮----
"""
        Get LLM response to `prompt` (which presumably includes the `message`
        somewhere, along with possible large "context" passages),
        but only include `message` as the USER message, and not the
        full `prompt`, in the message history.
        Args:
            message: the original, relatively short, user request or query
            prompt: the full prompt potentially containing `message` plus context

        Returns:
            Document object containing the response.
        """
# we explicitly call THIS class's respond method,
# not a derived class's (or else there would be infinite recursion!)
with StreamingIfAllowed(self.llm, self.llm.get_stream()):  # type: ignore
answer_doc = cast(ChatDocument, ChatAgent.llm_response(self, prompt))
⋮----
"""
        Async version of `_llm_response_temp_context`. See there for details.
        """
⋮----
answer_doc = cast(
⋮----
"""
        LLM Response to single message, and restore message_history.
        In effect a "one-off" message & response that leaves agent
        message history state intact.

        Args:
            message (str|ChatDocument): message to respond to.

        Returns:
            A Document object with the response.

        """
# explicitly call THIS class's respond method,
⋮----
n_msgs = len(self.message_history)
⋮----
response = cast(ChatDocument, ChatAgent.llm_response(self, message))
# If there is a response, then we will have two additional
# messages in the message history, i.e. the user message and the
# assistant response. We want to (carefully) remove these two messages.
⋮----
msg = self.message_history.pop()
⋮----
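# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# One-off query that leaves the agent's message history as it was, per the
# docstring above.
import langroid as lr

agent = lr.ChatAgent(lr.ChatAgentConfig())
aside = agent.llm_response_forget("In one sentence, what is HTTP?")
# agent.message_history does not retain this user/assistant exchange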
"""
        Async version of `llm_response_forget`. See there for details.
        """
⋮----
response = cast(
⋮----
def chat_num_tokens(self, messages: Optional[List[LLMMessage]] = None) -> int
⋮----
"""
        Total number of tokens in the message history so far.

        Args:
            messages: if provided, compute the number of tokens in this list of
                messages, rather than the current message history.
        Returns:
            int: number of tokens in message history
        """
⋮----
hist = messages if messages is not None else self.message_history
⋮----
def _message_num_tokens(self, message: LLMMessage) -> int
⋮----
"""Count tokens for a message, including serialized user attachments."""
⋮----
def _attachment_num_tokens(self, message: LLMMessage) -> int
⋮----
"""
        Estimate attachment contribution using the serialized payload
        that is sent in the API request for user messages.
        """
⋮----
model = self._chat_model_name_for_attachments()
⋮----
def _chat_model_name_for_attachments(self) -> str
⋮----
"""Return the model name used for attachment serialization."""
⋮----
def message_history_str(self, i: Optional[int] = None) -> str
⋮----
"""
        Return a string representation of the message history
        Args:
            i: if provided, return only the i-th message when i is positive,
                or the last k messages when i = -k.
        Returns:
            str: string representation of the message history.
        """
⋮----
def __del__(self) -> None
⋮----
"""
        Cleanup method called when the ChatAgent is garbage collected.
        Note: We don't close LLM clients here because they may be shared
        across multiple agents when client caching is enabled.
        The clients are managed centrally and cleaned up via atexit hooks.
        """
# Previously we closed clients here, but this caused issues when
# multiple agents shared the same cached client instance.
# Clients are now managed centrally in langroid.language_models.client_cache
</file>

<file path="langroid/language_models/openai_gpt.py">
OLLAMA_BASE_URL = f"http://{os.environ['OLLAMA_HOST']}/v1"
⋮----
OLLAMA_BASE_URL = "http://localhost:11434/v1"
⋮----
DEEPSEEK_BASE_URL = "https://api.deepseek.com/v1"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai"
GLHF_BASE_URL = "https://glhf.chat/api/openai/v1"
OLLAMA_API_KEY = "ollama"
⋮----
VLLM_API_KEY = os.environ.get("VLLM_API_KEY", DUMMY_API_KEY)
LLAMACPP_API_KEY = os.environ.get("LLAMA_API_KEY", DUMMY_API_KEY)
⋮----
openai_chat_model_pref_list = [
⋮----
openai_completion_model_pref_list = [
⋮----
available_models = set(map(lambda m: m.id, OpenAI().models.list()))
⋮----
available_models = set()
⋮----
default_openai_chat_model = next(
default_openai_completion_model = next(
⋮----
class AccessWarning(Warning)
⋮----
@cache
def gpt_3_5_warning() -> None
⋮----
@cache
def parallel_strict_warning() -> None
⋮----
def noop() -> None
⋮----
"""Does nothing."""
⋮----
class OpenAICallParams(BaseModel)
⋮----
"""
    Various params that can be sent to an OpenAI API chat-completion call.
    When specified, any param here overrides the one with same name in the
    OpenAIGPTConfig.
    See OpenAI API Reference for details on the params:
    https://platform.openai.com/docs/api-reference/chat
    """
⋮----
max_tokens: int | None = None
temperature: float | None = None
frequency_penalty: float | None = None  # between -2 and 2
presence_penalty: float | None = None  # between -2 and 2
response_format: Dict[str, str] | None = None
logit_bias: Dict[int, float] | None = None  # token_id -> bias
logprobs: bool | None = None
top_p: float | None = None
reasoning_effort: str | None = None  # "low", "medium", or "high"
top_logprobs: int | None = None  # if int, requires logprobs=True
n: int | None = None  # how many completions to generate (n > 1 is NOT handled now)
stop: str | List[str] | None = None  # (list of) stop sequence(s)
seed: int | None = None
user: str | None = None  # user id for tracking
extra_body: Dict[str, Any] | None = None  # additional params for API request body
⋮----
def to_dict_exclude_none(self) -> Dict[str, Any]
⋮----
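# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# Per-call parameters via OpenAICallParams; as noted in the docstring above, any
# param set here overrides the same-named field in OpenAIGPTConfig. The model
# name is just an example.
from langroid.language_models.openai_gpt import OpenAICallParams, OpenAIGPTConfig

llm_config = OpenAIGPTConfig(
    chat_model="gpt-4o",
    params=OpenAICallParams(
        temperature=0.0,
        max_tokens=512,
        seed=1234,
    ),
)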
class LiteLLMProxyConfig(BaseSettings)
⋮----
"""Configuration for LiteLLM proxy connection."""
⋮----
api_key: str = ""  # read from env var LITELLM_API_KEY if set
api_base: str = ""  # read from env var LITELLM_API_BASE if set
⋮----
model_config = SettingsConfigDict(env_prefix="LITELLM_")
⋮----
class OpenAIGPTConfig(LLMConfig)
⋮----
"""
    Class for any LLM with an OpenAI-like API: besides the OpenAI models this includes:
    (a) locally-served models behind an OpenAI-compatible API
    (b) non-local models, using a proxy adaptor lib like litellm that provides
        an OpenAI-compatible API.
    (We could rename this class to OpenAILikeConfig, but we keep it as-is for now)

    Important Note:
    Due to the `env_prefix = "OPENAI_"` defined below,
    all of the fields below can be set AND OVERRIDDEN via env vars,
    by upper-casing the name and prefixing with OPENAI_, e.g.
    OPENAI_MAX_OUTPUT_TOKENS=1000.
    If any of these is defined in this way in the environment
    (either via explicit setenv or export or via .env file + load_dotenv()),
    the environment variable takes precedence over the value in the config.
    """
⋮----
type: str = "openai"
api_key: str = DUMMY_API_KEY
organization: str = ""
api_base: str | None = None  # used for local or other non-OpenAI models
litellm: bool = False  # use litellm api?
litellm_proxy: LiteLLMProxyConfig = LiteLLMProxyConfig()
ollama: bool = False  # use ollama's OpenAI-compatible endpoint?
min_output_tokens: int = 1
use_chat_for_completion: bool = True  # do not change this, for OpenAI models!
timeout: int = 20
temperature: float = 0.2
seed: int | None = 42
params: OpenAICallParams | None = None
use_cached_client: bool = (
⋮----
True  # Whether to reuse cached clients (prevents resource exhaustion)
⋮----
# these can be any model name that is served at an OpenAI-compatible API end point
chat_model: str = default_openai_chat_model
chat_model_orig: Optional[str] = None
completion_model: str = default_openai_completion_model
run_on_first_use: Callable[[], None] = noop
parallel_tool_calls: Optional[bool] = None
# Supports constrained decoding which enforces that the output of the LLM
# adheres to a JSON schema
supports_json_schema: Optional[bool] = None
# Supports strict decoding for the generation of tool calls with
# the OpenAI Tools API; this ensures that the generated tools
# adhere to the provided schema.
supports_strict_tools: Optional[bool] = None
# a string that roughly matches a HuggingFace chat_template,
# e.g. "mistral-instruct-v0.2 (a fuzzy search is done to find the closest match)
formatter: str | None = None
hf_formatter: HFFormatter | None = None
langdb_params: LangDBParams = LangDBParams()
portkey_params: PortkeyParams = PortkeyParams()
headers: Dict[str, str] = {}
http_client_factory: Optional[Callable[[], Any]] = (
⋮----
None  # Factory: returns Client or (Client, AsyncClient)
⋮----
http_verify_ssl: bool = True  # Simple flag for SSL verification
http_client_config: Optional[Dict[str, Any]] = None  # Config dict for httpx.Client
⋮----
def __init__(self, **kwargs) -> None:  # type: ignore
⋮----
local_model = "api_base" in kwargs and kwargs["api_base"] is not None
⋮----
chat_model = kwargs.get("chat_model", "")
local_prefixes = ["local/", "litellm/", "ollama/", "vllm/", "llamacpp/"]
⋮----
local_model = True
⋮----
warn_gpt_3_5 = (
⋮----
existing_hook = kwargs.get("run_on_first_use", noop)
⋮----
def with_warning() -> None
⋮----
model_config = SettingsConfigDict(env_prefix="OPENAI_")
⋮----
"""
        Copy config while preserving nested model instances and subclasses.

        Important: Avoid reconstructing via `model_dump` as that coerces nested
        models to their annotated base types (dropping subclass-only fields).
        Instead, defer to Pydantic's native `model_copy`, which keeps nested
        `BaseModel` instances (and their concrete subclasses) intact.
        """
# Delegate to BaseSettings/BaseModel implementation to preserve types
return super().model_copy(update=update, deep=deep)  # type: ignore[return-value]
⋮----
def _validate_litellm(self) -> None
⋮----
"""
        When using liteLLM, validate whether all env vars required by the model
        have been set.
        """
⋮----
litellm.drop_params = True  # drop un-supported params without crashing
⋮----
self.seed = None  # some local mdls don't support seed
⋮----
keys_dict = litellm.utils.validate_environment(self.chat_model)
missing_keys = keys_dict.get("missing_keys", [])
⋮----
@classmethod
    def create(cls, prefix: str) -> Type["OpenAIGPTConfig"]
⋮----
"""Create a config class whose params can be set via a desired
        prefix from the .env file or env vars.
        E.g., using
        ```python
        OllamaConfig = OpenAIGPTConfig.create("ollama")
        ollama_config = OllamaConfig()
        ```
        you can have a group of params prefixed by "OLLAMA_", to be used
        with models served via `ollama`.
        This way, you can maintain several setting-groups in your .env file,
        one per model type.
        """
⋮----
class DynamicConfig(OpenAIGPTConfig)
⋮----
class OpenAIResponse(BaseModel)
⋮----
"""OpenAI response model, either completion or chat."""
⋮----
choices: List[Dict]  # type: ignore
usage: Dict  # type: ignore
⋮----
def litellm_logging_fn(model_call_dict: Dict[str, Any]) -> None
⋮----
"""Logging function for litellm"""
⋮----
api_input_dict = model_call_dict.get("additional_args", {}).get(
⋮----
text = escape(json.dumps(api_input_dict, indent=2))
⋮----
# Define a class for OpenAI GPT models that extends the base class
class OpenAIGPT(LanguageModel)
⋮----
"""
    Class for OpenAI LLMs
    """
⋮----
client: OpenAI | Groq | Cerebras | None
async_client: AsyncOpenAI | AsyncGroq | AsyncCerebras | None
⋮----
def __init__(self, config: OpenAIGPTConfig = OpenAIGPTConfig())
⋮----
"""
        Args:
            config: configuration for openai-gpt model
        """
# copy the config to avoid modifying the original; deep to decouple
# nested models while preserving their concrete subclasses
config = config.model_copy(deep=True)
⋮----
# save original model name such as `provider/model` before
# we strip out the `provider` - we retain the original in
# case some params are specific to a provider.
⋮----
# Run the first time the model is used
⋮----
# global override of chat_model,
# to allow quick testing with other models
⋮----
# there is a formatter specified, e.g.
# "litellm/ollama/mistral//hf" or
# "local/localhost:8000/v1//mistral-instruct-v0.2"
formatter = parts[1]
⋮----
# e.g. "litellm/ollama/mistral//hf" -> "litellm/ollama/mistral"
formatter = find_hf_formatter(self.config.chat_model)
⋮----
# e.g. "mistral"
⋮----
# e.g. "local/localhost:8000/v1//mistral-instruct-v0.2"
⋮----
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", DUMMY_API_KEY)
⋮----
# if model name starts with "litellm",
# set the actual model name by stripping the "litellm/" prefix
# and set the litellm flag to True
⋮----
# e.g. litellm/ollama/mistral
⋮----
# strip the "litellm/" prefix
# e.g. litellm/ollama/llama2 => ollama/llama2
⋮----
# expect this to be of the form "local/localhost:8000/v1",
# depending on how the model is launched locally.
# In this case the model served locally behind an OpenAI-compatible API
# so we can just use `openai.*` methods directly,
# and don't need an adaptor library like litellm
⋮----
self.config.seed = None  # some models raise an error when seed is set
# Extract the api_base from the model name after the "local/" prefix
⋮----
# use api_base from config if set, else fall back on OLLAMA_BASE_URL
⋮----
# If api_base is unset we use OpenAI's endpoint, which supports
# these features (with JSON schema restricted to a limited set of models)
⋮----
# if we're overriding chat model globally, set completion model to same
⋮----
# we want to format chats -> completions using this specific formatter
⋮----
# use groq-specific client
⋮----
# Create new clients without caching
⋮----
# use cerebras-specific client
⋮----
# TODO there is no async client, so should we do anything here?
⋮----
# in these cases, there's no specific client: OpenAI python client suffices
⋮----
# Use GEMINI_API_BASE env var if set (e.g. for Vertex AI),
# then config.api_base only if explicitly set by the user
# (not inherited from OPENAI_API_BASE via env_prefix),
# then fall back to the default Gemini endpoint.
gemini_api_base = os.getenv("GEMINI_API_BASE", "")
openai_api_base = os.getenv("OPENAI_API_BASE")
explicit_api_base = (
⋮----
project_id = self.config.langdb_params.project_id
⋮----
params = self.config.langdb_params
⋮----
# Parse the model string and extract provider/model
⋮----
# Set Portkey base URL
⋮----
# Set API key - use provider's API key from env if available
⋮----
# Add Portkey-specific headers
⋮----
# Create http_client if needed - Priority order:
# 1. http_client_factory (most flexibility, not cacheable)
# 2. http_client_config (cacheable, moderate flexibility)
# 3. http_verify_ssl=False (cacheable, simple SSL bypass)
http_client = None
async_http_client = None
http_client_config_used = None
⋮----
# Use the factory to create http_client (not cacheable)
http_client = self.config.http_client_factory()
⋮----
# set async_http_client to None - so that it will
# be created later
⋮----
# Use config dict (cacheable)
http_client_config_used = self.config.http_client_config
⋮----
# Simple SSL bypass (cacheable)
http_client_config_used = {"verify": False}
⋮----
client_kwargs: Dict[str, Any] = dict(
⋮----
# Create http_client from config for non-cached scenario
⋮----
async_client_kwargs: Dict[str, Any] = dict(
⋮----
# Create async http_client from config for non-cached scenario
⋮----
use_cache = self.config.cache_config is not None
⋮----
# switch to fresh redis config if needed
⋮----
# force use of fake redis if global cache_type is "fakeredis"
⋮----
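# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# Application-side examples of the chat_model naming conventions handled in
# __init__ above; endpoints and model names are placeholders.
from langroid.language_models.openai_gpt import OpenAIGPTConfig

# OpenAI-compatible server launched locally:
local_cfg = OpenAIGPTConfig(chat_model="local/localhost:8000/v1")

# model routed through litellm (the "litellm/" prefix is stripped internally):
litellm_cfg = OpenAIGPTConfig(chat_model="litellm/ollama/mistral")

# model served at ollama's OpenAI-compatible endpoint:
ollama_cfg = OpenAIGPTConfig(chat_model="ollama/mistral")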
def _openai_api_call_params(self, kwargs: Dict[str, Any]) -> Dict[str, Any]
⋮----
"""
        Prep the params to be sent to the OpenAI API
        (or any OpenAI-compatible API, e.g. from Ooba or LmStudio)
        for chat-completion.

        Order of priority:
        - (1) Params (mainly max_tokens) in the chat/achat/generate/agenerate call
                (these are passed in via kwargs)
        - (2) Params in OpenAIGPTConfig.params (of class OpenAICallParams)
        - (3) Specific Params in OpenAIGPTConfig (just temperature for now)
        """
params = dict(
⋮----
def is_openai_chat_model(self) -> bool
⋮----
openai_chat_models = [e.value for e in OpenAIChatModel]
⋮----
def is_openai_completion_model(self) -> bool
⋮----
openai_completion_models = [e.value for e in OpenAICompletionModel]
⋮----
def is_gemini_model(self) -> bool
⋮----
"""Are we using the gemini OpenAI-compatible API?"""
⋮----
def is_deepseek_model(self) -> bool
⋮----
deepseek_models = [e.value for e in DeepSeekModel]
⋮----
def unsupported_params(self) -> List[str]
⋮----
"""
        List of params that are not supported by the current model
        """
unsupported = set(self.info().unsupported_params)
⋮----
def rename_params(self) -> Dict[str, str]
⋮----
"""
        Map of param name -> new name for specific models.
        Currently the main troublemaker is the o1* series.
        """
⋮----
def chat_context_length(self) -> int
⋮----
"""
        Context-length for chat-completion models/endpoints.
        Get it from the config if explicitly given,
         otherwise use model_info based on model name, and fall back to
         generic model_info if there's no match.
        """
⋮----
def completion_context_length(self) -> int
⋮----
"""
        Context-length for completion models/endpoints.
        Get it from the config if explicitly given,
         otherwise use model_info based on model name, and fall back to
         generic model_info if there's no match.
        """
⋮----
def chat_cost(self) -> Tuple[float, float, float]
⋮----
"""
        (Prompt, Cached, Generation) cost per 1000 tokens, for chat-completion
        models/endpoints.
        Get it from the dict, otherwise fail-over to general method
        """
info = self.info()
cached_cost_per_million = info.cached_cost_per_million
⋮----
cached_cost_per_million = info.input_cost_per_million
⋮----
def set_stream(self, stream: bool) -> bool
⋮----
"""Enable or disable streaming output from API.
        Args:
            stream: enable streaming output from API
        Returns: previous value of stream
        """
tmp = self.config.stream
⋮----
def get_stream(self) -> bool
⋮----
"""Get streaming status."""
⋮----
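# --- Illustrative sketch (editor addition; application-side code, not part of this file) ---
# `set_stream` returns the previous value, so streaming can be toggled and restored:
from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig

llm = OpenAIGPT(OpenAIGPTConfig(chat_model="gpt-4o"))
was_streaming = llm.set_stream(False)  # disable streaming for a batch of calls
# ... issue calls ...
llm.set_stream(was_streaming)  # restore the previous setting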
"""Separate inline reasoning from text tokens in a streaming chunk.

        When models embed thinking inside content (e.g. <think>...</think>)
        rather than using a separate reasoning field, this splits the chunk
        into text-only and reasoning-only portions for proper streamer routing.

        Returns (text_tokens, reasoning_tokens, in_reasoning).
        """
text_tokens = event_text
reasoning_tokens = event_reasoning
⋮----
remaining = event_text
⋮----
text_tokens = ""
⋮----
text_tokens = before
remaining = after
in_reasoning = True
⋮----
reasoning_tokens = before
in_reasoning = False
⋮----
reasoning_tokens = remaining
⋮----
"""Process state vars while processing a streaming API response.
            Returns a tuple consisting of:
        - is_break: whether to break out of the loop
        - has_function: whether the response contains a function_call
        - function_name: name of the function
        - function_args: args of the function
        - completion: completion text
        - reasoning: reasoning text
        - usage: usage dict
        """
# convert event obj (of type ChatCompletionChunk) to dict so rest of code,
# which expects dicts, works as it did before switching to openai v1.x
⋮----
event = event.model_dump()
⋮----
usage = event.get("usage", {}) or {}
choices = event.get("choices", [{}])
⋮----
choices = [{}]
⋮----
# we have a "usage" chunk, and empty choices, so we're done
# ASSUMPTION: a usage chunk ONLY arrives AFTER all normal completion text!
# If any API does not follow this, we need to change this code.
⋮----
event_args = ""
event_fn_name = ""
event_tool_deltas: Optional[List[Dict[str, Any]]] = None
silent = settings.quiet
# The first two events in the stream of Azure OpenAI are useless.
# In the 1st: choices list is empty, in the 2nd: the dict delta has null content
⋮----
delta = choices[0].get("delta", {}) or {}
# capture both content and reasoning_content
event_text = delta.get("content", "")
event_reasoning = delta.get(
⋮----
event_fn_name = delta["function_call"]["name"]
⋮----
event_args = delta["function_call"]["arguments"]
⋮----
# it's a list of deltas, usually just one
event_tool_deltas = delta["tool_calls"]
⋮----
event_text = choices[0]["text"]
event_reasoning = ""  # TODO: Ignoring reasoning for non-chat models
⋮----
finish_reason = choices[0].get("finish_reason", "")
⋮----
filter_names = [
event_text = (
⋮----
function_name = event_fn_name
has_function = True
⋮----
# print out streaming tool calls, if not async
⋮----
tool_fn_name = td["function"]["name"]
⋮----
tool_fn_args = td["function"]["arguments"]
⋮----
# show this delta in the stream
is_break = finish_reason in [
# for function_call, finish_reason does not necessarily
# contain "function_call" as mentioned in the docs.
# So we check for "stop" or "function_call" here.
⋮----
# we got usage chunk, and empty choices, so we're done
⋮----
silent = self.config.async_stream_quiet or settings.quiet
⋮----
is_break = choices[0].get("finish_reason", "") in [
⋮----
def _stream_response(  # type: ignore
⋮----
"""
        Grab and print streaming response from API.
        Args:
            response: event-sequence emitted by API
            chat: whether in chat-mode (or else completion-mode)
        Returns:
            Tuple consisting of:
                LLMResponse object (with message, usage),
                Dict version of OpenAIResponse object (with choices, usage)

        """
completion = ""
reasoning = ""
function_args = ""
function_name = ""
⋮----
has_function = False
tool_deltas: List[Dict[str, Any]] = []
token_usage: Dict[str, int] = {}
done: bool = False
in_reasoning: bool = False  # Track if we're inside reasoning delimiters
⋮----
# capture the token usage when non-empty
token_usage = usage
⋮----
# if not streaming, then we don't wait for last "usage" chunk
⋮----
# mark done, so we quit after the last "usage" chunk
done = True
⋮----
# TODO- get usage info in stream mode (?)
⋮----
async def _stream_response_async(  # type: ignore
⋮----
"""
        Grab and print streaming response from API.
        Args:
            response: event-sequence emitted by API
            chat: whether in chat-mode (or else completion-mode)
        Returns:
            Tuple consisting of:
                LLMResponse object (with message, usage),
                OpenAIResponse object (with choices, usage)

        """
⋮----
# mark done, so we quit after the next "usage" chunk
⋮----
"""
        Convert accumulated tool-call deltas to OpenAIToolCall objects.
        Adapted from this excellent code:
         https://community.openai.com/t/help-for-function-calls-with-streaming/627170/2

        Args:
            tools: list of tool deltas received from streaming API

        Returns:
            str: plain text corresponding to tool calls that failed to parse
            List[OpenAIToolCall]: list of OpenAIToolCall objects
            List[Dict[str, Any]]: list of tool dicts
                (to reconstruct OpenAI API response, so it can be cached)
        """
# Initialize a dictionary with default values
⋮----
# idx -> dict repr of tool
# (used to simulate OpenAIResponse object later, and also to
# accumulate function args as strings)
idx2tool_dict: Dict[str, Dict[str, Any]] = defaultdict(
⋮----
# (try to) parse the fn args of each tool
contents: List[str] = []
good_indices = []
id2args: Dict[str, None | Dict[str, Any]] = {}
⋮----
# used to build tool_calls_list below
id2args[tool_dict["id"]] = args_dict or None  # if {}, store as None
⋮----
# remove the failed tool calls
idx2tool_dict = {
⋮----
# create OpenAIToolCall list
tool_calls_list = [
⋮----
@staticmethod
    def _parse_function_args(args: str) -> Tuple[str, Dict[str, Any]]
⋮----
"""
        Try to parse the `args` string as function args.

        Args:
            args: string containing function args

        Returns:
            Tuple of (content, args dict).
            If parsing is unsuccessful, returns the original string as content
            and an empty dict; else returns empty content and the parsed args dict.
        """
content = ""
args_dict = {}
⋮----
stripped_fn_args = args.strip()
dict_or_list = parse_imperfect_json(stripped_fn_args)
⋮----
args_dict = dict_or_list
⋮----
content = args
⋮----
"""
        Create an LLMResponse object from the streaming API response.

        Args:
            chat: whether in chat-mode (or else completion-mode)
            tool_deltas: list of tool deltas received from streaming API
            has_function: whether the response contains a function_call
            completion: completion text
            reasoning: reasoning text
            function_args: string representing function args
            function_name: name of the function
            usage: token usage dict
        Returns:
            Tuple consisting of:
                LLMResponse object (with message, usage),
                Dict version of OpenAIResponse object (with choices, usage)
                    (this is needed so we can cache the response, as if it were
                    a non-streaming response)
        """
# check if function_call args are valid, if not,
# treat this as a normal msg, not a function call
args: Dict[str, Any] = {}
⋮----
completion = completion + content
⋮----
# mock openai response so we can cache it
⋮----
completion = completion + "\n" + failed_content
msg: Dict[str, Any] = dict(
⋮----
function_call = LLMFunctionCall(name=function_name)
function_call_dict = function_call.model_dump()
⋮----
# non-chat mode has no function_call
msg = dict(text=completion)
# TODO: Ignoring reasoning content for non-chat models
⋮----
# create an OpenAIResponse object so we can cache it as if it were
# a non-streaming response
openai_response = OpenAIResponse(
# Track whether we extracted inline thought tags from the text.
# Only set message_with_reasoning when get_reasoning_final()
# actually finds and extracts inline tags (e.g. <think>...</think>).
# When reasoning is already provided via a separate API field
# (e.g. reasoning_content), the message text doesn't contain
# thought signatures, so there's nothing extra to preserve.
message_with_reasoning = None
⋮----
# some LLM APIs may not return a separate reasoning field,
# and the reasoning may be included in the message content
# within delimiters like <think> ... </think>
⋮----
# Inline tags were found and extracted; preserve the
# original text so it can be restored in message history.
message_with_reasoning = completion
⋮----
message = completion
⋮----
prompt_tokens = usage.get("prompt_tokens", 0)
prompt_tokens_details: Any = usage.get("prompt_tokens_details", {})
cached_tokens = (
completion_tokens = usage.get("completion_tokens", 0)
⋮----
# don't allow empty list [] here
⋮----
def _cache_store(self, k: str, v: Any) -> None
⋮----
def _cache_lookup(self, fn_name: str, **kwargs: Dict[str, Any]) -> Tuple[str, Any]
⋮----
return "", None  # no cache, return empty key and None result
# Use the kwargs as the cache key
sorted_kwargs_str = str(sorted(kwargs.items()))
raw_key = f"{fn_name}:{sorted_kwargs_str}"
⋮----
# Hash the key to a fixed length using SHA256
hashed_key = hashlib.sha256(raw_key.encode()).hexdigest()
⋮----
# when caching disabled, return the hashed_key and none result
⋮----
# Try to get the result from the cache
⋮----
cached_val = self.cache.retrieve(hashed_key)
⋮----
def _cost_chat_model(self, prompt: int, cached: int, completion: int) -> float
⋮----
price = self.chat_cost()
⋮----
"""
        Extracts token usage from ``response`` and computes cost, only when NOT
        in streaming mode, since the LLM API (OpenAI currently) was not
        populating the usage fields in streaming mode (but as of Sep 2024, streaming
        responses include usage info as well, so we should update the code
        to directly use usage information from the streaming response, which is more
        accurate, especially with "thinking" LLMs like the o1 series which consume
        thinking tokens).
        In streaming mode, these are set to zero for
        now, and will be updated later by the fn ``update_token_usage``.
        """
cost = 0.0
prompt_tokens = 0
cached_tokens = 0
completion_tokens = 0
⋮----
usage = response.get("usage")
⋮----
prompt_tokens = usage.get("prompt_tokens") or 0
prompt_tokens_details = usage.get("prompt_tokens_details", {}) or {}
cached_tokens = prompt_tokens_details.get("cached_tokens") or 0
completion_tokens = usage.get("completion_tokens") or 0
cost = self._cost_chat_model(
⋮----
def generate(self, prompt: str, max_tokens: int = 200) -> LLMResponse
⋮----
# Catch HTTP-level API errors (400, 401, 403, 404, 422, 429, 5xx)
# without traceback — these originate server-side and a local
# stack trace adds no diagnostic value.
# Note: APIConnectionError/APITimeoutError are intentionally NOT
# caught here so they fall through to the generic handler below,
# where the full traceback aids in diagnosing local network issues.
⋮----
# log and re-raise exception
⋮----
def _generate(self, prompt: str, max_tokens: int) -> LLMResponse
⋮----
@retry_with_exponential_backoff
        def completions_with_backoff(**kwargs):  # type: ignore
⋮----
cached = False
⋮----
cached = True
⋮----
completion_call = litellm_completion
⋮----
completion_call = self.client.completions.create
⋮----
# If it's not in the cache, call the API
result = completion_call(**kwargs)
⋮----
kwargs: Dict[str, Any] = dict(model=self.config.completion_model)
⋮----
# TODO this is a temp fix, we should really be using a proper completion fn
# that takes a pre-formatted prompt, rather than mocking it as a sys msg.
⋮----
else:  # any other OpenAI-compatible endpoint
⋮----
args = dict(
⋮----
max_tokens=max_tokens,  # for output/completion
⋮----
args = self._openai_api_call_params(args)
⋮----
# assume response is an actual response rather than a streaming event
⋮----
response = response.model_dump()
⋮----
msg = response["choices"][0]["message"]["content"].strip()
⋮----
msg = response["choices"][0]["text"].strip()
⋮----
async def agenerate(self, prompt: str, max_tokens: int = 200) -> LLMResponse
⋮----
# Catch HTTP-level API errors (see comment in generate() above).
⋮----
async def _agenerate(self, prompt: str, max_tokens: int) -> LLMResponse
⋮----
# note we typically will not have self.config.stream = True
# when issuing several api calls concurrently/asynchronously.
# The calling fn should use the context `with Streaming(..., False)` to
# disable streaming.
⋮----
# WARNING: .Completion.* endpoints are deprecated,
# and as of Sep 2023 only legacy models will work here,
# e.g. text-davinci-003, text-ada-001.
⋮----
@async_retry_with_exponential_backoff
        async def completions_with_backoff(**kwargs):  # type: ignore
⋮----
# TODO this may not work: text_completion is not async,
# and we didn't find an async version in litellm
⋮----
acompletion_call = (
⋮----
result = await acompletion_call(**kwargs)
⋮----
# only makes sense for non-OpenAI models
⋮----
messages = [
prompt = self.config.hf_formatter.format(messages)
⋮----
# turn off streaming for async calls
⋮----
# only makes sense for local models, where we are trying to
# convert a chat dialog msg-sequence to a simple completion prompt.
⋮----
formatter = HFFormatter(
⋮----
prompt = formatter.format(messages)
⋮----
result = await self._achat(
⋮----
def _chat_completions_with_backoff_body(self, **kwargs):  # type: ignore
⋮----
# If it's not in the cache, call the API
⋮----
completion_call = self.client.chat.completions.create
⋮----
# If streaming, cannot cache result
# since it is a generator. Instead,
# we hold on to the hashed_key and
# cache the result later
⋮----
# Test if this is a stream with an exception by
# trying to get first chunk: Some providers like LiteLLM
# produce a valid stream object `result` instead of throwing a
# rate-limit error, and if we don't catch it here,
# we end up returning an empty response and not
# using the retry mechanism in the decorator.
⋮----
# try to get the first chunk to check for errors
test_iter = iter(result)
first_chunk = next(test_iter)
# If we get here without error, recreate the stream
result = chain([first_chunk], test_iter)
⋮----
# Empty stream is fine
⋮----
# Propagate any errors in the stream
⋮----
def _chat_completions_with_backoff(self, **kwargs):  # type: ignore
⋮----
retry_func = retry_with_exponential_backoff(
⋮----
async def _achat_completions_with_backoff_body(self, **kwargs):  # type: ignore
⋮----
acompletion_call = litellm_acompletion
⋮----
acompletion_call = self.async_client.chat.completions.create
⋮----
# Try to peek at the first chunk to immediately catch any errors
# Store the original result (the stream)
original_stream = result
⋮----
# Manually create and advance the iterator to check for errors
stream_iter = original_stream.__aiter__()
⋮----
# This will raise an exception if the stream is invalid
first_chunk = await anext(stream_iter)
⋮----
# If we reach here, the stream started successfully
# Now recreate a fresh stream from the original API result
# Otherwise, return a new stream that yields the first chunk
# and remaining items
async def combined_stream():  # type: ignore
⋮----
result = combined_stream()  # type: ignore
⋮----
# Empty stream is normal - nothing to do
⋮----
# Any exception here should be raised to trigger the retry mechanism
⋮----
async def _achat_completions_with_backoff(self, **kwargs):  # type: ignore
⋮----
retry_func = async_retry_with_exponential_backoff(
⋮----
"""Prepare args for LLM chat-completion API call"""
⋮----
llm_messages = [
⋮----
llm_messages = messages
⋮----
# TODO: we will unconditionally insert a dummy user msg
# if the only msg is a system msg.
# We could make this conditional on ModelInfo.needs_first_user_message
⋮----
# some LLMs, notable Gemini as of 12/11/24,
# require the first message to be from the user,
# so insert a dummy user msg if needed.
⋮----
chat_model = self.config.chat_model
⋮----
args: Dict[str, Any] = dict(
⋮----
# groq fails when we include stream_options in the request
⋮----
# get token-usage numbers in stream mode from OpenAI API,
# and possibly other OpenAI-compatible APIs.
⋮----
# only include functions-related args if functions are provided
# since the OpenAI API will throw an error if `functions` is None or []
⋮----
# some models e.g. o1-mini (as of sep 2024) don't support some params,
# like temperature and stream, so we need to remove them.
⋮----
param_rename_map = self.rename_params()
⋮----
# finally, get rid of extra_body params exclusive to certain models
# Only apply allowlist restrictions for known models.
# Unknown/custom models are allowed to use all params by default.
is_known_model = self.info().name != "unknown"
extra_params = args.get("extra_body", {})
⋮----
# openAI response will look like this:
"""
        {
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": 1677652288,
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "name": "",
                    "content": "\n\nHello there, how may I help you?",
                    "reasoning_content": "Okay, let's see here, hmmm...",
                    "function_call": {
                        "name": "fun_name",
                        "arguments: {
                            "arg1": "val1",
                            "arg2": "val2"
                        }
                    },
                },
                "finish_reason": "stop"
            }],
            "usage": {
                "prompt_tokens": 9,
                "completion_tokens": 12,
                "total_tokens": 21
            }
        }
        """
choices = response.get("choices")
⋮----
message = choices[0].get("message", {})
⋮----
message = {}
⋮----
content = message.get("content", "")
reasoning = message.get("reasoning_content", "")
⋮----
message_with_reasoning = content
⋮----
msg = content
⋮----
fun_call = None
⋮----
fun_call = LLMFunctionCall.from_dict(message["function_call"])
⋮----
args_str = message["function_call"]["arguments"] or ""
msg_str = message["content"] or ""
msg = msg_str + args_str
oai_tool_calls = None
⋮----
oai_tool_calls = []
⋮----
tool_call = OpenAIToolCall.from_dict(tool_call_dict)
⋮----
msg = msg + "\n" + json.dumps(tool_call_dict)
⋮----
oai_tool_calls=oai_tool_calls or None,  # don't allow empty list [] here
⋮----
"""
        ChatCompletion API call to OpenAI.
        Args:
            messages: list of messages  to send to the API, typically
                represents back and forth dialogue between user and LLM, but could
                also include "function"-role messages. If messages is a string,
                it is assumed to be a user message.
            max_tokens: max output tokens to generate
            functions: list of LLMFunction specs available to the LLM, to possibly
                use in its response
            function_call: controls how the LLM uses `functions`:
                - "auto": LLM decides whether to use `functions` or not,
                - "none": LLM blocked from using any function
                - a dict of {"name": "function_name"} which forces the LLM to use
                    the specified function.
        Returns:
            LLMResponse object
        """
args = self._prep_chat_completion(
cached, hashed_key, response = self._chat_completions_with_backoff(**args)  # type: ignore
⋮----
return llm_response  # type: ignore
⋮----
response_dict = response
⋮----
response_dict = response.model_dump()
⋮----
"""
        Async version of _chat(). See that function for details.
        """
⋮----
cached, hashed_key, response = await self._achat_completions_with_backoff(  # type: ignore
</file>

<file path=".pre-commit-config.yaml">
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
  # Ruff version.
  rev: v0.15.6
  hooks:
    - id: ruff
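# Enable these hooks once per clone with `pre-commit install`
# (standard pre-commit usage; they then run on each commit).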
</file>

<file path="pyproject.toml">
[project]
name = "langroid"
version = "0.60.1"
authors = [
    {name = "Prasad Chalasani", email = "pchalasani@gmail.com"},
]
description = "Harness LLMs with Multi-Agent Programming"
readme = "README.md"
license = {text = "MIT"}
requires-python = "<3.13,>=3.10"
dependencies = [
    "adb-cloud-connector<2.0.0,>=1.0.2",
    "aiohttp<4.0.0,>=3.9.1",
    "async-generator<2.0,>=1.10",
    "bs4<1.0.0,>=0.0.1",
    "cerebras-cloud-sdk<2.0.0,>=1.1.0",
    "colorlog<7.0.0,>=6.7.0",
    "docstring-parser<1.0,>=0.16",
    "duckduckgo-search<7.0.0,>=6.0.0",
    "exa-py>=1.8.7",
    "faker<19.0.0,>=18.9.0",
    "fakeredis<3.0.0,>=2.12.1",
    "fastmcp>=2.2.5",
    "fire<1.0.0,>=0.5.0",
    "gitpython<4.0.0,>=3.1.43",
    "google-api-python-client<3.0.0,>=2.95.0",
    "google-genai>=1.0.0",
    "groq<1.0.0,>=0.13.0",
    "grpcio<2.0.0,>=1.62.1",
    "halo<1.0.0,>=0.0.31",
    "jinja2<4.0.0,>=3.1.2",
    "json-repair<1.0.0,>=0.29.9",
    "lxml<6.0.0,>=5.4.0",
    "markdownify>=0.13.1",
    "nest-asyncio<2.0.0,>=1.6.0",
    "nltk<4.0.0,>=3.8.2",
    "onnxruntime<2.0.0,>=1.16.1",
    "openai>=1.61.1,<3.0.0",
    "pandas<3.0.0,>=2.0.3",
    "prettytable<4.0.0,>=3.8.0",
    "pydantic<3.0.0,>=2.0.0",
    "pydantic-settings<3.0.0,>=2.0.0",
    "pygithub<2.0.0,>=1.58.1",
    "pygments<3.0.0,>=2.15.1",
    "pymupdf4llm<0.1.0,>=0.0.17",
    "pyparsing<4.0.0,>=3.0.9",
    "pytest-rerunfailures<16.0,>=15.0",
    "python-dotenv>=1.0.0,<2.0.0",
    "python-magic<1.0.0,>=0.4.27",
    "pyyaml<7.0.0,>=6.0.1",
    "qdrant-client<2.0.0,>=1.8.0",
    "rank-bm25<1.0.0,>=0.2.2",
    "redis<6.0.0,>=5.0.1",
    "requests<3.0.0,>=2.31.0",
    "requests-oauthlib<2.0.0,>=1.3.1",
    "rich<14.0.0,>=13.3.4",
    "thefuzz<1.0.0,>=0.20.0",
    "tiktoken<1.0.0,>=0.7.0",
    "trafilatura>=2.0.0,<3.0.0",
    "typer<1.0.0,>=0.9.0",
    "wget<4.0,>=3.2",
]

[project.optional-dependencies]
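# Extras below follow standard pip extras syntax, e.g.:
#   pip install "langroid[doc-chat]"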
doc-chat = [
    "docling<3.0.0,>=2.20.0",
    "pdf2image<2.0.0,>=1.17.0",
    "pymupdf4llm<0.1.0,>=0.0.17",
    "pymupdf<2.0.0,>=1.23.3",
    "pypdf>=5.1.0",
    "pytesseract<0.4.0,>=0.3.10",
    "python-docx<2.0.0,>=1.1.0",
    "unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15",
    "marker-pdf"
]

hf-transformers = [
    "sentence-transformers<3.0.0,>=2.2.2",
    "torch<3.0.0,>=2.0.0",
    "transformers<5.0.0,>=4.40.1",
    "huggingface-hub<1.0.0,>=0.21.2",    
]

vecdbs = [
    "lancedb<0.9.0,>=0.8.2",
    "tantivy<0.22.0,>=0.21.0",
    "pyarrow<16.0.0,>=15.0.0",
    "chromadb<=0.4.23,>=0.4.21",
    "weaviate-client>=4.9.6",
    "pinecone-client>=5.0.1",
]

db = [
    "sqlalchemy<3.0.0,>=2.0.19",
    "psycopg2<3.0.0,>=2.9.7",
    "psycopg2-binary>=2.9.10",
    "pymysql<2.0.0,>=1.1.0",
]

all = [
    "pdf2image<2.0.0,>=1.17.0",
    "pymupdf<2.0.0,>=1.23.3",
    "pymupdf4llm<0.1.0,>=0.0.17",
    "docling<3.0.0,>=2.16.0",
    "pypdf>=5.1.0",
    "pytesseract<0.4.0,>=0.3.10",
    "python-docx<2.0.0,>=1.1.0",
    "unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15",
    "sqlalchemy<3.0.0,>=2.0.19",
    "psycopg2<3.0.0,>=2.9.7",
    "pymysql<2.0.0,>=1.1.0",
    "sentence-transformers<3.0.0,>=2.2.2",
    "torch<3.0.0,>=2.0.0",
    "transformers<5.0.0,>=4.40.1",
    "huggingface-hub<1.0.0,>=0.21.2",
    "chromadb<=0.4.23,>=0.4.21",
    "weaviate-client>=4.9.6",
    "metaphor-python<0.2.0,>=0.1.23",
    "neo4j<6.0.0,>=5.14.1",
    "python-arango<9.0.0,>=8.1.2",
    "arango-datasets<2.0.0,>=1.2.2",
    "litellm<2.0.0,>=1.30.1",
    "chainlit<3.0.0,>=2.0.1",
    "python-socketio<6.0.0,>=5.11.0",
    "fastembed<0.4.0,>=0.3.1",
    "pgvector>=0.3.6",
    "psycopg2-binary>=2.9.10",
    "marker-pdf",
    "seltz>=0.2.0",
]

# More granular groupings
lancedb = [
    "lancedb<0.9.0,>=0.8.2",
    "tantivy<0.22.0,>=0.21.0",
    "pyarrow<16.0.0,>=15.0.0",
]

docling = [
    "docling<3.0.0,>=2.16.0",
]

pymupdf4llm = [
    "pymupdf4llm<0.1.0,>=0.0.17",
]

pdf-parsers = [
    "docling<3.0.0,>=2.16.0",
    "pypdf>=5.1.0",
    "pymupdf<2.0.0,>=1.23.3",
    "pymupdf4llm<0.1.0,>=0.0.17",
    "pdf2image<2.0.0,>=1.17.0",
    "pytesseract<0.4.0,>=0.3.10",
    "markitdown[docx,xlsx,pptx]>=0.0.1a3",
    "marker-pdf",
]

docx = [
    "python-docx<2.0.0,>=1.1.0",
]

markitdown = [
    "markitdown[docx,xlsx,pptx]>=0.0.1a3",
]

marker-pdf = [
    "marker-pdf[full]>=1.6.0; sys_platform != 'darwin' or platform_machine != 'x86_64'",
    "opencv-python>=4.11.0.86",
]

scrapy = [
    "scrapy<3.0.0,>=2.11.0",
]

hf-embeddings = [
    "sentence-transformers<3.0.0,>=2.2.2",
    "torch<3.0.0,>=2.0.0",
]

transformers = [
    "transformers<5.0.0,>=4.40.1",
    "huggingface-hub<1.0.0,>=0.21.2",
    "torch<3.0.0,>=2.0.0",
]

unstructured = [
    "unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15",
]

postgres = [
    "pgvector>=0.3.6",
    "psycopg2<3.0.0,>=2.9.7",
    "psycopg2-binary>=2.9.10",
    "sqlalchemy<3.0.0,>=2.0.19",
]

mysql = [
    "pymysql<2.0.0,>=1.1.0",
]

sql = [
    "sqlalchemy<3.0.0,>=2.0.19",
    "pymysql<2.0.0,>=1.1.0",
    "psycopg2<3.0.0,>=2.9.7",
]

litellm = [
    "litellm<2.0.0,>=1.30.1",
]

neo4j = [
    "neo4j<6.0.0,>=5.14.1",
]

arango = [
    "python-arango<9.0.0,>=8.1.2",
    "arango-datasets<2.0.0,>=1.2.2",
]

metaphor = [
    "metaphor-python<0.2.0,>=0.1.23",
]

exa = [
    "exa-py>=1.8.7",
]

tavily = [
    "tavily-python>=0.5.0",
]

seltz = [
    "seltz>=0.2.0",
]

chainlit = [
    "chainlit<3.0.0,>=2.0.1",    
    "python-socketio<6.0.0,>=5.11.0",
]

chromadb = [
    "chromadb<=0.4.23,>=0.4.21",
]
weaviate = [
    "weaviate-client>=4.9.6",
]

meilisearch = [
    "meilisearch-python-sdk<3.0.0,>=2.2.3",
]

fastembed = [
    "fastembed<0.4.0,>=0.3.1",
]
google-genai = [
    "google-genai>=1.0.0",
]

google-generativeai = [
    "google-genai>=1.0.0",
]
doc-parsers = [
    "markitdown[docx,xlsx,pptx]>=0.0.1a3",
    "openpyxl>=3.1.5",
    "python-docx>=1.1.2",
    "python-pptx>=1.0.2",
    "xlrd>=2.0.1",
]

pinecone = [
    "pinecone-client>=5.0.1"
]
asyncio = [
    "asyncio>=3.4.3",
]
firecrawl = [
    "firecrawl-py>=1.13.5",
]
crawl4ai = [
    "crawl4ai>=0.6.3",
]


[dependency-groups]
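# PEP 735 dependency groups; installable with a supporting tool,
# e.g. `uv sync --group docs` (uv includes the `dev` group by default).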
dev = [
    "black[jupyter]>=24.3.0,<25.0.0",
    "flake8<7.0.0,>=6.0.0",
    "mypy<2.0.0,>=1.11.2",
    "ruff<1.0.0,>=0.8.4",
    "pre-commit<4.0.0,>=3.3.2",
    "autopep8<3.0.0,>=2.0.2",
    "types-python-dateutil>=2.8.0",
    "types-redis<5.0.0.0,>=4.5.5.2",
    "types-requests<3.0.0.0,>=2.31.0.1",
    "types-pyyaml<7.0.0.0,>=6.0.12.20240311",
    "types-pillow<11.0.0.0,>=10.2.0.20240406",
    "pytest<8.0.0,>=7.3.1",
    "pytest-redis<4.0.0,>=3.0.2",
    "pytest-asyncio<1.0.0,>=0.21.1",
    "pytest-postgresql<6.0.0,>=5.0.0",
    "coverage<8.0.0,>=7.2.5",
    "pytest-xdist<4.0.0,>=3.6.1",
    "pytest-timeout<3.0.0,>=2.3.1",
    "pytest-cov<6.0.0,>=5.0.0",
    "docker<8.0.0,>=7.1.0",
    "commitizen>=4.1.0",
    "pytest-mysql>=3.1.0",
]
docs = [
    "mkdocs<2.0.0,>=1.4.2",
    "mkdocs-material<10.0.0,>=9.1.5",
    "mkdocstrings[python]<1.0.0,>=0.25.2",
    "mkdocs-awesome-pages-plugin<3.0.0,>=2.8.0",
    "mkdocs-rss-plugin<2.0.0,>=1.8.0",
    "mkdocs-gen-files<1.0.0,>=0.4.0",
    "mkdocs-literate-nav<1.0.0,>=0.6.0",
    "mkdocs-section-index<1.0.0,>=0.3.5",
    "mkdocs-jupyter<1.0.0,>=0.24.1",
    "nbconvert>=7.17.0",
    "griffe<1.0.0",
]


[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


[tool.hatch.build]
only-packages = true
include = ["langroid/py.typed", "langroid/"]
exclude = [
    "tests/",
    "examples/",
    "**/__pycache__",
    "**/*.pyc",
    "**/node_modules/**",
]

[tool.black]
line-length = 88
include = '\.pyi?$'
# extend-exclude = '.*pyi$'
# exclude = '^stubs/'

[tool.pytype]
inputs = ["langroid"]

[tool.mypy]
python_version = "3.11"
#mypy_path = ["stubs"]

#follow_imports = "skip"
#check_untyped_defs = true
disallow_untyped_defs = true
ignore_missing_imports = true
warn_unused_ignores = false
strict = true
exclude = [
    "docs", ".venv", "venv", "examples", "examples_dev", "langroid/utils/web",
    "notebooks",
    "langroid/parsing/repo_loader.py",
    "langroid/embedding_models/clustering.py",
    "langroid/agent/callbacks/chainlit.py",
    "langroid/vector_store/chromadb.py",
    "langroid/embedding_models/protoc" # ignore generated files
]
files=["langroid/*"]
plugins = [
    "pydantic.mypy"
]

[tool.ruff]
line-length = 88
# Allow unused variables when underscore-prefixed.
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
lint.select = [
    "E",  # pycodestyle
    "F",  # pyflakes
    "I",  # isort
]
lint.exclude = ["docs/**", ".venv", "venv", "examples/**", "examples_dev", "langroid/utils/web", "notebooks", "__init__.py", "langroid/embedding_models/protoc/*"]
lint.fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
lint.unfixable = []
lint.extend-ignore = ["F821"]
# Assume Python 3.11.
target-version = "py311"

[tool.pytest.ini_options]
filterwarnings = ["ignore::DeprecationWarning"]


[tool.commitizen]
name = "cz_conventional_commits"
tag_format = "$version"
version_scheme = "semver"
version_provider = "pep621"
major_version_zero = true
</file>

</files>
