# Base model, pinned to an explicit tag.
FROM gemma4:26b-a4b-it-q8_0

# --- Sampling ---------------------------------------------------------
# temperature, top_k and top_p follow Google's upstream defaults; min_p
# is an addition on top of those defaults, for diversity.
PARAMETER min_p 0.05
PARAMETER top_p 0.95
PARAMETER top_k 64
PARAMETER temperature 1.0

# --- Repetition control -----------------------------------------------
# Guards against degenerate loops. repeat_last_n sets how far back the
# penalty looks.
PARAMETER repeat_last_n 256
PARAMETER repeat_penalty 1.1

# --- Runtime ----------------------------------------------------------
# Batch size used for prompt evaluation (prefill).
# NOTE(review): 512 appears to equal Ollama's built-in default — confirm
# whether a larger value was intended here.
PARAMETER num_batch 512

# Baseline context window, deliberately small so startup stays fast. PRE
# sends num_ctx with each request and raises it as conversations grow.
PARAMETER num_ctx 8192
