# ghcr.io/ingero-io/vllm-ingero: a drop-in vLLM OpenAI server image with
# the Ingero agent baked in. Run it exactly like vllm/vllm-openai, but
# add --privileged --pid=host so the agent's eBPF uprobes can see vLLM's
# CUDA + NCCL activity. Without those, the image still serves vLLM; the
# agent degrades to a no-op (see entrypoint.sh).
#
#   docker run --gpus all --privileged --pid=host -p 8000:8000 \
#     ghcr.io/ingero-io/vllm-ingero:latest --model facebook/opt-125m
#
# Build args select both halves of the image. They are declared before the
# first FROM so that they can be referenced in the FROM lines.
#   VLLM_TAG     tag of the vllm/vllm-openai base image.
#   AGENT_IMAGE  image the ingero agent binary is taken from.
# NOTE: AGENT_IMAGE defaults to the floating :latest tag, so a build that
# relies on the defaults is NOT reproducible. When publishing a tag, override
# it with an immutable reference (a version tag or an @sha256 digest), e.g.
#   docker build --build-arg AGENT_IMAGE=ghcr.io/ingero-io/ingero@sha256:<digest> .
ARG VLLM_TAG=v0.6.6
ARG AGENT_IMAGE=ghcr.io/ingero-io/ingero:latest

# Named stage whose only visible use is as a `COPY --from=agent` source.
FROM ${AGENT_IMAGE} AS agent

# Final image: the upstream vLLM OpenAI server at the tag chosen by VLLM_TAG.
FROM vllm/vllm-openai:${VLLM_TAG}
# The agent is a single static binary; copy it out of the published
# agent image so we never rebuild the eBPF objects here.
COPY --from=agent /usr/local/bin/ingero /usr/local/bin/ingero
# Install the wrapper script already executable. COPY --chmod sets the mode
# in the same layer as the copy, so no follow-up `RUN chmod` layer (which
# would store the file a second time) is needed.
# --chmod requires BuildKit, the default builder since Docker Engine 23.0.
COPY --chmod=0755 entrypoint.sh /usr/local/bin/ingero-vllm-entrypoint.sh

# Location of the agent's trace database. The containing directory is
# declared as a volume, so mount a named volume or host path over
# /var/lib/ingero to keep traces across container runs.
ENV INGERO_DB=/var/lib/ingero/vllm.db
VOLUME ["/var/lib/ingero"]

# The wrapper starts the agent (best-effort) then execs the vLLM
# OpenAI API server with all passed arguments.
# Exec (JSON-array) form: the script is run directly rather than through
# `/bin/sh -c`, and anything given after the image name on `docker run` is
# appended to it as arguments.
# Setting ENTRYPOINT here replaces the base image's entrypoint and resets any
# CMD inherited from it.
ENTRYPOINT ["/usr/local/bin/ingero-vllm-entrypoint.sh"]
