{% extends "base.html" %}
{% block content %}
{# Everything in this template is rendered inside the "content" block of base.html. #}
{% include "partials/evaluate_subnav.html" %}

LLM Evaluation Dashboard

{# Status banner: current state, run identifier, model name, and overall progress. #}
{{ status.status | upper }}
{# Run id comes from run_info when it is present; otherwise fall back to status.run_id, then to an empty string. The model name is shown only when run_info carries one. #}
Run #{{ status.run_info.run_id if status.run_info else (status.run_id or '') }} {{ status.run_info.model_name if (status.run_info and status.run_info.model_name) else '' }}
{# Progress starts at 0 of the summed per-domain test counts. A missing or null domain_test_counts is treated as empty (total 0), mirroring how domain_names is guarded in the matrix below. #}
0/{{ (domain_test_counts or {}).values() | sum }}
{# Evaluation matrix: one row per domain with five level cells (L1 to L5), each rendered as a "-" placeholder. #}
{% for domain in domains %}
{# Use the display name from domain_names when one exists; otherwise show the domain key with underscores replaced by spaces. #}
{{ (domain_names or {}).get(domain, domain.replace('_', ' ')) }}
{% for level in range(1, 6) %}
L{{ level }}
-
{% endfor %}
-
{# NOTE(review): the per-domain total is hard-coded to 5, matching the five level cells above; confirm it should not come from domain_test_counts. #}
0/5
{% endfor %}

{% if status.run_info and status.run_info.summary and status.status == 'completed' %}
  {# Scale overall_score to a percentage. A missing or null score is treated as 0 so the page still renders instead of raising during the multiplication. #}
  {% set score = (status.run_info.overall_score | default(0, true)) * 100 %}
  {# Pick the presentation tier once: >= 80, >= 60, or anything lower. "if" does not open a new scope in Jinja, so these assignments stay visible after the endif. #}
  {% if score >= 80 %}
    {% set score_color = 'text-emerald-600' %}
    {% set score_bg = 'from-emerald-500 to-emerald-600' %}
    {% set score_badge = 'bg-emerald-100 text-emerald-700 border-emerald-300' %}
    {% set score_label = 'Excellent' %}
  {% elif score >= 60 %}
    {% set score_color = 'text-amber-600' %}
    {% set score_bg = 'from-amber-500 to-amber-600' %}
    {% set score_badge = 'bg-amber-100 text-amber-700 border-amber-300' %}
    {% set score_label = 'Good' %}
  {% else %}
    {% set score_color = 'text-red-600' %}
    {% set score_bg = 'from-red-500 to-red-600' %}
    {% set score_badge = 'bg-red-100 text-red-700 border-red-300' %}
    {% set score_label = 'Needs Improvement' %}
  {% endif %}
{% elif status.run_info and status.status == 'interrupted' %}
{# Shown only when a run exists and its status is "interrupted". #}

Evaluation Interrupted

The evaluation was stopped before completion. Partial results are shown in the matrix above.

{% endif %}

Real-Time Logs

{# Include the test and training modal partials, then close the content block opened at the top of the template. #}
{% include "partials/test-modal.html" %}
{% include "partials/training-modal.html" %}
{% endblock %}