config backend=cuda model_dir=/home/ckl/.cache/modelscope/hub/models/Qwen/Qwen3-0.6B train_steps=500 rollout_len=8 decode_len=16 lr=1.000000012e-7 default_lr=4.999999874e-5 grad_clip=1 perturb_scale=0.001 perturb_seed=0x0f0dcafe20260521 safety_first_step_max_seconds=0.5
prompt split=train index=0 ids=[1, 872, 198, 3456]
prompt split=train index=1 ids=[1, 198, 1512, 429]
prompt split=train index=2 ids=[1, 770, 3186, 25, 220]
prompt split=train index=3 ids=[1, 644, 374, 279, 1887]
prompt split=train index=4 ids=[1, 3838, 374, 264, 2077, 13]
prompt split=train index=5 ids=[1, 785, 594, 287, 374, 1690]
prompt split=train index=6 ids=[1, 3347, 11, 358, 1052, 429]
prompt split=train index=7 ids=[1, 2610, 527, 1139, 304, 279, 1670]
prompt split=heldout index=0 ids=[1, 4438, 374, 279, 2768]
prompt split=heldout index=1 ids=[1, 1516, 374, 264, 1296, 4339]
prompt split=heldout index=2 ids=[1, 785, 1401, 315, 279, 1967]
prompt split=heldout index=3 ids=[1, 3198, 279, 1296, 25, 220]
model_summary hidden=1024 intermediate=3072 layers=28 vocab=151936 num_heads=16 num_kv_heads=8 head_dim=128 tie_word_embeddings=true rope_theta=1000000 teacher_param_elements=601292800 student_model_elements=601292800 student_trainable_elements=596049920 teacher_load_seconds=7.155151 student_load_seconds=6.646846
eval_detail step=0 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] overlap_pct=100.000000 kl=5.621485744854e-3
eval_detail step=0 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] overlap_pct=100.000000 kl=1.115262489178e-2
eval_detail step=0 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=1.497549897173e-2
eval_detail step=0 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 2188, 315, 279, 1849, 315, 279, 8453] overlap_pct=31.250000 kl=1.330972498486e-2
eval_detail step=0 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[3555, 374, 264, 5714, 30, 3555, 374, 264, 5714, 30, 3555, 374, 264, 5714, 30, 3555] overlap_pct=6.250000 kl=1.736336546338e-2
eval_detail step=0 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 19257, 311, 279, 1251, 315, 279, 5537, 315, 279, 7748, 21575] overlap_pct=56.250000 kl=2.734693989953e-2
eval_detail step=0 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] overlap_pct=100.000000 kl=1.167650551419e-2
eval_detail step=0 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=1.322513795018e-2
eval_detail step=0 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 13, 576, 3491, 374, 311, 1477, 279, 897, 315, 279, 7493, 13, 576] overlap_pct=31.250000 kl=1.923349009451e-2
eval_detail step=0 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[291, 504, 279, 2701, 1467, 25, 4710, 785, 2701, 374, 264, 1140, 315, 279, 1429, 3213] overlap_pct=18.750000 kl=1.733289758898e-2
eval_detail step=0 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 37250, 304, 279, 6424, 13, 1084, 374, 264] overlap_pct=50.000000 kl=4.475668043414e-2
eval_detail step=0 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] overlap_pct=100.000000 kl=5.589429209807e-3
eval_summary step=0 train_overlap_pct=74.218750 heldout_overlap_pct=50.000000 train_kl=1.433391042756e-2 heldout_kl=2.172812433186e-2 eval_seconds=8.675556
train_step step=1 prompt_index=0 prompt=[1, 872, 198, 3456] loss=1.788745430531e-5 rollout_len=12 step_seconds=0.260222
train_step step=10 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.805938104051e-5 rollout_len=12 step_seconds=0.293312
train_step step=20 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.934411506634e-5 rollout_len=13 step_seconds=0.295141
train_step step=30 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.520914106048e-5 rollout_len=14 step_seconds=0.296294
train_step step=40 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.992492929683e-5 rollout_len=15 step_seconds=0.298195
train_step step=50 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.804211155511e-5 rollout_len=12 step_seconds=0.293395
eval_detail step=50 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] overlap_pct=100.000000 kl=4.205619347186e-3
eval_detail step=50 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] overlap_pct=100.000000 kl=9.303841193494e-3
eval_detail step=50 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=7.015143094485e-3
eval_detail step=50 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 2188, 315, 279, 1849, 315, 279, 8453] overlap_pct=31.250000 kl=1.266151387503e-2
eval_detail step=50 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[576, 1102, 315, 264, 729, 374, 279, 897, 5927, 553, 279, 729, 1618, 13, 1752, 3110] overlap_pct=18.750000 kl=1.540783205582e-2
eval_detail step=50 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 19257, 311, 279, 1251, 315, 279, 5537, 315, 279, 3082, 315] overlap_pct=50.000000 kl=2.405758219685e-2
eval_detail step=50 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] overlap_pct=100.000000 kl=9.536280958109e-3
eval_detail step=50 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=9.135539867783e-3
eval_detail step=50 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 13, 576, 3491, 374, 311, 1477, 279, 897, 315, 279, 7493, 13, 576] overlap_pct=31.250000 kl=1.847108675102e-2
eval_detail step=50 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[291, 504, 279, 2701, 1467, 25, 4710, 785, 2701, 374, 264, 1140, 315, 279, 1429, 3213] overlap_pct=18.750000 kl=1.747763554751e-2
eval_detail step=50 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 37250, 304, 279, 6424, 13, 1084, 374, 264] overlap_pct=50.000000 kl=4.366684810067e-2
eval_detail step=50 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] overlap_pct=100.000000 kl=5.384251043790e-3
eval_summary step=50 train_overlap_pct=75.000000 heldout_overlap_pct=50.000000 train_kl=1.141541907359e-2 heldout_kl=2.124995536075e-2 eval_seconds=7.146351
train_step step=60 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.933618427254e-5 rollout_len=13 step_seconds=0.294201
train_step step=70 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.519380334183e-5 rollout_len=14 step_seconds=0.296803
train_step step=80 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.990106415586e-5 rollout_len=15 step_seconds=0.298558
train_step step=90 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.803059007623e-5 rollout_len=12 step_seconds=0.292715
train_step step=100 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.932908293791e-5 rollout_len=13 step_seconds=0.293739
eval_detail step=100 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] overlap_pct=100.000000 kl=3.659647304725e-3
eval_detail step=100 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] overlap_pct=100.000000 kl=8.213714346970e-3
eval_detail step=100 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=5.972806679367e-3
eval_detail step=100 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 2188, 315, 279, 1849, 315, 279, 8453] overlap_pct=31.250000 kl=1.206390871072e-2
eval_detail step=100 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[576, 1102, 315, 264, 729, 374, 279, 897, 5927, 553, 279, 729, 1618, 13, 1752, 3110] overlap_pct=18.750000 kl=1.418225509143e-2
eval_detail step=100 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 19257, 311, 279, 1251, 315, 279, 5537, 315, 279, 3082, 315] overlap_pct=50.000000 kl=2.039082392256e-2
eval_detail step=100 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] overlap_pct=100.000000 kl=8.055084552512e-3
eval_detail step=100 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=7.037745849744e-3
eval_detail step=100 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 13, 576, 3491, 374, 311, 1477, 279, 897, 315, 279, 7493, 13, 576] overlap_pct=31.250000 kl=1.781074049738e-2
eval_detail step=100 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[291, 504, 279, 2701, 1467, 25, 4710, 785, 1156, 3019, 304, 279, 1882, 315, 11220, 264] overlap_pct=25.000000 kl=1.736794087595e-2
eval_detail step=100 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] overlap_pct=100.000000 kl=4.304753649745e-2
eval_detail step=100 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] overlap_pct=100.000000 kl=5.235718316689e-3
eval_summary step=100 train_overlap_pct=75.000000 heldout_overlap_pct=64.062500 train_kl=9.946998307254e-3 heldout_kl=2.086548404687e-2 eval_seconds=7.208395
train_step step=110 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.518353696563e-5 rollout_len=14 step_seconds=0.296126
train_step step=120 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.988657408627e-5 rollout_len=15 step_seconds=0.296949
train_step step=130 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.802291030297e-5 rollout_len=12 step_seconds=0.294144
train_step step=140 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.932326944778e-5 rollout_len=13 step_seconds=0.296181
train_step step=150 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.460746665951e-5 rollout_len=14 step_seconds=0.296160
train_step step=160 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.987666786998e-5 rollout_len=15 step_seconds=0.299387
train_step step=170 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.801651473623e-5 rollout_len=12 step_seconds=0.295862
train_step step=180 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.931833998649e-5 rollout_len=13 step_seconds=0.296240
train_step step=190 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.460099833319e-5 rollout_len=14 step_seconds=0.297522
train_step step=200 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.987005038653e-5 rollout_len=15 step_seconds=0.301596
eval_detail step=200 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] overlap_pct=100.000000 kl=3.029544466357e-3
eval_detail step=200 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] overlap_pct=100.000000 kl=7.028778132486e-3
eval_detail step=200 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=4.758022292311e-3
eval_detail step=200 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 2188, 315, 279, 1849, 315, 279, 8453] overlap_pct=31.250000 kl=1.117112802584e-2
eval_detail step=200 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[576, 1102, 315, 264, 729, 374, 279, 897, 5927, 553, 279, 729, 1618, 13, 1752, 3110] overlap_pct=18.750000 kl=1.316431258812e-2
eval_detail step=200 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] overlap_pct=100.000000 kl=1.610525876044e-2
eval_detail step=200 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] overlap_pct=100.000000 kl=6.487489959923e-3
eval_detail step=200 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=5.605665945048e-3
eval_detail step=200 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 13, 576, 3491, 374, 311, 1477, 279, 897, 315, 279, 7493, 13, 576] overlap_pct=31.250000 kl=1.688688419578e-2
eval_detail step=200 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[291, 504, 279, 2701, 1467, 25, 4710, 785, 1156, 3019, 304, 279, 1882, 315, 11220, 264] overlap_pct=25.000000 kl=1.696500697340e-2
eval_detail step=200 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] overlap_pct=100.000000 kl=4.213957887355e-2
eval_detail step=200 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] overlap_pct=100.000000 kl=5.029617609293e-3
eval_summary step=200 train_overlap_pct=81.250000 heldout_overlap_pct=64.062500 train_kl=8.418775021317e-3 heldout_kl=2.025527191301e-2 eval_seconds=7.243566
train_step step=210 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.801160710282e-5 rollout_len=12 step_seconds=0.294861
train_step step=220 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.931415267289e-5 rollout_len=13 step_seconds=0.295140
train_step step=230 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.459595245658e-5 rollout_len=14 step_seconds=0.296448
train_step step=240 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.986467709183e-5 rollout_len=15 step_seconds=0.298035
train_step step=250 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.800778722507e-5 rollout_len=12 step_seconds=0.294032
train_step step=260 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.931055834983e-5 rollout_len=13 step_seconds=0.296614
train_step step=270 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.459165236563e-5 rollout_len=14 step_seconds=0.296171
train_step step=280 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.986032970715e-5 rollout_len=15 step_seconds=0.296461
train_step step=290 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.800457852776e-5 rollout_len=12 step_seconds=0.292815
train_step step=300 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.930728416890e-5 rollout_len=13 step_seconds=0.294462
train_step step=310 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.458833089098e-5 rollout_len=14 step_seconds=0.297524
train_step step=320 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.985686998931e-5 rollout_len=15 step_seconds=0.298501
train_step step=330 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.800167905865e-5 rollout_len=12 step_seconds=0.295743
train_step step=340 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.930441744160e-5 rollout_len=13 step_seconds=0.295527
train_step step=350 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.458525679889e-5 rollout_len=14 step_seconds=0.297101
train_step step=360 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.985385774286e-5 rollout_len=15 step_seconds=0.299497
train_step step=370 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.799922706094e-5 rollout_len=12 step_seconds=0.294285
train_step step=380 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.930222737836e-5 rollout_len=13 step_seconds=0.296161
train_step step=390 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.458251740085e-5 rollout_len=14 step_seconds=0.297257
train_step step=400 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.985144576291e-5 rollout_len=15 step_seconds=0.299399
train_step step=410 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.799694604822e-5 rollout_len=12 step_seconds=0.295931
train_step step=420 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.929993545171e-5 rollout_len=13 step_seconds=0.295656
train_step step=430 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.458029823378e-5 rollout_len=14 step_seconds=0.298232
train_step step=440 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.984901923104e-5 rollout_len=15 step_seconds=0.301244
train_step step=450 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.799506157520e-5 rollout_len=12 step_seconds=0.295678
train_step step=460 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.929785816581e-5 rollout_len=13 step_seconds=0.298644
train_step step=470 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.457855564193e-5 rollout_len=14 step_seconds=0.300322
train_step step=480 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.984706199844e-5 rollout_len=15 step_seconds=0.300049
train_step step=490 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.799332262133e-5 rollout_len=12 step_seconds=0.296596
train_step step=500 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.929619560950e-5 rollout_len=13 step_seconds=0.297693
eval_detail step=500 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] overlap_pct=100.000000 kl=2.264052394804e-3
eval_detail step=500 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] overlap_pct=100.000000 kl=5.495318545835e-3
eval_detail step=500 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=3.300165340592e-3
eval_detail step=500 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 2188, 315, 279, 1849, 315, 279, 8453] overlap_pct=31.250000 kl=9.850565967708e-3
eval_detail step=500 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=0.000000 kl=1.240619213162e-2
eval_detail step=500 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] overlap_pct=100.000000 kl=1.257756216943e-2
eval_detail step=500 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] overlap_pct=100.000000 kl=4.824963664762e-3
eval_detail step=500 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=4.096562580926e-3
eval_detail step=500 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 13, 576, 3491, 374, 311, 1477, 279, 897, 315, 279, 7493, 13, 576] overlap_pct=31.250000 kl=1.575100311769e-2
eval_detail step=500 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[291, 504, 279, 2701, 1467, 25, 4710, 785, 1156, 3019, 304, 279, 1882, 315, 11220, 264] overlap_pct=25.000000 kl=1.589673028514e-2
eval_detail step=500 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] overlap_pct=100.000000 kl=4.137502208692e-2
eval_detail step=500 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] overlap_pct=100.000000 kl=4.533159641555e-3
eval_summary step=500 train_overlap_pct=78.906250 heldout_overlap_pct=64.062500 train_kl=6.851922849460e-3 heldout_kl=1.938897878283e-2 eval_seconds=7.209642
training_summary total_steps=500 total_wall_seconds=176.848619 mean_step_seconds=0.296080 median_step_seconds=0.296389 mean_sampled_loss=3.395399682995e-5 first_sampled_loss=1.788745430531e-5 step200_sampled_loss=3.987005038653e-5 final_sampled_loss=3.929619560950e-5 sampled_loss_reduction_200_pct=-122.893933 sampled_loss_reduction_final_pct=-119.685792 train_kl_reduction_200_pct=41.266725 train_kl_reduction_final_pct=52.197812
summary_eval_row step=0 train_overlap_pct=74.218750 heldout_overlap_pct=50.000000 train_kl=1.433391042756e-2 heldout_kl=2.172812433186e-2
summary_eval_row step=50 train_overlap_pct=75.000000 heldout_overlap_pct=50.000000 train_kl=1.141541907359e-2 heldout_kl=2.124995536075e-2
summary_eval_row step=100 train_overlap_pct=75.000000 heldout_overlap_pct=64.062500 train_kl=9.946998307254e-3 heldout_kl=2.086548404687e-2
summary_eval_row step=200 train_overlap_pct=81.250000 heldout_overlap_pct=64.062500 train_kl=8.418775021317e-3 heldout_kl=2.025527191301e-2
summary_eval_row step=500 train_overlap_pct=78.906250 heldout_overlap_pct=64.062500 train_kl=6.851922849460e-3 heldout_kl=1.938897878283e-2
