config backend=cuda model_dir=/home/ckl/.cache/modelscope/hub/models/Qwen/Qwen3-0.6B train_steps=500 rollout_len=8 decode_len=16 lr=0.00005 grad_clip=1 perturb_scale=0.001 perturb_seed=0x0f0dcafe20260521 safety_first_step_max_seconds=0.5
prompt split=train index=0 ids=[1, 872, 198, 3456]
prompt split=train index=1 ids=[1, 198, 1512, 429]
prompt split=train index=2 ids=[1, 770, 3186, 25, 220]
prompt split=train index=3 ids=[1, 644, 374, 279, 1887]
prompt split=train index=4 ids=[1, 3838, 374, 264, 2077, 13]
prompt split=train index=5 ids=[1, 785, 594, 287, 374, 1690]
prompt split=train index=6 ids=[1, 3347, 11, 358, 1052, 429]
prompt split=train index=7 ids=[1, 2610, 527, 1139, 304, 279, 1670]
prompt split=heldout index=0 ids=[1, 4438, 374, 279, 2768]
prompt split=heldout index=1 ids=[1, 1516, 374, 264, 1296, 4339]
prompt split=heldout index=2 ids=[1, 785, 1401, 315, 279, 1967]
prompt split=heldout index=3 ids=[1, 3198, 279, 1296, 25, 220]
model_summary hidden=1024 intermediate=3072 layers=28 vocab=151936 num_heads=16 num_kv_heads=8 head_dim=128 tie_word_embeddings=true rope_theta=1000000 teacher_param_elements=601292800 student_model_elements=601292800 student_trainable_elements=596049920 teacher_load_seconds=7.193975 student_load_seconds=6.657006
eval_detail step=0 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] overlap_pct=100.000000 kl=5.621485744854e-3
eval_detail step=0 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] overlap_pct=100.000000 kl=1.115262489178e-2
eval_detail step=0 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=1.497549897173e-2
eval_detail step=0 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 2188, 315, 279, 1849, 315, 279, 8453] overlap_pct=31.250000 kl=1.330972498486e-2
eval_detail step=0 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[3555, 374, 264, 5714, 30, 3555, 374, 264, 5714, 30, 3555, 374, 264, 5714, 30, 3555] overlap_pct=6.250000 kl=1.736336546338e-2
eval_detail step=0 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 19257, 311, 279, 1251, 315, 279, 5537, 315, 279, 7748, 21575] overlap_pct=56.250000 kl=2.734693989953e-2
eval_detail step=0 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] overlap_pct=100.000000 kl=1.167650551419e-2
eval_detail step=0 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=1.322513795018e-2
eval_detail step=0 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 13, 576, 3491, 374, 311, 1477, 279, 897, 315, 279, 7493, 13, 576] overlap_pct=31.250000 kl=1.923349009451e-2
eval_detail step=0 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[291, 504, 279, 2701, 1467, 25, 4710, 785, 2701, 374, 264, 1140, 315, 279, 1429, 3213] overlap_pct=18.750000 kl=1.733289758898e-2
eval_detail step=0 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 37250, 304, 279, 6424, 13, 1084, 374, 264] overlap_pct=50.000000 kl=4.475668043414e-2
eval_detail step=0 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] overlap_pct=100.000000 kl=5.589429209807e-3
eval_summary step=0 train_overlap_pct=74.218750 heldout_overlap_pct=50.000000 train_kl=1.433391042756e-2 heldout_kl=2.172812433186e-2 eval_seconds=8.714601
train_step step=1 prompt_index=0 prompt=[1, 872, 198, 3456] loss=1.788745430531e-5 rollout_len=12 step_seconds=0.260722
train_step step=10 prompt_index=1 prompt=[1, 198, 1512, 429] loss=4.504968092078e-5 rollout_len=12 step_seconds=0.294152
train_step step=20 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.947103687096e-5 rollout_len=13 step_seconds=0.295688
train_step step=30 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=5.406545460573e-5 rollout_len=14 step_seconds=0.298313
train_step step=40 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.347077629063e-5 rollout_len=15 step_seconds=0.298911
train_step step=50 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.922039104509e-5 rollout_len=12 step_seconds=0.294604
eval_detail step=50 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[1688, 912, 368, 972, 262, 1077, 2629, 284, 2197, 4749, 492, 1242, 1823, 957, 463, 262] overlap_pct=12.500000 kl=1.312181524679e0
eval_detail step=50 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 537, 264, 1372, 11, 714, 264, 914, 11, 323, 1221, 279, 914, 374, 537, 264] overlap_pct=12.500000 kl=3.124620383626e-1
eval_detail step=50 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=93.750000 kl=6.487561564144e-1
eval_detail step=50 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2530, 315, 4802, 369, 279, 1879, 11, 323, 279, 1887, 2530, 315, 4802, 369, 279, 1879] overlap_pct=0.000000 kl=5.495872400161e-1
eval_detail step=50 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[3555, 374, 279, 1102, 315, 279, 1102, 315, 279, 1102, 315, 279, 1102, 315, 279, 1102] overlap_pct=0.000000 kl=5.005745857426e-1
eval_detail step=50 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[11, 279, 364, 82, 6, 374, 264, 364, 82, 6, 323, 364, 82, 6, 374, 264] overlap_pct=0.000000 kl=5.422453971716e-1
eval_detail step=50 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 13, 358, 2776, 537, 2704, 421, 432, 594, 264, 1661, 4522] overlap_pct=43.750000 kl=1.743542863927e-1
eval_detail step=50 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 374, 304, 279, 10984, 11, 323, 279, 62, 716, 374] overlap_pct=43.750000 kl=3.661390606559e-1
eval_detail step=50 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 1459, 315, 279, 1459, 315, 279, 1459, 315, 279, 1459, 315, 279, 1459, 315] overlap_pct=12.500000 kl=4.457561841836e-1
eval_detail step=50 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[11, 714, 432, 374, 264, 1661, 3110, 315, 264, 1661, 3110, 315, 264, 1661, 3110, 315] overlap_pct=0.000000 kl=4.684752791382e-1
eval_detail step=50 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[4355, 374, 264, 4185, 3491, 11, 323, 279, 3491, 374, 311, 1477, 279, 7192, 1372, 315] overlap_pct=0.000000 kl=9.194811650382e-1
eval_detail step=50 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 11, 220, 16, 15, 15, 15, 15, 15, 11] overlap_pct=18.750000 kl=2.103656874514e-1
eval_summary step=50 train_overlap_pct=25.781250 heldout_overlap_pct=7.812500 train_kl=5.507875361794e-1 heldout_kl=5.110195789528e-1 eval_seconds=7.170249
train_step step=60 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=4.199991963105e-5 rollout_len=13 step_seconds=0.295828
train_step step=70 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=4.256968895788e-5 rollout_len=14 step_seconds=0.297910
train_step step=80 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.740760439541e-5 rollout_len=15 step_seconds=0.298627
train_step step=90 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.702204776346e-5 rollout_len=12 step_seconds=0.295139
train_step step=100 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.974307401222e-5 rollout_len=13 step_seconds=0.297594
eval_detail step=100 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 584] overlap_pct=93.750000 kl=3.009291620410e-1
eval_detail step=100 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 6524, 7952, 304, 279, 914, 624, 785, 1372, 315] overlap_pct=62.500000 kl=1.347613624530e-1
eval_detail step=100 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=2.506804537519e-1
eval_detail step=100 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 9471, 315, 279, 8453, 1251, 1189, 4710] overlap_pct=31.250000 kl=2.233031076371e-1
eval_detail step=100 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[16, 15, 15, 15, 7521, 323, 1246, 374, 432, 1483, 304, 279, 2266, 315, 279, 1809] overlap_pct=0.000000 kl=3.950790959821e-1
eval_detail step=100 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 323, 330, 1782, 364, 82, 6, 374, 264, 6, 304, 279, 364, 82, 6, 374] overlap_pct=6.250000 kl=4.060047137432e-1
eval_detail step=100 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 11, 714, 358, 646, 2666, 432, 13, 358, 646, 2666, 432] overlap_pct=56.250000 kl=6.846393646911e-2
eval_detail step=100 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 374, 304, 279, 62, 716, 1154, 323, 279, 62, 716] overlap_pct=50.000000 kl=1.931078198278e-1
eval_detail step=100 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 1459, 315, 279, 1459, 315, 279, 1459, 315, 279, 1459, 315, 279, 1459, 315] overlap_pct=12.500000 kl=4.821530228852e-1
eval_detail step=100 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[11, 714, 279, 829, 374, 264, 829, 11, 323, 279, 829, 374, 264, 829, 11, 323] overlap_pct=6.250000 kl=4.285622142685e-1
eval_detail step=100 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[83145, 3491, 374, 264, 3491, 429, 7460, 279, 990, 315, 279, 1967, 83145, 594, 821, 5944] overlap_pct=0.000000 kl=8.412190856824e-1
eval_detail step=100 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 11, 220, 16, 24, 24, 16, 11, 220, 16, 24, 24, 17] overlap_pct=25.000000 kl=1.906344156364e-1
eval_summary step=100 train_overlap_pct=50.000000 heldout_overlap_pct=10.937500 train_kl=2.465412064881e-1 heldout_kl=4.856421846181e-1 eval_seconds=7.296999
train_step step=110 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.702289177454e-5 rollout_len=14 step_seconds=0.299213
train_step step=120 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.031626667711e-5 rollout_len=15 step_seconds=0.300126
train_step step=130 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.839375858661e-5 rollout_len=12 step_seconds=0.296117
train_step step=140 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.981669578934e-5 rollout_len=13 step_seconds=0.297787
train_step step=150 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.533907511155e-5 rollout_len=14 step_seconds=0.299527
train_step step=160 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.169817839283e-5 rollout_len=15 step_seconds=0.298606
train_step step=170 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.830777859548e-5 rollout_len=12 step_seconds=0.293518
train_step step=180 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.967071097577e-5 rollout_len=13 step_seconds=0.295250
train_step step=190 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.506124630803e-5 rollout_len=14 step_seconds=0.297828
train_step step=200 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.003396679764e-5 rollout_len=15 step_seconds=0.296807
eval_detail step=200 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 584] overlap_pct=93.750000 kl=2.338035372802e-1
eval_detail step=200 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 6524, 7952, 304, 279, 914, 11, 323, 279, 1372] overlap_pct=62.500000 kl=9.013038090703e-2
eval_detail step=200 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=1.553399640861e-1
eval_detail step=200 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 1378, 315, 279, 1156, 1378, 315, 279] overlap_pct=37.500000 kl=2.203262841614e-1
eval_detail step=200 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[7521, 481, 3555, 374, 279, 1102, 315, 279, 1102, 30, 481, 3555, 374, 279, 1102, 315] overlap_pct=12.500000 kl=3.373474094324e-1
eval_detail step=200 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 8300, 311, 279, 1251, 315, 279, 3639] overlap_pct=56.250000 kl=1.955610855177e-1
eval_detail step=200 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 944, 1490, 432, 11, 714, 358, 646, 1490, 432, 304, 279, 6319, 13, 358] overlap_pct=43.750000 kl=7.141791930940e-2
eval_detail step=200 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=3.388893063545e-2
eval_detail step=200 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 1459, 315, 279, 1459, 11, 323, 279, 1459, 315, 279, 1459, 374, 279, 1459] overlap_pct=25.000000 kl=4.384768492739e-1
eval_detail step=200 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[11, 714, 432, 594, 537, 264, 1661, 3110, 369, 279, 7428, 315, 279, 7428, 315, 279] overlap_pct=6.250000 kl=3.973959760713e-1
eval_detail step=200 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[83145, 3491, 374, 264, 11416, 3110, 315, 264, 3491, 429, 374, 537, 264, 3491, 304, 279] overlap_pct=18.750000 kl=7.645226334082e-1
eval_detail step=200 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 11, 220, 17, 15, 15, 15, 11, 220, 17, 15, 16, 15] overlap_pct=43.750000 kl=1.382999618838e-1
eval_summary step=200 train_overlap_pct=63.281250 heldout_overlap_pct=23.437500 train_kl=1.672269389162e-1 heldout_kl=4.346738551593e-1 eval_seconds=7.215613
train_step step=210 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.812159775407e-5 rollout_len=12 step_seconds=0.295646
train_step step=220 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.940023452742e-5 rollout_len=13 step_seconds=0.296980
train_step step=230 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.494673364912e-5 rollout_len=14 step_seconds=0.298614
train_step step=240 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.000768967671e-5 rollout_len=15 step_seconds=0.299357
train_step step=250 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.841350189759e-5 rollout_len=12 step_seconds=0.297017
train_step step=260 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.969688259531e-5 rollout_len=13 step_seconds=0.295664
train_step step=270 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.489654409350e-5 rollout_len=14 step_seconds=0.298073
train_step step=280 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.998415195383e-5 rollout_len=15 step_seconds=0.299708
train_step step=290 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.810935595538e-5 rollout_len=12 step_seconds=0.296958
train_step step=300 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.963003109675e-5 rollout_len=13 step_seconds=0.297758
train_step step=310 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.553853457561e-5 rollout_len=14 step_seconds=0.299133
train_step step=320 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.006045128335e-5 rollout_len=15 step_seconds=0.299596
train_step step=330 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.814359661192e-5 rollout_len=12 step_seconds=0.294669
train_step step=340 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.937714427593e-5 rollout_len=13 step_seconds=0.297192
train_step step=350 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.552846465027e-5 rollout_len=14 step_seconds=0.298686
train_step step=360 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.101622835151e-5 rollout_len=15 step_seconds=0.301240
train_step step=370 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.807572284131e-5 rollout_len=12 step_seconds=0.296185
train_step step=380 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.934770938940e-5 rollout_len=13 step_seconds=0.297590
train_step step=390 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.546105654095e-5 rollout_len=14 step_seconds=0.298902
train_step step=400 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=4.004336005892e-5 rollout_len=15 step_seconds=0.299305
train_step step=410 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.809920235653e-5 rollout_len=12 step_seconds=0.295005
train_step step=420 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=4.111593443668e-5 rollout_len=13 step_seconds=0.296481
train_step step=430 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.538697637850e-5 rollout_len=14 step_seconds=0.298012
train_step step=440 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.989971446572e-5 rollout_len=15 step_seconds=0.298758
train_step step=450 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.803511208389e-5 rollout_len=12 step_seconds=0.295262
train_step step=460 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.931111132260e-5 rollout_len=13 step_seconds=0.296699
train_step step=470 prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] loss=3.481464591459e-5 rollout_len=14 step_seconds=0.298992
train_step step=480 prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] loss=3.987085801782e-5 rollout_len=15 step_seconds=0.299462
train_step step=490 prompt_index=1 prompt=[1, 198, 1512, 429] loss=3.801980710705e-5 rollout_len=12 step_seconds=0.295602
train_step step=500 prompt_index=3 prompt=[1, 644, 374, 279, 1887] loss=3.929456215701e-5 rollout_len=13 step_seconds=0.296287
eval_detail step=500 split=train prompt_index=0 prompt=[1, 872, 198, 3456] teacher_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 442] student_suffix=[888, 536, 4697, 972, 262, 584, 1099, 737, 1887, 2242, 1294, 2827, 8, 972, 286, 584] overlap_pct=93.750000 kl=1.475877968430e-1
eval_detail step=500 split=train prompt_index=1 prompt=[1, 198, 1512, 429] teacher_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 2055, 11, 369] student_suffix=[374, 11, 279, 1372, 315, 3039, 279, 1372, 7952, 304, 279, 1140, 13, 1752, 3110, 11] overlap_pct=81.250000 kl=7.641159883251e-2
eval_detail step=500 split=train prompt_index=2 prompt=[1, 770, 3186, 25, 220] teacher_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] student_suffix=[16, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15] overlap_pct=100.000000 kl=3.469249661232e-2
eval_detail step=500 split=train prompt_index=3 prompt=[1, 644, 374, 279, 1887] teacher_suffix=[2874, 369, 279, 31559, 315, 279, 8457, 11, 323, 279, 1887, 1815, 28469, 9363, 525, 279] student_suffix=[2874, 369, 279, 18003, 315, 279, 1156, 323, 2086, 6193, 315, 279, 9237, 594, 39612, 11] overlap_pct=31.250000 kl=1.425857983542e-1
eval_detail step=500 split=train prompt_index=4 prompt=[1, 3838, 374, 264, 2077, 13] teacher_suffix=[7521, 481, 362, 5714, 374, 264, 897, 429, 374, 5927, 553, 264, 729, 13, 1752, 3110] student_suffix=[16, 35, 1334, 304, 49347, 11974, 1, 2077, 1, 374, 264, 4647, 429, 19257, 311, 279] overlap_pct=0.000000 kl=1.110910774507e-1
eval_detail step=500 split=train prompt_index=5 prompt=[1, 785, 594, 287, 374, 1690] teacher_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 7748, 1186] student_suffix=[1, 374, 264, 4647, 429, 702, 1012, 1483, 311, 7512, 279, 1251, 315, 279, 3639, 4180] overlap_pct=87.500000 kl=1.181251987444e-1
eval_detail step=500 split=train prompt_index=6 prompt=[1, 3347, 11, 358, 1052, 429] teacher_suffix=[358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490, 432, 13, 358, 646, 944, 1490] student_suffix=[358, 646, 1490, 11, 323, 358, 646, 1490, 429, 358, 646, 1490, 11, 323, 358, 646] overlap_pct=12.500000 kl=5.965587315569e-2
eval_detail step=500 split=train prompt_index=7 prompt=[1, 2610, 527, 1139, 304, 279, 1670] teacher_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] student_suffix=[477, 11, 323, 279, 62, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716, 716] overlap_pct=100.000000 kl=1.568514685654e-2
eval_detail step=500 split=heldout prompt_index=0 prompt=[1, 4438, 374, 279, 2768] teacher_suffix=[315, 279, 3364, 13, 576, 3364, 374, 911, 279, 11618, 315, 264, 3908, 3743, 6941, 444] student_suffix=[315, 279, 3491, 11, 323, 279, 3491, 374, 279, 1459, 315, 279, 3491, 13, 576, 3491] overlap_pct=25.000000 kl=2.747697499430e-1
eval_detail step=500 split=heldout prompt_index=1 prompt=[1, 1516, 374, 264, 1296, 4339] teacher_suffix=[291, 504, 279, 1467, 11, 714, 279, 1467, 374, 537, 304, 6364, 13, 5209, 14683, 279] student_suffix=[11, 323, 279, 1008, 374, 264, 3175, 11, 323, 279, 1008, 374, 264, 3175, 11, 323] overlap_pct=6.250000 kl=3.355334513450e-1
eval_detail step=500 split=heldout prompt_index=2 prompt=[1, 785, 1401, 315, 279, 1967] teacher_suffix=[43059, 21938, 315, 7148, 1, 374, 264, 11245, 2168, 429, 702, 1012, 13570, 14975, 304, 279] student_suffix=[65, 28284, 11, 279, 1401, 315, 279, 1967, 65, 28284, 11, 279, 1401, 315, 279, 1967] overlap_pct=0.000000 kl=6.501620864422e-1
eval_detail step=500 split=heldout prompt_index=3 prompt=[1, 3198, 279, 1296, 25, 220] teacher_suffix=[16, 24, 24, 15, 82, 11, 220, 17, 15, 15, 15, 82, 11, 220, 17, 15] student_suffix=[16, 24, 24, 15, 11, 220, 16, 24, 24, 16, 11, 220, 16, 24, 24, 17] overlap_pct=25.000000 kl=1.221730298555e-1
eval_summary step=500 train_overlap_pct=63.281250 heldout_overlap_pct=14.062500 train_kl=8.822937335617e-2 heldout_kl=3.456595793964e-1 eval_seconds=7.272355
training_summary total_steps=500 total_wall_seconds=177.526524 mean_step_seconds=0.297141 median_step_seconds=0.297430 mean_sampled_loss=3.496435148918e-5 first_sampled_loss=1.788745430531e-5 step200_sampled_loss=4.003396679764e-5 final_sampled_loss=3.929456215701e-5 sampled_loss_reduction_200_pct=-123.810309 sampled_loss_reduction_final_pct=-119.676660 train_kl_reduction_200_pct=-1066.652602 train_kl_reduction_final_pct=-515.528985
summary_eval_row step=0 train_overlap_pct=74.218750 heldout_overlap_pct=50.000000 train_kl=1.433391042756e-2 heldout_kl=2.172812433186e-2
summary_eval_row step=50 train_overlap_pct=25.781250 heldout_overlap_pct=7.812500 train_kl=5.507875361794e-1 heldout_kl=5.110195789528e-1
summary_eval_row step=100 train_overlap_pct=50.000000 heldout_overlap_pct=10.937500 train_kl=2.465412064881e-1 heldout_kl=4.856421846181e-1
summary_eval_row step=200 train_overlap_pct=63.281250 heldout_overlap_pct=23.437500 train_kl=1.672269389162e-1 heldout_kl=4.346738551593e-1
summary_eval_row step=500 train_overlap_pct=63.281250 heldout_overlap_pct=14.062500 train_kl=8.822937335617e-2 heldout_kl=3.456595793964e-1
