config backend=cuda teacher_model=/home/ckl/.cache/modelscope/hub/Qwen/Qwen3___5-4B student_model=/home/ckl/.cache/modelscope/hub/Qwen/Qwen3___5-0___8B-Base student_mode=lora lora_rank=16 lora_alpha=32.000000 lora_target_set=attention-qv steps=200 rollout_len=8 lr=9.999999747e-6 grad_clip=1 prompt_source=jsonl:examples/opd/sample-prompts.jsonl rows=20 tokenizer=/home/ckl/.cache/modelscope/hub/Qwen/Qwen3___5-0___8B-Base/tokenizer.json truncated_rows=1 train_prompt_count=16 heldout_prompt_count=4 eval_steps=[0, 50, 100, 200] cuda_graph=true
prompt split=train index=0 ids=[814, 20139, 3069, 2526, 3992, 3983, 8495, 494, 383, 64179, 1527, 63853, 13]
prompt split=train index=1 ids=[8917, 5437, 537, 279, 6463, 1881, 4487, 65427, 321, 9637, 65427, 13]
prompt split=train index=2 ids=[7734, 264, 61446, 50802, 364, 26805, 264, 52965, 4706, 2923, 13]
prompt split=train index=3 ids=[3710, 3520, 264, 27502, 1067, 54067, 30]
prompt split=train index=4 ids=[72240, 1204, 264, 19088, 10756, 628, 8214, 264, 17671, 71698, 5286, 1558, 13]
prompt split=train index=5 ids=[33963, 264, 2716, 3010, 314, 264, 9640, 421, 6813, 33633, 31626, 13]
prompt split=train index=6 ids=[826, 2250, 11590, 421, 264, 4706, 1542, 369, 35140, 3072, 13]
prompt split=train index=7 ids=[814, 20139, 3817, 11258, 35887, 303, 799, 13901, 13]
prompt split=train index=8 ids=[4199, 1220, 449, 22839, 9973, 3315, 310, 2426, 466, 5332, 449, 24460, 30]
prompt split=train index=9 ids=[7734, 264, 16338, 5020, 883, 21966, 4779, 22627, 16070, 13]
prompt split=train index=10 ids=[9930, 369, 4581, 33686, 26370, 6807, 2166, 47590, 430, 449, 5428, 17723, 30]
prompt split=train index=11 ids=[26487, 54102, 45543, 440, 47241, 6299, 11336, 364, 1527, 63853, 13]
prompt split=train index=12 ids=[3710, 369, 279, 3364, 314, 5467, 9502, 49219, 303, 449, 12753, 35, 9064, 30]
prompt split=train index=13 ids=[814, 20139, 3069, 17515, 11241, 4766, 303, 4906, 944, 13]
prompt split=train index=14 ids=[33963, 264, 799, 1284, 17834, 7044, 314, 15135, 19441, 45597, 4706, 13]
prompt split=train index=15 ids=[3710, 1220, 264, 1156, 1716, 1518, 4162, 264, 1167, 16451, 44424, 27502, 30]
prompt split=heldout index=0 ids=[72240, 264, 5902, 1581, 6618, 4238, 364, 264, 3043, 15464, 2680, 5286, 13]
prompt split=heldout index=1 ids=[9930, 628, 264, 4557, 15196, 7042, 1345, 4581, 26370, 25710, 9766, 30]
prompt split=heldout index=2 ids=[7734, 264, 2716, 9663, 883, 5132, 49219, 1973, 449, 6326, 6696, 13]
prompt split=heldout index=3 ids=[814, 20139, 3069, 264, 1865, 1414, 9640, 716, 369, 2577, 1056, 1375, 2195, 17952, 3817, 27437]
model_summary student_hidden=1024 student_layers=24 student_vocab=248320 student_model_elements=769809216 student_trainable_elements=638976 student_load_seconds=8.767269 infer_load_seconds=11.458700
eval_summary step=0 train_kl=1.509985423809e-5 heldout_kl=1.738248056427e-5 eval_seconds=9.457684
train_step step=1 prompt_index=0 loss=1.676253305050e-5 rollout_len=21 step_seconds=5.712468
phase_summary step=1 total=5.712467 student_rollout=2.353967 infer_forward_token_logits=0.005273 infer_sync=0.385938 d2d_bridge_import=0.000599 teacher_forward_total=0.391814 student_forward=0.349995 kl_loss=0.014937 optimizer_zero_grad=0.000001 backward=2.587419 grad_clip=0.000528 optimizer_step=0.000933 post_step_cleanup=0.012859 teacher_seq_len=21 teacher_vocab=248320
train_step step=2 prompt_index=1 loss=1.479463298892e-5 rollout_len=20 step_seconds=6.485865
phase_summary step=2 total=6.485864 student_rollout=3.262151 infer_forward_token_logits=0.005336 infer_sync=0.454262 d2d_bridge_import=0.000548 teacher_forward_total=0.460152 student_forward=0.328396 kl_loss=0.009540 optimizer_zero_grad=0.000022 backward=2.411315 grad_clip=0.000537 optimizer_step=0.000830 post_step_cleanup=0.012820 teacher_seq_len=20 teacher_vocab=248320
train_step step=3 prompt_index=2 loss=1.750581213855e-5 rollout_len=19 step_seconds=6.185905
phase_summary step=3 total=6.185905 student_rollout=3.099736 infer_forward_token_logits=0.005245 infer_sync=0.511421 d2d_bridge_import=0.000598 teacher_forward_total=0.517269 student_forward=0.315376 kl_loss=0.009218 optimizer_zero_grad=0.000027 backward=2.230204 grad_clip=0.000529 optimizer_step=0.000471 post_step_cleanup=0.013000 teacher_seq_len=19 teacher_vocab=248320
train_step step=4 prompt_index=3 loss=1.104052716983e-5 rollout_len=15 step_seconds=4.049312
phase_summary step=4 total=4.049312 student_rollout=1.642619 infer_forward_token_logits=0.005177 infer_sync=0.382719 d2d_bridge_import=0.000081 teacher_forward_total=0.387982 student_forward=0.249272 kl_loss=0.007626 optimizer_zero_grad=0.000026 backward=1.747875 grad_clip=0.000534 optimizer_step=0.000487 post_step_cleanup=0.012816 teacher_seq_len=15 teacher_vocab=248320
train_step step=5 prompt_index=4 loss=1.522783895780e-5 rollout_len=21 step_seconds=5.798426
phase_summary step=5 total=5.798426 student_rollout=2.419261 infer_forward_token_logits=0.005305 infer_sync=0.466046 d2d_bridge_import=0.000598 teacher_forward_total=0.471955 student_forward=0.349604 kl_loss=0.009901 optimizer_zero_grad=0.000025 backward=2.533807 grad_clip=0.000535 optimizer_step=0.000481 post_step_cleanup=0.012784 teacher_seq_len=21 teacher_vocab=248320
train_step step=6 prompt_index=5 loss=1.334762237093e-5 rollout_len=20 step_seconds=5.644397
phase_summary step=6 total=5.644396 student_rollout=2.346909 infer_forward_token_logits=0.005302 infer_sync=0.492456 d2d_bridge_import=0.000595 teacher_forward_total=0.498359 student_forward=0.332891 kl_loss=0.009862 optimizer_zero_grad=0.000024 backward=2.441574 grad_clip=0.000539 optimizer_step=0.000863 post_step_cleanup=0.013302 teacher_seq_len=20 teacher_vocab=248320
train_step step=7 prompt_index=6 loss=1.915741631819e-5 rollout_len=19 step_seconds=6.124815
phase_summary step=7 total=6.124814 student_rollout=3.133428 infer_forward_token_logits=0.005301 infer_sync=0.397140 d2d_bridge_import=0.000607 teacher_forward_total=0.403052 student_forward=0.314460 kl_loss=0.009146 optimizer_zero_grad=0.000025 backward=2.250788 grad_clip=0.000531 optimizer_step=0.000470 post_step_cleanup=0.012837 teacher_seq_len=19 teacher_vocab=248320
train_step step=8 prompt_index=7 loss=1.404779504810e-5 rollout_len=17 step_seconds=4.588639
phase_summary step=8 total=4.588638 student_rollout=1.931656 infer_forward_token_logits=0.005246 infer_sync=0.356094 d2d_bridge_import=0.000590 teacher_forward_total=0.361934 student_forward=0.285021 kl_loss=0.008520 optimizer_zero_grad=0.000022 backward=1.987234 grad_clip=0.000540 optimizer_step=0.000530 post_step_cleanup=0.013109 teacher_seq_len=17 teacher_vocab=248320
train_step step=9 prompt_index=8 loss=1.254184280697e-5 rollout_len=21 step_seconds=5.744101
phase_summary step=9 total=5.744100 student_rollout=2.439322 infer_forward_token_logits=0.005251 infer_sync=0.482790 d2d_bridge_import=0.000079 teacher_forward_total=0.488125 student_forward=0.347001 kl_loss=0.009882 optimizer_zero_grad=0.000023 backward=2.445936 grad_clip=0.000527 optimizer_step=0.000477 post_step_cleanup=0.012734 teacher_seq_len=21 teacher_vocab=248320
train_step step=10 prompt_index=9 loss=1.900326788018e-5 rollout_len=18 step_seconds=4.955619
phase_summary step=10 total=4.955619 student_rollout=2.075611 infer_forward_token_logits=0.005324 infer_sync=0.440048 d2d_bridge_import=0.000591 teacher_forward_total=0.445968 student_forward=0.303898 kl_loss=0.008863 optimizer_zero_grad=0.000023 backward=2.107493 grad_clip=0.000530 optimizer_step=0.000475 post_step_cleanup=0.012683 teacher_seq_len=18 teacher_vocab=248320
train_step step=11 prompt_index=10 loss=1.861246346380e-5 rollout_len=21 step_seconds=5.748860
phase_summary step=11 total=5.748860 student_rollout=2.426783 infer_forward_token_logits=0.005563 infer_sync=0.425096 d2d_bridge_import=0.000615 teacher_forward_total=0.431280 student_forward=0.346769 kl_loss=0.009904 optimizer_zero_grad=0.000025 backward=2.519716 grad_clip=0.000560 optimizer_step=0.000485 post_step_cleanup=0.013257 teacher_seq_len=21 teacher_vocab=248320
train_step step=12 prompt_index=11 loss=1.654323750699e-5 rollout_len=19 step_seconds=5.328028
phase_summary step=12 total=5.328028 student_rollout=2.178875 infer_forward_token_logits=0.005363 infer_sync=0.448957 d2d_bridge_import=0.000588 teacher_forward_total=0.454914 student_forward=0.316798 kl_loss=0.009318 optimizer_zero_grad=0.000026 backward=2.354077 grad_clip=0.000525 optimizer_step=0.000483 post_step_cleanup=0.012935 teacher_seq_len=19 teacher_vocab=248320
train_step step=13 prompt_index=12 loss=1.975796294573e-5 rollout_len=22 step_seconds=6.031106
phase_summary step=13 total=6.031105 student_rollout=2.632358 infer_forward_token_logits=0.005284 infer_sync=0.516033 d2d_bridge_import=0.000600 teacher_forward_total=0.521922 student_forward=0.360934 kl_loss=0.010430 optimizer_zero_grad=0.000023 backward=2.491532 grad_clip=0.000528 optimizer_step=0.000490 post_step_cleanup=0.012809 teacher_seq_len=22 teacher_vocab=248320
train_step step=14 prompt_index=13 loss=1.610022627574e-5 rollout_len=18 step_seconds=5.489315
phase_summary step=14 total=5.489314 student_rollout=2.631027 infer_forward_token_logits=0.005281 infer_sync=0.522272 d2d_bridge_import=0.000550 teacher_forward_total=0.528108 student_forward=0.295806 kl_loss=0.008731 optimizer_zero_grad=0.000024 backward=2.011803 grad_clip=0.000540 optimizer_step=0.000473 post_step_cleanup=0.012726 teacher_seq_len=18 teacher_vocab=248320
train_step step=15 prompt_index=14 loss=1.248080934602e-5 rollout_len=20 step_seconds=5.497994
phase_summary step=15 total=5.497993 student_rollout=2.260736 infer_forward_token_logits=0.005261 infer_sync=0.484082 d2d_bridge_import=0.000594 teacher_forward_total=0.489942 student_forward=0.328678 kl_loss=0.009497 optimizer_zero_grad=0.000023 backward=2.395311 grad_clip=0.000525 optimizer_step=0.000499 post_step_cleanup=0.012707 teacher_seq_len=20 teacher_vocab=248320
train_step step=16 prompt_index=15 loss=1.337045159744e-5 rollout_len=21 step_seconds=5.677387
phase_summary step=16 total=5.677386 student_rollout=2.480996 infer_forward_token_logits=0.005371 infer_sync=0.465210 d2d_bridge_import=0.000089 teacher_forward_total=0.470673 student_forward=0.345937 kl_loss=0.009937 optimizer_zero_grad=0.000021 backward=2.356072 grad_clip=0.000525 optimizer_step=0.000470 post_step_cleanup=0.012682 teacher_seq_len=21 teacher_vocab=248320
train_step step=17 prompt_index=0 loss=1.666684693191e-5 rollout_len=21 step_seconds=5.729044
phase_summary step=17 total=5.729043 student_rollout=2.453229 infer_forward_token_logits=0.005296 infer_sync=0.422558 d2d_bridge_import=0.000043 teacher_forward_total=0.427901 student_forward=0.348455 kl_loss=0.009872 optimizer_zero_grad=0.000025 backward=2.475781 grad_clip=0.000533 optimizer_step=0.000475 post_step_cleanup=0.012699 teacher_seq_len=21 teacher_vocab=248320
train_step step=18 prompt_index=1 loss=1.471898303862e-5 rollout_len=20 step_seconds=5.355425
phase_summary step=18 total=5.355424 student_rollout=2.313326 infer_forward_token_logits=0.005373 infer_sync=0.411526 d2d_bridge_import=0.000596 teacher_forward_total=0.417500 student_forward=0.330500 kl_loss=0.009531 optimizer_zero_grad=0.000026 backward=2.270110 grad_clip=0.000525 optimizer_step=0.000831 post_step_cleanup=0.012994 teacher_seq_len=20 teacher_vocab=248320
train_step step=19 prompt_index=2 loss=1.745817280607e-5 rollout_len=19 step_seconds=5.928206
phase_summary step=19 total=5.928206 student_rollout=2.814855 infer_forward_token_logits=0.005233 infer_sync=0.430396 d2d_bridge_import=0.000066 teacher_forward_total=0.435699 student_forward=0.315102 kl_loss=0.009467 optimizer_zero_grad=0.000025 backward=2.336923 grad_clip=0.000625 optimizer_step=0.000564 post_step_cleanup=0.014857 teacher_seq_len=19 teacher_vocab=248320
train_step step=20 prompt_index=3 loss=1.102107489714e-5 rollout_len=15 step_seconds=4.136498
phase_summary step=20 total=4.136497 student_rollout=1.652134 infer_forward_token_logits=0.005201 infer_sync=0.314716 d2d_bridge_import=0.000612 teacher_forward_total=0.320532 student_forward=0.250401 kl_loss=0.007578 optimizer_zero_grad=0.000021 backward=1.891876 grad_clip=0.000517 optimizer_step=0.000488 post_step_cleanup=0.012877 teacher_seq_len=15 teacher_vocab=248320
train_step step=21 prompt_index=4 loss=1.518713634141e-5 rollout_len=21 step_seconds=5.631103
phase_summary step=21 total=5.631103 student_rollout=2.424410 infer_forward_token_logits=0.005325 infer_sync=0.469184 d2d_bridge_import=0.000550 teacher_forward_total=0.475063 student_forward=0.348272 kl_loss=0.009777 optimizer_zero_grad=0.000025 backward=2.359431 grad_clip=0.000533 optimizer_step=0.000855 post_step_cleanup=0.012652 teacher_seq_len=21 teacher_vocab=248320
train_step step=22 prompt_index=5 loss=1.331798648607e-5 rollout_len=20 step_seconds=6.527461
phase_summary step=22 total=6.527460 student_rollout=3.165178 infer_forward_token_logits=0.005366 infer_sync=0.558828 d2d_bridge_import=0.000550 teacher_forward_total=0.564748 student_forward=0.328435 kl_loss=0.009545 optimizer_zero_grad=0.000026 backward=2.445678 grad_clip=0.000532 optimizer_step=0.000482 post_step_cleanup=0.012754 teacher_seq_len=20 teacher_vocab=248320
train_step step=23 prompt_index=6 loss=1.906713259814e-5 rollout_len=19 step_seconds=5.162040
phase_summary step=23 total=5.162039 student_rollout=2.207565 infer_forward_token_logits=0.005285 infer_sync=0.463254 d2d_bridge_import=0.000610 teacher_forward_total=0.469153 student_forward=0.314595 kl_loss=0.009117 optimizer_zero_grad=0.000025 backward=2.147457 grad_clip=0.000529 optimizer_step=0.000473 post_step_cleanup=0.013037 teacher_seq_len=19 teacher_vocab=248320
train_step step=24 prompt_index=7 loss=1.397846426698e-5 rollout_len=17 step_seconds=4.567378
phase_summary step=24 total=4.567378 student_rollout=1.907956 infer_forward_token_logits=0.005333 infer_sync=0.397987 d2d_bridge_import=0.000067 teacher_forward_total=0.403391 student_forward=0.281993 kl_loss=0.008278 optimizer_zero_grad=0.000024 backward=1.951962 grad_clip=0.000539 optimizer_step=0.000479 post_step_cleanup=0.012664 teacher_seq_len=17 teacher_vocab=248320
train_step step=25 prompt_index=8 loss=1.253232130694e-5 rollout_len=21 step_seconds=5.664547
phase_summary step=25 total=5.664546 student_rollout=2.435704 infer_forward_token_logits=0.005368 infer_sync=0.510308 d2d_bridge_import=0.000589 teacher_forward_total=0.516269 student_forward=0.347880 kl_loss=0.009785 optimizer_zero_grad=0.000022 backward=2.341185 grad_clip=0.000519 optimizer_step=0.000482 post_step_cleanup=0.012621 teacher_seq_len=21 teacher_vocab=248320
train_step step=26 prompt_index=9 loss=1.895479908853e-5 rollout_len=18 step_seconds=5.049089
phase_summary step=26 total=5.049088 student_rollout=2.069412 infer_forward_token_logits=0.005526 infer_sync=0.423227 d2d_bridge_import=0.000078 teacher_forward_total=0.428836 student_forward=0.296805 kl_loss=0.008722 optimizer_zero_grad=0.000023 backward=2.230867 grad_clip=0.000538 optimizer_step=0.000843 post_step_cleanup=0.012964 teacher_seq_len=18 teacher_vocab=248320
train_step step=27 prompt_index=10 loss=1.853317917266e-5 rollout_len=21 step_seconds=6.733413
phase_summary step=27 total=6.733413 student_rollout=3.391765 infer_forward_token_logits=0.003533 infer_sync=0.571121 d2d_bridge_import=0.000079 teacher_forward_total=0.574738 student_forward=0.353751 kl_loss=0.009974 optimizer_zero_grad=0.000026 backward=2.388681 grad_clip=0.000544 optimizer_step=0.000868 post_step_cleanup=0.012985 teacher_seq_len=21 teacher_vocab=248320
train_step step=28 prompt_index=11 loss=1.649248952162e-5 rollout_len=19 step_seconds=6.114084
phase_summary step=28 total=6.114084 student_rollout=3.104586 infer_forward_token_logits=0.005365 infer_sync=0.519863 d2d_bridge_import=0.000552 teacher_forward_total=0.525785 student_forward=0.312736 kl_loss=0.009050 optimizer_zero_grad=0.000024 backward=2.148071 grad_clip=0.000539 optimizer_step=0.000473 post_step_cleanup=0.012739 teacher_seq_len=19 teacher_vocab=248320
train_step step=29 prompt_index=12 loss=1.969063850993e-5 rollout_len=22 step_seconds=6.104394
phase_summary step=29 total=6.104393 student_rollout=2.579872 infer_forward_token_logits=0.005334 infer_sync=0.480855 d2d_bridge_import=0.000597 teacher_forward_total=0.486792 student_forward=0.363808 kl_loss=0.010507 optimizer_zero_grad=0.000026 backward=2.649224 grad_clip=0.000526 optimizer_step=0.000834 post_step_cleanup=0.012727 teacher_seq_len=22 teacher_vocab=248320
train_step step=30 prompt_index=13 loss=1.605115176062e-5 rollout_len=18 step_seconds=5.735843
phase_summary step=30 total=5.735842 student_rollout=2.977390 infer_forward_token_logits=0.005271 infer_sync=0.412918 d2d_bridge_import=0.000593 teacher_forward_total=0.418786 student_forward=0.297747 kl_loss=0.008740 optimizer_zero_grad=0.000022 backward=2.019434 grad_clip=0.000523 optimizer_step=0.000472 post_step_cleanup=0.012648 teacher_seq_len=18 teacher_vocab=248320
train_step step=31 prompt_index=14 loss=1.244220766239e-5 rollout_len=20 step_seconds=5.546118
phase_summary step=31 total=5.546118 student_rollout=2.294806 infer_forward_token_logits=0.005301 infer_sync=0.465117 d2d_bridge_import=0.000594 teacher_forward_total=0.471016 student_forward=0.331797 kl_loss=0.009553 optimizer_zero_grad=0.000025 backward=2.424722 grad_clip=0.000530 optimizer_step=0.000847 post_step_cleanup=0.012740 teacher_seq_len=20 teacher_vocab=248320
train_step step=32 prompt_index=15 loss=1.335596607532e-5 rollout_len=21 step_seconds=6.502193
phase_summary step=32 total=6.502192 student_rollout=3.283642 infer_forward_token_logits=0.005378 infer_sync=0.467415 d2d_bridge_import=0.000065 teacher_forward_total=0.472862 student_forward=0.344932 kl_loss=0.009896 optimizer_zero_grad=0.000026 backward=2.376743 grad_clip=0.000535 optimizer_step=0.000850 post_step_cleanup=0.012626 teacher_seq_len=21 teacher_vocab=248320
train_step step=33 prompt_index=0 loss=1.660407724557e-5 rollout_len=21 step_seconds=6.547954
phase_summary step=33 total=6.547954 student_rollout=3.378927 infer_forward_token_logits=0.005247 infer_sync=0.465240 d2d_bridge_import=0.000080 teacher_forward_total=0.470571 student_forward=0.343525 kl_loss=0.009875 optimizer_zero_grad=0.000029 backward=2.330386 grad_clip=0.000548 optimizer_step=0.000881 post_step_cleanup=0.013120 teacher_seq_len=21 teacher_vocab=248320
train_step step=34 prompt_index=1 loss=1.465834247938e-5 rollout_len=20 step_seconds=6.311313
phase_summary step=34 total=6.311313 student_rollout=3.230973 infer_forward_token_logits=0.004925 infer_sync=0.458831 d2d_bridge_import=0.000597 teacher_forward_total=0.464357 student_forward=0.327228 kl_loss=0.009521 optimizer_zero_grad=0.000025 backward=2.265427 grad_clip=0.000530 optimizer_step=0.000491 post_step_cleanup=0.012739 teacher_seq_len=20 teacher_vocab=248320
train_step step=35 prompt_index=2 loss=1.741973937897e-5 rollout_len=19 step_seconds=5.118113
phase_summary step=35 total=5.118113 student_rollout=2.184658 infer_forward_token_logits=0.005259 infer_sync=0.452875 d2d_bridge_import=0.000042 teacher_forward_total=0.458180 student_forward=0.314623 kl_loss=0.009095 optimizer_zero_grad=0.000024 backward=2.137530 grad_clip=0.000519 optimizer_step=0.000477 post_step_cleanup=0.012925 teacher_seq_len=19 teacher_vocab=248320
train_step step=36 prompt_index=3 loss=1.084555606212e-5 rollout_len=15 step_seconds=4.210726
phase_summary step=36 total=4.210725 student_rollout=1.638987 infer_forward_token_logits=0.003175 infer_sync=0.386987 d2d_bridge_import=0.000043 teacher_forward_total=0.390208 student_forward=0.251137 kl_loss=0.007577 optimizer_zero_grad=0.000024 backward=1.909101 grad_clip=0.000528 optimizer_step=0.000473 post_step_cleanup=0.012609 teacher_seq_len=15 teacher_vocab=248320
train_step step=37 prompt_index=4 loss=1.514872110420e-5 rollout_len=21 step_seconds=6.341149
phase_summary step=37 total=6.341148 student_rollout=2.940164 infer_forward_token_logits=0.005380 infer_sync=0.515092 d2d_bridge_import=0.000604 teacher_forward_total=0.521081 student_forward=0.347788 kl_loss=0.009886 optimizer_zero_grad=0.000023 backward=2.508473 grad_clip=0.000528 optimizer_step=0.000497 post_step_cleanup=0.012623 teacher_seq_len=21 teacher_vocab=248320
train_step step=38 prompt_index=5 loss=1.329107544734e-5 rollout_len=20 step_seconds=5.630458
phase_summary step=38 total=5.630458 student_rollout=2.359163 infer_forward_token_logits=0.005426 infer_sync=0.502703 d2d_bridge_import=0.000554 teacher_forward_total=0.508687 student_forward=0.332285 kl_loss=0.009454 optimizer_zero_grad=0.000027 backward=2.406718 grad_clip=0.000527 optimizer_step=0.000840 post_step_cleanup=0.012679 teacher_seq_len=20 teacher_vocab=248320
train_step step=39 prompt_index=6 loss=1.898252594401e-5 rollout_len=19 step_seconds=6.151976
phase_summary step=39 total=6.151975 student_rollout=3.096195 infer_forward_token_logits=0.005387 infer_sync=0.550001 d2d_bridge_import=0.000604 teacher_forward_total=0.555997 student_forward=0.314771 kl_loss=0.009136 optimizer_zero_grad=0.000027 backward=2.162092 grad_clip=0.000520 optimizer_step=0.000475 post_step_cleanup=0.012683 teacher_seq_len=19 teacher_vocab=248320
train_step step=40 prompt_index=7 loss=1.391025034536e-5 rollout_len=17 step_seconds=4.600459
phase_summary step=40 total=4.600458 student_rollout=1.914436 infer_forward_token_logits=0.005232 infer_sync=0.413457 d2d_bridge_import=0.000590 teacher_forward_total=0.419285 student_forward=0.280527 kl_loss=0.008372 optimizer_zero_grad=0.000024 backward=1.963907 grad_clip=0.000536 optimizer_step=0.000482 post_step_cleanup=0.012806 teacher_seq_len=17 teacher_vocab=248320
train_step step=41 prompt_index=8 loss=1.252284437214e-5 rollout_len=21 step_seconds=5.609065
phase_summary step=41 total=5.609064 student_rollout=2.433303 infer_forward_token_logits=0.005533 infer_sync=0.409501 d2d_bridge_import=0.000079 teacher_forward_total=0.415118 student_forward=0.347226 kl_loss=0.009901 optimizer_zero_grad=0.000026 backward=2.389406 grad_clip=0.000542 optimizer_step=0.000490 post_step_cleanup=0.012969 teacher_seq_len=21 teacher_vocab=248320
train_step step=42 prompt_index=9 loss=1.890782368719e-5 rollout_len=18 step_seconds=4.910012
phase_summary step=42 total=4.910011 student_rollout=2.055217 infer_forward_token_logits=0.005415 infer_sync=0.451302 d2d_bridge_import=0.000590 teacher_forward_total=0.457312 student_forward=0.301088 kl_loss=0.008838 optimizer_zero_grad=0.000025 backward=2.073700 grad_clip=0.000529 optimizer_step=0.000472 post_step_cleanup=0.012742 teacher_seq_len=18 teacher_vocab=248320
train_step step=43 prompt_index=10 loss=1.845193401095e-5 rollout_len=21 step_seconds=6.489862
phase_summary step=43 total=6.489861 student_rollout=3.108207 infer_forward_token_logits=0.005352 infer_sync=0.488463 d2d_bridge_import=0.000607 teacher_forward_total=0.494426 student_forward=0.343442 kl_loss=0.009986 optimizer_zero_grad=0.000026 backward=2.519716 grad_clip=0.000541 optimizer_step=0.000856 post_step_cleanup=0.012581 teacher_seq_len=21 teacher_vocab=248320
train_step step=44 prompt_index=11 loss=1.644043004490e-5 rollout_len=19 step_seconds=6.153701
phase_summary step=44 total=6.153700 student_rollout=3.102702 infer_forward_token_logits=0.005219 infer_sync=0.545260 d2d_bridge_import=0.000552 teacher_forward_total=0.551034 student_forward=0.314912 kl_loss=0.009125 optimizer_zero_grad=0.000023 backward=2.162059 grad_clip=0.000533 optimizer_step=0.000475 post_step_cleanup=0.012754 teacher_seq_len=19 teacher_vocab=248320
train_step step=45 prompt_index=12 loss=1.962074020412e-5 rollout_len=22 step_seconds=6.858045
phase_summary step=45 total=6.858044 student_rollout=3.425612 infer_forward_token_logits=0.005431 infer_sync=0.538219 d2d_bridge_import=0.000602 teacher_forward_total=0.544259 student_forward=0.360677 kl_loss=0.010284 optimizer_zero_grad=0.000025 backward=2.502899 grad_clip=0.000532 optimizer_step=0.000854 post_step_cleanup=0.012819 teacher_seq_len=22 teacher_vocab=248320
train_step step=46 prompt_index=13 loss=1.600079485797e-5 rollout_len=18 step_seconds=5.985428
phase_summary step=46 total=5.985427 student_rollout=2.894762 infer_forward_token_logits=0.005339 infer_sync=0.525488 d2d_bridge_import=0.000554 teacher_forward_total=0.531386 student_forward=0.298584 kl_loss=0.008738 optimizer_zero_grad=0.000027 backward=2.238041 grad_clip=0.000528 optimizer_step=0.000476 post_step_cleanup=0.012804 teacher_seq_len=18 teacher_vocab=248320
train_step step=47 prompt_index=14 loss=1.240324763785e-5 rollout_len=20 step_seconds=5.705574
phase_summary step=47 total=5.705573 student_rollout=2.436510 infer_forward_token_logits=0.005328 infer_sync=0.429848 d2d_bridge_import=0.000553 teacher_forward_total=0.435734 student_forward=0.328992 kl_loss=0.009498 optimizer_zero_grad=0.000025 backward=2.479926 grad_clip=0.000550 optimizer_step=0.000506 post_step_cleanup=0.013744 teacher_seq_len=20 teacher_vocab=248320
train_step step=48 prompt_index=15 loss=1.334101034445e-5 rollout_len=21 step_seconds=6.081521
phase_summary step=48 total=6.081520 student_rollout=2.577299 infer_forward_token_logits=0.005403 infer_sync=0.590828 d2d_bridge_import=0.000079 teacher_forward_total=0.596315 student_forward=0.344918 kl_loss=0.009872 optimizer_zero_grad=0.000024 backward=2.539327 grad_clip=0.000540 optimizer_step=0.000495 post_step_cleanup=0.012628 teacher_seq_len=21 teacher_vocab=248320
train_step step=49 prompt_index=0 loss=1.654234074522e-5 rollout_len=21 step_seconds=5.942738
phase_summary step=49 total=5.942737 student_rollout=2.521432 infer_forward_token_logits=0.005140 infer_sync=0.562561 d2d_bridge_import=0.000075 teacher_forward_total=0.567781 student_forward=0.346611 kl_loss=0.009881 optimizer_zero_grad=0.000027 backward=2.483081 grad_clip=0.000560 optimizer_step=0.000513 post_step_cleanup=0.012771 teacher_seq_len=21 teacher_vocab=248320
train_step step=50 prompt_index=1 loss=1.459751638322e-5 rollout_len=20 step_seconds=5.574671
phase_summary step=50 total=5.574670 student_rollout=2.351174 infer_forward_token_logits=0.005319 infer_sync=0.479168 d2d_bridge_import=0.000638 teacher_forward_total=0.485129 student_forward=0.330329 kl_loss=0.009529 optimizer_zero_grad=0.000023 backward=2.384413 grad_clip=0.000529 optimizer_step=0.000824 post_step_cleanup=0.012627 teacher_seq_len=20 teacher_vocab=248320
eval_summary step=50 train_kl=1.503763178334e-5 heldout_kl=1.730247004161e-5 eval_seconds=7.884876
train_step step=51 prompt_index=2 loss=1.738188802847e-5 rollout_len=19 step_seconds=4.776039
phase_summary step=51 total=4.776038 student_rollout=2.008536 infer_forward_token_logits=0.005498 infer_sync=0.316079 d2d_bridge_import=0.000080 teacher_forward_total=0.321661 student_forward=0.311508 kl_loss=0.009194 optimizer_zero_grad=0.000027 backward=2.111211 grad_clip=0.000521 optimizer_step=0.000503 post_step_cleanup=0.012826 teacher_seq_len=19 teacher_vocab=248320
train_step step=52 prompt_index=3 loss=1.083151983039e-5 rollout_len=15 step_seconds=4.127253
phase_summary step=52 total=4.127252 student_rollout=1.629195 infer_forward_token_logits=0.005181 infer_sync=0.319850 d2d_bridge_import=0.000639 teacher_forward_total=0.325674 student_forward=0.249764 kl_loss=0.007610 optimizer_zero_grad=0.000022 backward=1.901060 grad_clip=0.000521 optimizer_step=0.000484 post_step_cleanup=0.012825 teacher_seq_len=15 teacher_vocab=248320
train_step step=53 prompt_index=4 loss=1.511033042334e-5 rollout_len=21 step_seconds=5.672120
phase_summary step=53 total=5.672119 student_rollout=2.421624 infer_forward_token_logits=0.005308 infer_sync=0.472733 d2d_bridge_import=0.000551 teacher_forward_total=0.478597 student_forward=0.344871 kl_loss=0.009887 optimizer_zero_grad=0.000025 backward=2.403238 grad_clip=0.000537 optimizer_step=0.000471 post_step_cleanup=0.012781 teacher_seq_len=21 teacher_vocab=248320
train_step step=54 prompt_index=5 loss=1.326477104158e-5 rollout_len=20 step_seconds=5.971103
phase_summary step=54 total=5.971101 student_rollout=2.896294 infer_forward_token_logits=0.005360 infer_sync=0.420519 d2d_bridge_import=0.000553 teacher_forward_total=0.426438 student_forward=0.326930 kl_loss=0.009555 optimizer_zero_grad=0.000026 backward=2.297825 grad_clip=0.000528 optimizer_step=0.000487 post_step_cleanup=0.012937 teacher_seq_len=20 teacher_vocab=248320
train_step step=55 prompt_index=6 loss=1.889834675239e-5 rollout_len=19 step_seconds=5.292786
phase_summary step=55 total=5.292786 student_rollout=2.195243 infer_forward_token_logits=0.005312 infer_sync=0.434976 d2d_bridge_import=0.000578 teacher_forward_total=0.440871 student_forward=0.315144 kl_loss=0.009579 optimizer_zero_grad=0.000027 backward=2.318050 grad_clip=0.000526 optimizer_step=0.000484 post_step_cleanup=0.012774 teacher_seq_len=19 teacher_vocab=248320
train_step step=56 prompt_index=7 loss=1.384186907671e-5 rollout_len=17 step_seconds=4.749076
phase_summary step=56 total=4.749075 student_rollout=1.920122 infer_forward_token_logits=0.005316 infer_sync=0.386104 d2d_bridge_import=0.000077 teacher_forward_total=0.391501 student_forward=0.282481 kl_loss=0.008367 optimizer_zero_grad=0.000023 backward=2.132781 grad_clip=0.000523 optimizer_step=0.000480 post_step_cleanup=0.012752 teacher_seq_len=17 teacher_vocab=248320
train_step step=57 prompt_index=8 loss=1.251354024134e-5 rollout_len=21 step_seconds=5.787508
phase_summary step=57 total=5.787508 student_rollout=2.451143 infer_forward_token_logits=0.005377 infer_sync=0.507436 d2d_bridge_import=0.000588 teacher_forward_total=0.513410 student_forward=0.347194 kl_loss=0.009903 optimizer_zero_grad=0.000025 backward=2.452075 grad_clip=0.000536 optimizer_step=0.000499 post_step_cleanup=0.012642 teacher_seq_len=21 teacher_vocab=248320
train_step step=58 prompt_index=9 loss=1.867801438493e-5 rollout_len=18 step_seconds=4.927731
phase_summary step=58 total=4.927730 student_rollout=2.085856 infer_forward_token_logits=0.005323 infer_sync=0.451582 d2d_bridge_import=0.000071 teacher_forward_total=0.456980 student_forward=0.301837 kl_loss=0.008751 optimizer_zero_grad=0.000026 backward=2.060347 grad_clip=0.000524 optimizer_step=0.000470 post_step_cleanup=0.012854 teacher_seq_len=18 teacher_vocab=248320
train_step step=59 prompt_index=10 loss=1.837143099692e-5 rollout_len=21 step_seconds=5.602501
phase_summary step=59 total=5.602500 student_rollout=2.444131 infer_forward_token_logits=0.005368 infer_sync=0.455898 d2d_bridge_import=0.000062 teacher_forward_total=0.461332 student_forward=0.347520 kl_loss=0.009884 optimizer_zero_grad=0.000026 backward=2.325877 grad_clip=0.000538 optimizer_step=0.000463 post_step_cleanup=0.012646 teacher_seq_len=21 teacher_vocab=248320
train_step step=60 prompt_index=11 loss=1.638873436605e-5 rollout_len=19 step_seconds=5.282304
phase_summary step=60 total=5.282303 student_rollout=2.165830 infer_forward_token_logits=0.005261 infer_sync=0.451400 d2d_bridge_import=0.000591 teacher_forward_total=0.457256 student_forward=0.314502 kl_loss=0.009147 optimizer_zero_grad=0.000031 backward=2.321398 grad_clip=0.000541 optimizer_step=0.000855 post_step_cleanup=0.012659 teacher_seq_len=19 teacher_vocab=248320
train_step step=61 prompt_index=12 loss=1.955070365511e-5 rollout_len=22 step_seconds=7.009898
phase_summary step=61 total=7.009897 student_rollout=3.529511 infer_forward_token_logits=0.005356 infer_sync=0.615641 d2d_bridge_import=0.000593 teacher_forward_total=0.621595 student_forward=0.362788 kl_loss=0.010439 optimizer_zero_grad=0.000025 backward=2.471693 grad_clip=0.000526 optimizer_step=0.000486 post_step_cleanup=0.012781 teacher_seq_len=22 teacher_vocab=248320
train_step step=62 prompt_index=13 loss=1.594921741344e-5 rollout_len=18 step_seconds=5.071419
phase_summary step=62 total=5.071418 student_rollout=2.080311 infer_forward_token_logits=0.005248 infer_sync=0.444868 d2d_bridge_import=0.000635 teacher_forward_total=0.450755 student_forward=0.296932 kl_loss=0.008775 optimizer_zero_grad=0.000025 backward=2.220891 grad_clip=0.000529 optimizer_step=0.000477 post_step_cleanup=0.012663 teacher_seq_len=18 teacher_vocab=248320
train_step step=63 prompt_index=14 loss=1.236380921910e-5 rollout_len=20 step_seconds=5.313067
phase_summary step=63 total=5.313066 student_rollout=2.316375 infer_forward_token_logits=0.005280 infer_sync=0.410537 d2d_bridge_import=0.000551 teacher_forward_total=0.416372 student_forward=0.327432 kl_loss=0.009517 optimizer_zero_grad=0.000022 backward=2.229425 grad_clip=0.000532 optimizer_step=0.000477 post_step_cleanup=0.012827 teacher_seq_len=20 teacher_vocab=248320
train_step step=64 prompt_index=15 loss=1.332580541202e-5 rollout_len=21 step_seconds=5.790073
phase_summary step=64 total=5.790073 student_rollout=2.424774 infer_forward_token_logits=0.005388 infer_sync=0.406001 d2d_bridge_import=0.000061 teacher_forward_total=0.411453 student_forward=0.346624 kl_loss=0.009883 optimizer_zero_grad=0.000024 backward=2.583625 grad_clip=0.000532 optimizer_step=0.000501 post_step_cleanup=0.012634 teacher_seq_len=21 teacher_vocab=248320
train_step step=65 prompt_index=0 loss=1.647771023272e-5 rollout_len=21 step_seconds=5.696512
phase_summary step=65 total=5.696511 student_rollout=2.446335 infer_forward_token_logits=0.005339 infer_sync=0.512898 d2d_bridge_import=0.000077 teacher_forward_total=0.518319 student_forward=0.344071 kl_loss=0.009914 optimizer_zero_grad=0.000026 backward=2.363629 grad_clip=0.000532 optimizer_step=0.000837 post_step_cleanup=0.012761 teacher_seq_len=21 teacher_vocab=248320
train_step step=66 prompt_index=1 loss=1.453579352528e-5 rollout_len=20 step_seconds=6.430143
phase_summary step=66 total=6.430142 student_rollout=3.279604 infer_forward_token_logits=0.005616 infer_sync=0.524285 d2d_bridge_import=0.000605 teacher_forward_total=0.530512 student_forward=0.333563 kl_loss=0.009612 optimizer_zero_grad=0.000028 backward=2.261644 grad_clip=0.000549 optimizer_step=0.000847 post_step_cleanup=0.013702 teacher_seq_len=20 teacher_vocab=248320
train_step step=67 prompt_index=2 loss=1.734481884341e-5 rollout_len=19 step_seconds=5.878302
phase_summary step=67 total=5.878302 student_rollout=2.898722 infer_forward_token_logits=0.005296 infer_sync=0.457830 d2d_bridge_import=0.000080 teacher_forward_total=0.463211 student_forward=0.316576 kl_loss=0.009182 optimizer_zero_grad=0.000025 backward=2.176301 grad_clip=0.000544 optimizer_step=0.000824 post_step_cleanup=0.012810 teacher_seq_len=19 teacher_vocab=248320
train_step step=68 prompt_index=3 loss=1.081745358533e-5 rollout_len=15 step_seconds=4.772069
phase_summary step=68 total=4.772068 student_rollout=2.185807 infer_forward_token_logits=0.003260 infer_sync=0.408115 d2d_bridge_import=0.000072 teacher_forward_total=0.411450 student_forward=0.253445 kl_loss=0.007651 optimizer_zero_grad=0.000026 backward=1.899819 grad_clip=0.000526 optimizer_step=0.000481 post_step_cleanup=0.012757 teacher_seq_len=15 teacher_vocab=248320
train_step step=69 prompt_index=4 loss=1.507157594460e-5 rollout_len=21 step_seconds=5.785484
phase_summary step=69 total=5.785483 student_rollout=2.432880 infer_forward_token_logits=0.003351 infer_sync=0.417687 d2d_bridge_import=0.000604 teacher_forward_total=0.421645 student_forward=0.348917 kl_loss=0.009933 optimizer_zero_grad=0.000024 backward=2.557887 grad_clip=0.000538 optimizer_step=0.000834 post_step_cleanup=0.012740 teacher_seq_len=21 teacher_vocab=248320
train_step step=70 prompt_index=5 loss=1.323758442595e-5 rollout_len=20 step_seconds=6.400775
phase_summary step=70 total=6.400774 student_rollout=3.303165 infer_forward_token_logits=0.005354 infer_sync=0.425845 d2d_bridge_import=0.000552 teacher_forward_total=0.431757 student_forward=0.327728 kl_loss=0.009539 optimizer_zero_grad=0.000025 backward=2.314416 grad_clip=0.000526 optimizer_step=0.000856 post_step_cleanup=0.012679 teacher_seq_len=20 teacher_vocab=248320
train_step step=71 prompt_index=6 loss=1.881059324660e-5 rollout_len=19 step_seconds=6.060242
phase_summary step=71 total=6.060242 student_rollout=3.051285 infer_forward_token_logits=0.005338 infer_sync=0.452315 d2d_bridge_import=0.000603 teacher_forward_total=0.458261 student_forward=0.312618 kl_loss=0.009086 optimizer_zero_grad=0.000026 backward=2.215158 grad_clip=0.000524 optimizer_step=0.000482 post_step_cleanup=0.012718 teacher_seq_len=19 teacher_vocab=248320
train_step step=72 prompt_index=7 loss=1.377071566822e-5 rollout_len=17 step_seconds=4.802841
phase_summary step=72 total=4.802841 student_rollout=1.905509 infer_forward_token_logits=0.005250 infer_sync=0.405290 d2d_bridge_import=0.000594 teacher_forward_total=0.411139 student_forward=0.280090 kl_loss=0.008375 optimizer_zero_grad=0.000026 backward=2.183945 grad_clip=0.000529 optimizer_step=0.000486 post_step_cleanup=0.012660 teacher_seq_len=17 teacher_vocab=248320
train_step step=73 prompt_index=8 loss=1.250395416719e-5 rollout_len=21 step_seconds=5.850895
phase_summary step=73 total=5.850895 student_rollout=2.446805 infer_forward_token_logits=0.003156 infer_sync=0.493106 d2d_bridge_import=0.000054 teacher_forward_total=0.496320 student_forward=0.346622 kl_loss=0.010775 optimizer_zero_grad=0.000026 backward=2.536610 grad_clip=0.000531 optimizer_step=0.000489 post_step_cleanup=0.012638 teacher_seq_len=21 teacher_vocab=248320
train_step step=74 prompt_index=9 loss=1.859918847913e-5 rollout_len=18 step_seconds=5.046571
phase_summary step=74 total=5.046571 student_rollout=2.090070 infer_forward_token_logits=0.005289 infer_sync=0.406734 d2d_bridge_import=0.000553 teacher_forward_total=0.412582 student_forward=0.296772 kl_loss=0.008771 optimizer_zero_grad=0.000024 backward=2.224461 grad_clip=0.000517 optimizer_step=0.000477 post_step_cleanup=0.012818 teacher_seq_len=18 teacher_vocab=248320
train_step step=75 prompt_index=10 loss=1.828827589634e-5 rollout_len=21 step_seconds=5.843842
phase_summary step=75 total=5.843841 student_rollout=2.436159 infer_forward_token_logits=0.003277 infer_sync=0.451177 d2d_bridge_import=0.000576 teacher_forward_total=0.455033 student_forward=0.346347 kl_loss=0.009900 optimizer_zero_grad=0.000027 backward=2.582761 grad_clip=0.000526 optimizer_step=0.000481 post_step_cleanup=0.012516 teacher_seq_len=21 teacher_vocab=248320
train_step step=76 prompt_index=11 loss=1.633544343349e-5 rollout_len=19 step_seconds=5.078844
phase_summary step=76 total=5.078843 student_rollout=2.198407 infer_forward_token_logits=0.005356 infer_sync=0.415327 d2d_bridge_import=0.000549 teacher_forward_total=0.421236 student_forward=0.314675 kl_loss=0.009098 optimizer_zero_grad=0.000023 backward=2.121519 grad_clip=0.000535 optimizer_step=0.000468 post_step_cleanup=0.012796 teacher_seq_len=19 teacher_vocab=248320
train_step step=77 prompt_index=12 loss=1.947662349266e-5 rollout_len=22 step_seconds=5.836643
phase_summary step=77 total=5.836642 student_rollout=2.573735 infer_forward_token_logits=0.005386 infer_sync=0.419678 d2d_bridge_import=0.000575 teacher_forward_total=0.425642 student_forward=0.362879 kl_loss=0.010247 optimizer_zero_grad=0.000024 backward=2.449894 grad_clip=0.000531 optimizer_step=0.000855 post_step_cleanup=0.012753 teacher_seq_len=22 teacher_vocab=248320
train_step step=78 prompt_index=13 loss=1.589609746588e-5 rollout_len=18 step_seconds=5.764859
phase_summary step=78 total=5.764859 student_rollout=2.931105 infer_forward_token_logits=0.005348 infer_sync=0.442308 d2d_bridge_import=0.000555 teacher_forward_total=0.448217 student_forward=0.299529 kl_loss=0.008808 optimizer_zero_grad=0.000024 backward=2.063255 grad_clip=0.000536 optimizer_step=0.000485 post_step_cleanup=0.012856 teacher_seq_len=18 teacher_vocab=248320
train_step step=79 prompt_index=14 loss=1.232300201082e-5 rollout_len=20 step_seconds=5.384736
phase_summary step=79 total=5.384735 student_rollout=2.299013 infer_forward_token_logits=0.005290 infer_sync=0.436848 d2d_bridge_import=0.000558 teacher_forward_total=0.442699 student_forward=0.330872 kl_loss=0.009455 optimizer_zero_grad=0.000026 backward=2.288119 grad_clip=0.000530 optimizer_step=0.000836 post_step_cleanup=0.013097 teacher_seq_len=20 teacher_vocab=248320
train_step step=80 prompt_index=15 loss=1.331067687715e-5 rollout_len=21 step_seconds=6.617735
phase_summary step=80 total=6.617734 student_rollout=3.315968 infer_forward_token_logits=0.005428 infer_sync=0.545809 d2d_bridge_import=0.000083 teacher_forward_total=0.551325 student_forward=0.347465 kl_loss=0.009906 optimizer_zero_grad=0.000023 backward=2.379142 grad_clip=0.000539 optimizer_step=0.000494 post_step_cleanup=0.012782 teacher_seq_len=21 teacher_vocab=248320
train_step step=81 prompt_index=0 loss=1.297355720453e-5 rollout_len=21 step_seconds=5.851712
phase_summary step=81 total=5.851711 student_rollout=2.518291 infer_forward_token_logits=0.005523 infer_sync=0.444352 d2d_bridge_import=0.000083 teacher_forward_total=0.449963 student_forward=0.348776 kl_loss=0.009916 optimizer_zero_grad=0.000027 backward=2.511007 grad_clip=0.000534 optimizer_step=0.000467 post_step_cleanup=0.012643 teacher_seq_len=21 teacher_vocab=248320
train_step step=82 prompt_index=1 loss=1.447264367016e-5 rollout_len=20 step_seconds=5.603884
phase_summary step=82 total=5.603884 student_rollout=2.318192 infer_forward_token_logits=0.005441 infer_sync=0.494779 d2d_bridge_import=0.000639 teacher_forward_total=0.500864 student_forward=0.328738 kl_loss=0.009529 optimizer_zero_grad=0.000023 backward=2.432425 grad_clip=0.000526 optimizer_step=0.000837 post_step_cleanup=0.012665 teacher_seq_len=20 teacher_vocab=248320
train_step step=83 prompt_index=2 loss=1.730705662339e-5 rollout_len=19 step_seconds=5.852024
phase_summary step=83 total=5.852023 student_rollout=2.886244 infer_forward_token_logits=0.005249 infer_sync=0.470617 d2d_bridge_import=0.000072 teacher_forward_total=0.475942 student_forward=0.313931 kl_loss=0.009110 optimizer_zero_grad=0.000024 backward=2.152920 grad_clip=0.000523 optimizer_step=0.000481 post_step_cleanup=0.012763 teacher_seq_len=19 teacher_vocab=248320
train_step step=84 prompt_index=3 loss=1.080309357349e-5 rollout_len=15 step_seconds=4.665828
phase_summary step=84 total=4.665827 student_rollout=2.167306 infer_forward_token_logits=0.005167 infer_sync=0.448697 d2d_bridge_import=0.000603 teacher_forward_total=0.454473 student_forward=0.248091 kl_loss=0.007600 optimizer_zero_grad=0.000025 backward=1.774451 grad_clip=0.000528 optimizer_step=0.000480 post_step_cleanup=0.012779 teacher_seq_len=15 teacher_vocab=248320
train_step step=85 prompt_index=4 loss=1.503178827988e-5 rollout_len=21 step_seconds=5.646364
phase_summary step=85 total=5.646363 student_rollout=2.401731 infer_forward_token_logits=0.005294 infer_sync=0.495242 d2d_bridge_import=0.000593 teacher_forward_total=0.501135 student_forward=0.347272 kl_loss=0.009920 optimizer_zero_grad=0.000027 backward=2.372170 grad_clip=0.000526 optimizer_step=0.000836 post_step_cleanup=0.012655 teacher_seq_len=21 teacher_vocab=248320
train_step step=86 prompt_index=5 loss=1.320935189142e-5 rollout_len=20 step_seconds=6.181973
phase_summary step=86 total=6.181973 student_rollout=3.171798 infer_forward_token_logits=0.005360 infer_sync=0.431694 d2d_bridge_import=0.000559 teacher_forward_total=0.437617 student_forward=0.330437 kl_loss=0.009518 optimizer_zero_grad=0.000026 backward=2.218743 grad_clip=0.000528 optimizer_step=0.000473 post_step_cleanup=0.012748 teacher_seq_len=20 teacher_vocab=248320
train_step step=87 prompt_index=6 loss=1.872057873697e-5 rollout_len=19 step_seconds=5.157829
phase_summary step=87 total=5.157828 student_rollout=2.193821 infer_forward_token_logits=0.005245 infer_sync=0.463794 d2d_bridge_import=0.000641 teacher_forward_total=0.469683 student_forward=0.314191 kl_loss=0.009198 optimizer_zero_grad=0.000025 backward=2.156586 grad_clip=0.000528 optimizer_step=0.000839 post_step_cleanup=0.012868 teacher_seq_len=19 teacher_vocab=248320
train_step step=88 prompt_index=7 loss=1.369890560454e-5 rollout_len=17 step_seconds=5.257544
phase_summary step=88 total=5.257543 student_rollout=2.504525 infer_forward_token_logits=0.005436 infer_sync=0.482694 d2d_bridge_import=0.000081 teacher_forward_total=0.488216 student_forward=0.283473 kl_loss=0.008486 optimizer_zero_grad=0.000025 backward=1.959085 grad_clip=0.000517 optimizer_step=0.000468 post_step_cleanup=0.012665 teacher_seq_len=17 teacher_vocab=248320
train_step step=89 prompt_index=8 loss=1.249408614967e-5 rollout_len=21 step_seconds=5.731353
phase_summary step=89 total=5.731353 student_rollout=2.497210 infer_forward_token_logits=0.005437 infer_sync=0.489210 d2d_bridge_import=0.000551 teacher_forward_total=0.495203 student_forward=0.346902 kl_loss=0.010045 optimizer_zero_grad=0.000026 backward=2.367770 grad_clip=0.000538 optimizer_step=0.000849 post_step_cleanup=0.012727 teacher_seq_len=21 teacher_vocab=248320
train_step step=90 prompt_index=9 loss=1.851858360169e-5 rollout_len=18 step_seconds=5.626761
phase_summary step=90 total=5.626761 student_rollout=2.728640 infer_forward_token_logits=0.005268 infer_sync=0.513555 d2d_bridge_import=0.000081 teacher_forward_total=0.518908 student_forward=0.298931 kl_loss=0.008752 optimizer_zero_grad=0.000025 backward=2.057572 grad_clip=0.000518 optimizer_step=0.000484 post_step_cleanup=0.012850 teacher_seq_len=18 teacher_vocab=248320
train_step step=91 prompt_index=10 loss=1.820366742322e-5 rollout_len=21 step_seconds=6.372971
phase_summary step=91 total=6.372970 student_rollout=3.064118 infer_forward_token_logits=0.005445 infer_sync=0.594286 d2d_bridge_import=0.000077 teacher_forward_total=0.599813 student_forward=0.345591 kl_loss=0.009866 optimizer_zero_grad=0.000027 backward=2.339441 grad_clip=0.000527 optimizer_step=0.000839 post_step_cleanup=0.012658 teacher_seq_len=21 teacher_vocab=248320
train_step step=92 prompt_index=11 loss=1.628174868529e-5 rollout_len=19 step_seconds=6.117428
phase_summary step=92 total=6.117427 student_rollout=3.047017 infer_forward_token_logits=0.005319 infer_sync=0.406250 d2d_bridge_import=0.000553 teacher_forward_total=0.412127 student_forward=0.313369 kl_loss=0.009111 optimizer_zero_grad=0.000028 backward=2.321821 grad_clip=0.000537 optimizer_step=0.000476 post_step_cleanup=0.012864 teacher_seq_len=19 teacher_vocab=248320
train_step step=93 prompt_index=12 loss=1.940390939126e-5 rollout_len=22 step_seconds=6.224781
phase_summary step=93 total=6.224781 student_rollout=2.709379 infer_forward_token_logits=0.005313 infer_sync=0.440457 d2d_bridge_import=0.000613 teacher_forward_total=0.446387 student_forward=0.360872 kl_loss=0.010294 optimizer_zero_grad=0.000025 backward=2.683403 grad_clip=0.000535 optimizer_step=0.000857 post_step_cleanup=0.012944 teacher_seq_len=22 teacher_vocab=248320
train_step step=94 prompt_index=13 loss=1.584071651450e-5 rollout_len=18 step_seconds=5.879499
phase_summary step=94 total=5.879498 student_rollout=3.032447 infer_forward_token_logits=0.005326 infer_sync=0.457686 d2d_bridge_import=0.000588 teacher_forward_total=0.463604 student_forward=0.299014 kl_loss=0.008757 optimizer_zero_grad=0.000024 backward=2.061716 grad_clip=0.000542 optimizer_step=0.000485 post_step_cleanup=0.012826 teacher_seq_len=18 teacher_vocab=248320
train_step step=95 prompt_index=14 loss=1.228258406627e-5 rollout_len=20 step_seconds=6.024282
phase_summary step=95 total=6.024281 student_rollout=2.913033 infer_forward_token_logits=0.005276 infer_sync=0.521275 d2d_bridge_import=0.000553 teacher_forward_total=0.527108 student_forward=0.330705 kl_loss=0.009584 optimizer_zero_grad=0.000024 backward=2.230013 grad_clip=0.000523 optimizer_step=0.000470 post_step_cleanup=0.012734 teacher_seq_len=20 teacher_vocab=248320
train_step step=96 prompt_index=15 loss=1.329485712631e-5 rollout_len=21 step_seconds=5.660102
phase_summary step=96 total=5.660102 student_rollout=2.458364 infer_forward_token_logits=0.005713 infer_sync=0.471290 d2d_bridge_import=0.000080 teacher_forward_total=0.477088 student_forward=0.345658 kl_loss=0.009892 optimizer_zero_grad=0.000026 backward=2.354950 grad_clip=0.000545 optimizer_step=0.000838 post_step_cleanup=0.012667 teacher_seq_len=21 teacher_vocab=248320
train_step step=97 prompt_index=0 loss=1.292747401749e-5 rollout_len=21 step_seconds=6.614575
phase_summary step=97 total=6.614574 student_rollout=3.351388 infer_forward_token_logits=0.005407 infer_sync=0.410312 d2d_bridge_import=0.000053 teacher_forward_total=0.415778 student_forward=0.348634 kl_loss=0.009890 optimizer_zero_grad=0.000028 backward=2.475154 grad_clip=0.000534 optimizer_step=0.000490 post_step_cleanup=0.012598 teacher_seq_len=21 teacher_vocab=248320
train_step step=98 prompt_index=1 loss=1.440908727091e-5 rollout_len=20 step_seconds=5.542328
phase_summary step=98 total=5.542328 student_rollout=2.355179 infer_forward_token_logits=0.005386 infer_sync=0.475834 d2d_bridge_import=0.000623 teacher_forward_total=0.481847 student_forward=0.330388 kl_loss=0.009496 optimizer_zero_grad=0.000027 backward=2.351084 grad_clip=0.000533 optimizer_step=0.000838 post_step_cleanup=0.012813 teacher_seq_len=20 teacher_vocab=248320
train_step step=99 prompt_index=2 loss=1.726841401251e-5 rollout_len=19 step_seconds=5.874468
phase_summary step=99 total=5.874467 student_rollout=2.782350 infer_forward_token_logits=0.005317 infer_sync=0.391313 d2d_bridge_import=0.000064 teacher_forward_total=0.396699 student_forward=0.313049 kl_loss=0.009112 optimizer_zero_grad=0.000026 backward=2.358026 grad_clip=0.000533 optimizer_step=0.000857 post_step_cleanup=0.013731 teacher_seq_len=19 teacher_vocab=248320
train_step step=100 prompt_index=3 loss=1.079340108845e-5 rollout_len=15 step_seconds=4.840514
phase_summary step=100 total=4.840514 student_rollout=2.177486 infer_forward_token_logits=0.005144 infer_sync=0.460972 d2d_bridge_import=0.000049 teacher_forward_total=0.466167 student_forward=0.250944 kl_loss=0.007650 optimizer_zero_grad=0.000026 backward=1.924448 grad_clip=0.000522 optimizer_step=0.000469 post_step_cleanup=0.012705 teacher_seq_len=15 teacher_vocab=248320
eval_summary step=100 train_kl=1.497842333720e-5 heldout_kl=1.722338788568e-5 eval_seconds=7.732979
train_step step=101 prompt_index=4 loss=1.499060817878e-5 rollout_len=21 step_seconds=5.522559
phase_summary step=101 total=5.522558 student_rollout=2.272035 infer_forward_token_logits=0.005302 infer_sync=0.483111 d2d_bridge_import=0.000600 teacher_forward_total=0.489017 student_forward=0.347213 kl_loss=0.009910 optimizer_zero_grad=0.000025 backward=2.390334 grad_clip=0.000541 optimizer_step=0.000852 post_step_cleanup=0.012602 teacher_seq_len=21 teacher_vocab=248320
train_step step=102 prompt_index=5 loss=1.318019985774e-5 rollout_len=20 step_seconds=6.468519
phase_summary step=102 total=6.468519 student_rollout=3.242267 infer_forward_token_logits=0.005398 infer_sync=0.574906 d2d_bridge_import=0.000554 teacher_forward_total=0.580862 student_forward=0.332374 kl_loss=0.009538 optimizer_zero_grad=0.000023 backward=2.289362 grad_clip=0.000528 optimizer_step=0.000844 post_step_cleanup=0.012638 teacher_seq_len=20 teacher_vocab=248320
train_step step=103 prompt_index=6 loss=1.253342543350e-5 rollout_len=19 step_seconds=5.835627
phase_summary step=103 total=5.835626 student_rollout=2.893345 infer_forward_token_logits=0.005302 infer_sync=0.436757 d2d_bridge_import=0.000589 teacher_forward_total=0.442652 student_forward=0.318591 kl_loss=0.009135 optimizer_zero_grad=0.000024 backward=2.158004 grad_clip=0.000533 optimizer_step=0.000482 post_step_cleanup=0.012795 teacher_seq_len=19 teacher_vocab=248320
train_step step=104 prompt_index=7 loss=1.362360490020e-5 rollout_len=17 step_seconds=5.305163
phase_summary step=104 total=5.305163 student_rollout=2.506099 infer_forward_token_logits=0.005501 infer_sync=0.358350 d2d_bridge_import=0.000553 teacher_forward_total=0.364410 student_forward=0.289270 kl_loss=0.008435 optimizer_zero_grad=0.000024 backward=2.122863 grad_clip=0.000538 optimizer_step=0.000479 post_step_cleanup=0.012958 teacher_seq_len=17 teacher_vocab=248320
train_step step=105 prompt_index=8 loss=1.248413900612e-5 rollout_len=21 step_seconds=6.412213
phase_summary step=105 total=6.412213 student_rollout=3.184067 infer_forward_token_logits=0.005235 infer_sync=0.432998 d2d_bridge_import=0.000081 teacher_forward_total=0.438319 student_forward=0.354279 kl_loss=0.009996 optimizer_zero_grad=0.000026 backward=2.411244 grad_clip=0.000537 optimizer_step=0.000834 post_step_cleanup=0.012827 teacher_seq_len=21 teacher_vocab=248320
train_step step=106 prompt_index=9 loss=1.843714380811e-5 rollout_len=18 step_seconds=5.695389
phase_summary step=106 total=5.695389 student_rollout=2.689920 infer_forward_token_logits=0.005522 infer_sync=0.440317 d2d_bridge_import=0.000554 teacher_forward_total=0.446399 student_forward=0.305018 kl_loss=0.008906 optimizer_zero_grad=0.000024 backward=2.230721 grad_clip=0.000549 optimizer_step=0.000837 post_step_cleanup=0.012927 teacher_seq_len=18 teacher_vocab=248320
train_step step=107 prompt_index=10 loss=1.811892980186e-5 rollout_len=21 step_seconds=6.454440
phase_summary step=107 total=6.454439 student_rollout=3.214250 infer_forward_token_logits=0.003352 infer_sync=0.491227 d2d_bridge_import=0.000601 teacher_forward_total=0.495183 student_forward=0.353032 kl_loss=0.009964 optimizer_zero_grad=0.000022 backward=2.368039 grad_clip=0.000542 optimizer_step=0.000495 post_step_cleanup=0.012834 teacher_seq_len=21 teacher_vocab=248320
train_step step=108 prompt_index=11 loss=1.622649506317e-5 rollout_len=19 step_seconds=5.219611
phase_summary step=108 total=5.219610 student_rollout=2.250970 infer_forward_token_logits=0.005276 infer_sync=0.455785 d2d_bridge_import=0.000592 teacher_forward_total=0.461656 student_forward=0.321751 kl_loss=0.009237 optimizer_zero_grad=0.000023 backward=2.162056 grad_clip=0.000530 optimizer_step=0.000483 post_step_cleanup=0.012820 teacher_seq_len=19 teacher_vocab=248320
train_step step=109 prompt_index=12 loss=1.933028579515e-5 rollout_len=22 step_seconds=5.972303
phase_summary step=109 total=5.972302 student_rollout=2.600233 infer_forward_token_logits=0.005335 infer_sync=0.508982 d2d_bridge_import=0.000596 teacher_forward_total=0.514917 student_forward=0.364073 kl_loss=0.010370 optimizer_zero_grad=0.000024 backward=2.468456 grad_clip=0.000525 optimizer_step=0.000840 post_step_cleanup=0.012800 teacher_seq_len=22 teacher_vocab=248320
train_step step=110 prompt_index=13 loss=1.578080627951e-5 rollout_len=18 step_seconds=5.942115
phase_summary step=110 total=5.942114 student_rollout=2.928818 infer_forward_token_logits=0.005342 infer_sync=0.481172 d2d_bridge_import=0.000550 teacher_forward_total=0.487068 student_forward=0.298249 kl_loss=0.008776 optimizer_zero_grad=0.000022 backward=2.205096 grad_clip=0.000529 optimizer_step=0.000564 post_step_cleanup=0.012904 teacher_seq_len=18 teacher_vocab=248320
train_step step=111 prompt_index=14 loss=1.223909384862e-5 rollout_len=20 step_seconds=5.518148
phase_summary step=111 total=5.518147 student_rollout=2.362499 infer_forward_token_logits=0.005374 infer_sync=0.503010 d2d_bridge_import=0.000553 teacher_forward_total=0.508941 student_forward=0.332788 kl_loss=0.009554 optimizer_zero_grad=0.000023 backward=2.290262 grad_clip=0.000535 optimizer_step=0.000473 post_step_cleanup=0.012987 teacher_seq_len=20 teacher_vocab=248320
train_step step=112 prompt_index=15 loss=1.327814607066e-5 rollout_len=21 step_seconds=5.943853
phase_summary step=112 total=5.943852 student_rollout=2.565471 infer_forward_token_logits=0.005398 infer_sync=0.592848 d2d_bridge_import=0.000079 teacher_forward_total=0.598330 student_forward=0.349327 kl_loss=0.009904 optimizer_zero_grad=0.000022 backward=2.406543 grad_clip=0.000532 optimizer_step=0.000863 post_step_cleanup=0.012803 teacher_seq_len=21 teacher_vocab=248320
train_step step=113 prompt_index=0 loss=1.287848954235e-5 rollout_len=21 step_seconds=6.603157
phase_summary step=113 total=6.603156 student_rollout=3.435833 infer_forward_token_logits=0.005217 infer_sync=0.410646 d2d_bridge_import=0.000072 teacher_forward_total=0.415941 student_forward=0.345922 kl_loss=0.009902 optimizer_zero_grad=0.000025 backward=2.381275 grad_clip=0.000537 optimizer_step=0.000855 post_step_cleanup=0.012789 teacher_seq_len=21 teacher_vocab=248320
train_step step=114 prompt_index=1 loss=1.434124169464e-5 rollout_len=20 step_seconds=6.660720
phase_summary step=114 total=6.660719 student_rollout=3.278646 infer_forward_token_logits=0.005346 infer_sync=0.542083 d2d_bridge_import=0.000600 teacher_forward_total=0.548034 student_forward=0.330945 kl_loss=0.009497 optimizer_zero_grad=0.000025 backward=2.479370 grad_clip=0.000528 optimizer_step=0.000827 post_step_cleanup=0.012781 teacher_seq_len=20 teacher_vocab=248320
train_step step=115 prompt_index=2 loss=1.722829438222e-5 rollout_len=19 step_seconds=6.072894
phase_summary step=115 total=6.072894 student_rollout=3.000052 infer_forward_token_logits=0.004998 infer_sync=0.540911 d2d_bridge_import=0.000072 teacher_forward_total=0.545986 student_forward=0.311858 kl_loss=0.009274 optimizer_zero_grad=0.000031 backward=2.191886 grad_clip=0.000526 optimizer_step=0.000482 post_step_cleanup=0.012719 teacher_seq_len=19 teacher_vocab=248320
train_step step=116 prompt_index=3 loss=1.077823071682e-5 rollout_len=15 step_seconds=4.235010
phase_summary step=116 total=4.235009 student_rollout=1.635826 infer_forward_token_logits=0.005194 infer_sync=0.405857 d2d_bridge_import=0.000637 teacher_forward_total=0.411693 student_forward=0.252093 kl_loss=0.007615 optimizer_zero_grad=0.000025 backward=1.913902 grad_clip=0.000517 optimizer_step=0.000482 post_step_cleanup=0.012789 teacher_seq_len=15 teacher_vocab=248320
train_step step=117 prompt_index=4 loss=1.494636853749e-5 rollout_len=21 step_seconds=5.681144
phase_summary step=117 total=5.681143 student_rollout=2.444264 infer_forward_token_logits=0.005321 infer_sync=0.484857 d2d_bridge_import=0.000581 teacher_forward_total=0.490762 student_forward=0.347847 kl_loss=0.009928 optimizer_zero_grad=0.000024 backward=2.374208 grad_clip=0.000527 optimizer_step=0.000855 post_step_cleanup=0.012636 teacher_seq_len=21 teacher_vocab=248320
train_step step=118 prompt_index=5 loss=1.315017470915e-5 rollout_len=20 step_seconds=6.379271
phase_summary step=118 total=6.379270 student_rollout=3.197881 infer_forward_token_logits=0.005397 infer_sync=0.574888 d2d_bridge_import=0.000553 teacher_forward_total=0.580842 student_forward=0.333272 kl_loss=0.009496 optimizer_zero_grad=0.000025 backward=2.243836 grad_clip=0.000534 optimizer_step=0.000487 post_step_cleanup=0.012833 teacher_seq_len=20 teacher_vocab=248320
train_step step=119 prompt_index=6 loss=1.248780336027e-5 rollout_len=19 step_seconds=5.185057
phase_summary step=119 total=5.185057 student_rollout=2.201512 infer_forward_token_logits=0.005305 infer_sync=0.455067 d2d_bridge_import=0.000641 teacher_forward_total=0.461019 student_forward=0.317504 kl_loss=0.009143 optimizer_zero_grad=0.000023 backward=2.182083 grad_clip=0.000534 optimizer_step=0.000478 post_step_cleanup=0.012672 teacher_seq_len=19 teacher_vocab=248320
train_step step=120 prompt_index=7 loss=1.354218238703e-5 rollout_len=17 step_seconds=4.565115
phase_summary step=120 total=4.565114 student_rollout=1.918908 infer_forward_token_logits=0.005272 infer_sync=0.352810 d2d_bridge_import=0.000041 teacher_forward_total=0.358126 student_forward=0.285832 kl_loss=0.008439 optimizer_zero_grad=0.000023 backward=1.979967 grad_clip=0.000536 optimizer_step=0.000492 post_step_cleanup=0.012725 teacher_seq_len=17 teacher_vocab=248320
train_step step=121 prompt_index=8 loss=1.247393902304e-5 rollout_len=21 step_seconds=5.728152
phase_summary step=121 total=5.728152 student_rollout=2.448993 infer_forward_token_logits=0.005610 infer_sync=0.483103 d2d_bridge_import=0.000553 teacher_forward_total=0.489270 student_forward=0.349504 kl_loss=0.009899 optimizer_zero_grad=0.000025 backward=2.416113 grad_clip=0.000539 optimizer_step=0.000832 post_step_cleanup=0.012878 teacher_seq_len=21 teacher_vocab=248320
train_step step=122 prompt_index=9 loss=1.835801413108e-5 rollout_len=18 step_seconds=5.665791
phase_summary step=122 total=5.665789 student_rollout=2.824499 infer_forward_token_logits=0.005431 infer_sync=0.445534 d2d_bridge_import=0.000069 teacher_forward_total=0.451037 student_forward=0.298306 kl_loss=0.008864 optimizer_zero_grad=0.000024 backward=2.069090 grad_clip=0.000539 optimizer_step=0.000472 post_step_cleanup=0.012872 teacher_seq_len=18 teacher_vocab=248320
train_step step=123 prompt_index=10 loss=1.803385021049e-5 rollout_len=21 step_seconds=5.808964
phase_summary step=123 total=5.808963 student_rollout=2.486491 infer_forward_token_logits=0.005401 infer_sync=0.534907 d2d_bridge_import=0.000055 teacher_forward_total=0.540368 student_forward=0.347775 kl_loss=0.009938 optimizer_zero_grad=0.000027 backward=2.410509 grad_clip=0.000530 optimizer_step=0.000496 post_step_cleanup=0.012738 teacher_seq_len=21 teacher_vocab=248320
train_step step=124 prompt_index=11 loss=1.826497646107e-5 rollout_len=19 step_seconds=5.191873
phase_summary step=124 total=5.191872 student_rollout=2.221460 infer_forward_token_logits=0.005328 infer_sync=0.460056 d2d_bridge_import=0.000590 teacher_forward_total=0.465980 student_forward=0.316124 kl_loss=0.009207 optimizer_zero_grad=0.000027 backward=2.165257 grad_clip=0.000541 optimizer_step=0.000473 post_step_cleanup=0.012720 teacher_seq_len=19 teacher_vocab=248320
train_step step=125 prompt_index=12 loss=1.925513242895e-5 rollout_len=22 step_seconds=6.012150
phase_summary step=125 total=6.012149 student_rollout=2.613421 infer_forward_token_logits=0.005362 infer_sync=0.512667 d2d_bridge_import=0.000589 teacher_forward_total=0.518623 student_forward=0.368219 kl_loss=0.010265 optimizer_zero_grad=0.000026 backward=2.487205 grad_clip=0.000539 optimizer_step=0.000863 post_step_cleanup=0.012897 teacher_seq_len=22 teacher_vocab=248320
train_step step=126 prompt_index=13 loss=1.571629763930e-5 rollout_len=18 step_seconds=5.754992
phase_summary step=126 total=5.754991 student_rollout=2.912830 infer_forward_token_logits=0.005319 infer_sync=0.477003 d2d_bridge_import=0.000589 teacher_forward_total=0.482915 student_forward=0.300990 kl_loss=0.008840 optimizer_zero_grad=0.000027 backward=2.035485 grad_clip=0.000528 optimizer_step=0.000488 post_step_cleanup=0.012802 teacher_seq_len=18 teacher_vocab=248320
train_step step=127 prompt_index=14 loss=1.219253408635e-5 rollout_len=20 step_seconds=5.520655
phase_summary step=127 total=5.520653 student_rollout=2.319360 infer_forward_token_logits=0.005276 infer_sync=0.443615 d2d_bridge_import=0.000549 teacher_forward_total=0.449444 student_forward=0.332171 kl_loss=0.009533 optimizer_zero_grad=0.000026 backward=2.393820 grad_clip=0.000550 optimizer_step=0.000858 post_step_cleanup=0.014801 teacher_seq_len=20 teacher_vocab=248320
train_step step=128 prompt_index=15 loss=1.326018264081e-5 rollout_len=21 step_seconds=6.817185
phase_summary step=128 total=6.817185 student_rollout=3.377080 infer_forward_token_logits=0.005426 infer_sync=0.548341 d2d_bridge_import=0.000079 teacher_forward_total=0.553850 student_forward=0.347924 kl_loss=0.009946 optimizer_zero_grad=0.000026 backward=2.514185 grad_clip=0.000541 optimizer_step=0.000819 post_step_cleanup=0.012688 teacher_seq_len=21 teacher_vocab=248320
train_step step=129 prompt_index=0 loss=1.282615448872e-5 rollout_len=21 step_seconds=6.687842
phase_summary step=129 total=6.687841 student_rollout=3.439850 infer_forward_token_logits=0.005354 infer_sync=0.504127 d2d_bridge_import=0.000045 teacher_forward_total=0.509531 student_forward=0.346921 kl_loss=0.009875 optimizer_zero_grad=0.000028 backward=2.367740 grad_clip=0.000531 optimizer_step=0.000492 post_step_cleanup=0.012790 teacher_seq_len=21 teacher_vocab=248320
train_step step=130 prompt_index=1 loss=1.426934068149e-5 rollout_len=20 step_seconds=5.404075
phase_summary step=130 total=5.404074 student_rollout=2.357128 infer_forward_token_logits=0.005387 infer_sync=0.402407 d2d_bridge_import=0.000596 teacher_forward_total=0.408394 student_forward=0.333951 kl_loss=0.009523 optimizer_zero_grad=0.000025 backward=2.281146 grad_clip=0.000533 optimizer_step=0.000470 post_step_cleanup=0.012820 teacher_seq_len=20 teacher_vocab=248320
train_step step=131 prompt_index=2 loss=1.718623752822e-5 rollout_len=19 step_seconds=5.161590
phase_summary step=131 total=5.161589 student_rollout=2.177834 infer_forward_token_logits=0.005228 infer_sync=0.470158 d2d_bridge_import=0.000066 teacher_forward_total=0.475456 student_forward=0.317983 kl_loss=0.009140 optimizer_zero_grad=0.000026 backward=2.166514 grad_clip=0.000537 optimizer_step=0.000494 post_step_cleanup=0.013522 teacher_seq_len=19 teacher_vocab=248320
train_step step=132 prompt_index=3 loss=1.076265471056e-5 rollout_len=15 step_seconds=4.230414
phase_summary step=132 total=4.230414 student_rollout=1.654777 infer_forward_token_logits=0.005192 infer_sync=0.387820 d2d_bridge_import=0.000059 teacher_forward_total=0.393076 student_forward=0.254055 kl_loss=0.007628 optimizer_zero_grad=0.000025 backward=1.906862 grad_clip=0.000540 optimizer_step=0.000483 post_step_cleanup=0.012889 teacher_seq_len=15 teacher_vocab=248320
train_step step=133 prompt_index=4 loss=1.490025624662e-5 rollout_len=21 step_seconds=6.413916
phase_summary step=133 total=6.413915 student_rollout=2.980580 infer_forward_token_logits=0.005355 infer_sync=0.514048 d2d_bridge_import=0.000596 teacher_forward_total=0.520003 student_forward=0.349519 kl_loss=0.009888 optimizer_zero_grad=0.000023 backward=2.540023 grad_clip=0.000538 optimizer_step=0.000494 post_step_cleanup=0.012761 teacher_seq_len=21 teacher_vocab=248320
train_step step=134 prompt_index=5 loss=1.428500127076e-5 rollout_len=20 step_seconds=5.679401
phase_summary step=134 total=5.679400 student_rollout=2.376962 infer_forward_token_logits=0.005386 infer_sync=0.498002 d2d_bridge_import=0.000591 teacher_forward_total=0.503983 student_forward=0.331564 kl_loss=0.009546 optimizer_zero_grad=0.000022 backward=2.443162 grad_clip=0.000526 optimizer_step=0.000853 post_step_cleanup=0.012691 teacher_seq_len=20 teacher_vocab=248320
train_step step=135 prompt_index=6 loss=1.243859514943e-5 rollout_len=19 step_seconds=6.194080
phase_summary step=135 total=6.194079 student_rollout=3.136005 infer_forward_token_logits=0.005347 infer_sync=0.390128 d2d_bridge_import=0.000573 teacher_forward_total=0.396052 student_forward=0.316322 kl_loss=0.009137 optimizer_zero_grad=0.000026 backward=2.322673 grad_clip=0.000527 optimizer_step=0.000488 post_step_cleanup=0.012764 teacher_seq_len=19 teacher_vocab=248320
train_step step=136 prompt_index=7 loss=1.345554210275e-5 rollout_len=17 step_seconds=4.654348
phase_summary step=136 total=4.654348 student_rollout=1.934490 infer_forward_token_logits=0.005576 infer_sync=0.422708 d2d_bridge_import=0.000592 teacher_forward_total=0.428880 student_forward=0.286387 kl_loss=0.008335 optimizer_zero_grad=0.000025 backward=1.982418 grad_clip=0.000533 optimizer_step=0.000473 post_step_cleanup=0.012721 teacher_seq_len=17 teacher_vocab=248320
train_step step=137 prompt_index=8 loss=1.246301962965e-5 rollout_len=21 step_seconds=6.395886
phase_summary step=137 total=6.395885 student_rollout=3.058988 infer_forward_token_logits=0.005241 infer_sync=0.444725 d2d_bridge_import=0.000064 teacher_forward_total=0.450035 student_forward=0.349464 kl_loss=0.009950 optimizer_zero_grad=0.000023 backward=2.513586 grad_clip=0.000526 optimizer_step=0.000498 post_step_cleanup=0.012750 teacher_seq_len=21 teacher_vocab=248320
train_step step=138 prompt_index=9 loss=1.827355845307e-5 rollout_len=18 step_seconds=5.086780
phase_summary step=138 total=5.086780 student_rollout=2.100308 infer_forward_token_logits=0.005349 infer_sync=0.442857 d2d_bridge_import=0.000587 teacher_forward_total=0.448800 student_forward=0.299025 kl_loss=0.008794 optimizer_zero_grad=0.000025 backward=2.215871 grad_clip=0.000530 optimizer_step=0.000476 post_step_cleanup=0.012864 teacher_seq_len=18 teacher_vocab=248320
train_step step=139 prompt_index=10 loss=1.794311174308e-5 rollout_len=21 step_seconds=5.693663
phase_summary step=139 total=5.693662 student_rollout=2.455463 infer_forward_token_logits=0.003192 infer_sync=0.463397 d2d_bridge_import=0.000582 teacher_forward_total=0.467176 student_forward=0.349407 kl_loss=0.009911 optimizer_zero_grad=0.000024 backward=2.397649 grad_clip=0.000556 optimizer_step=0.000844 post_step_cleanup=0.012539 teacher_seq_len=21 teacher_vocab=248320
train_step step=140 prompt_index=11 loss=1.814787174226e-5 rollout_len=19 step_seconds=6.134904
phase_summary step=140 total=6.134904 student_rollout=3.058746 infer_forward_token_logits=0.005335 infer_sync=0.434957 d2d_bridge_import=0.000553 teacher_forward_total=0.440849 student_forward=0.315881 kl_loss=0.009154 optimizer_zero_grad=0.000023 backward=2.296429 grad_clip=0.000529 optimizer_step=0.000484 post_step_cleanup=0.012722 teacher_seq_len=19 teacher_vocab=248320
train_step step=141 prompt_index=12 loss=1.917378904182e-5 rollout_len=22 step_seconds=6.254365
phase_summary step=141 total=6.254364 student_rollout=2.721383 infer_forward_token_logits=0.005383 infer_sync=0.452003 d2d_bridge_import=0.000573 teacher_forward_total=0.457963 student_forward=0.369966 kl_loss=0.010335 optimizer_zero_grad=0.000027 backward=2.680326 grad_clip=0.000544 optimizer_step=0.000856 post_step_cleanup=0.012895 teacher_seq_len=22 teacher_vocab=248320
train_step step=142 prompt_index=13 loss=1.564816375321e-5 rollout_len=18 step_seconds=5.909108
phase_summary step=142 total=5.909107 student_rollout=3.085608 infer_forward_token_logits=0.005421 infer_sync=0.400397 d2d_bridge_import=0.000597 teacher_forward_total=0.406420 student_forward=0.303440 kl_loss=0.008835 optimizer_zero_grad=0.000023 backward=2.090474 grad_clip=0.000526 optimizer_step=0.000479 post_step_cleanup=0.013210 teacher_seq_len=18 teacher_vocab=248320
train_step step=143 prompt_index=14 loss=1.214370240632e-5 rollout_len=20 step_seconds=6.065810
phase_summary step=143 total=6.065810 student_rollout=2.955616 infer_forward_token_logits=0.005378 infer_sync=0.448668 d2d_bridge_import=0.000550 teacher_forward_total=0.454602 student_forward=0.336717 kl_loss=0.009498 optimizer_zero_grad=0.000022 backward=2.295237 grad_clip=0.000543 optimizer_step=0.000477 post_step_cleanup=0.013003 teacher_seq_len=20 teacher_vocab=248320
train_step step=144 prompt_index=15 loss=1.324137701886e-5 rollout_len=21 step_seconds=5.836776
phase_summary step=144 total=5.836775 student_rollout=2.498076 infer_forward_token_logits=0.005371 infer_sync=0.505001 d2d_bridge_import=0.000094 teacher_forward_total=0.510472 student_forward=0.350546 kl_loss=0.009929 optimizer_zero_grad=0.000026 backward=2.453488 grad_clip=0.000547 optimizer_step=0.000827 post_step_cleanup=0.012780 teacher_seq_len=21 teacher_vocab=248320
train_step step=145 prompt_index=0 loss=1.277027058677e-5 rollout_len=21 step_seconds=6.707345
phase_summary step=145 total=6.707343 student_rollout=3.503562 infer_forward_token_logits=0.005499 infer_sync=0.410701 d2d_bridge_import=0.000067 teacher_forward_total=0.416271 student_forward=0.352590 kl_loss=0.009939 optimizer_zero_grad=0.000024 backward=2.411028 grad_clip=0.000526 optimizer_step=0.000495 post_step_cleanup=0.012818 teacher_seq_len=21 teacher_vocab=248320
train_step step=146 prompt_index=1 loss=1.419307955075e-5 rollout_len=20 step_seconds=5.497745
phase_summary step=146 total=5.497744 student_rollout=2.386735 infer_forward_token_logits=0.005390 infer_sync=0.478431 d2d_bridge_import=0.000610 teacher_forward_total=0.484435 student_forward=0.335338 kl_loss=0.009551 optimizer_zero_grad=0.000027 backward=2.267358 grad_clip=0.000535 optimizer_step=0.000856 post_step_cleanup=0.012810 teacher_seq_len=20 teacher_vocab=248320
train_step step=147 prompt_index=2 loss=1.714159770927e-5 rollout_len=19 step_seconds=5.855183
phase_summary step=147 total=5.855182 student_rollout=2.857288 infer_forward_token_logits=0.005288 infer_sync=0.419927 d2d_bridge_import=0.000078 teacher_forward_total=0.425297 student_forward=0.319947 kl_loss=0.009306 optimizer_zero_grad=0.000031 backward=2.228524 grad_clip=0.000554 optimizer_step=0.000474 post_step_cleanup=0.013673 teacher_seq_len=19 teacher_vocab=248320
train_step step=148 prompt_index=3 loss=1.074643114407e-5 rollout_len=15 step_seconds=4.077828
phase_summary step=148 total=4.077827 student_rollout=1.671212 infer_forward_token_logits=0.005372 infer_sync=0.363505 d2d_bridge_import=0.000641 teacher_forward_total=0.369522 student_forward=0.255621 kl_loss=0.007689 optimizer_zero_grad=0.000022 backward=1.759375 grad_clip=0.000541 optimizer_step=0.000475 post_step_cleanup=0.013301 teacher_seq_len=15 teacher_vocab=248320
train_step step=149 prompt_index=4 loss=1.485178381699e-5 rollout_len=21 step_seconds=5.726980
phase_summary step=149 total=5.726979 student_rollout=2.468543 infer_forward_token_logits=0.005314 infer_sync=0.414005 d2d_bridge_import=0.000552 teacher_forward_total=0.419876 student_forward=0.353425 kl_loss=0.010058 optimizer_zero_grad=0.000026 backward=2.461091 grad_clip=0.000535 optimizer_step=0.000483 post_step_cleanup=0.012828 teacher_seq_len=21 teacher_vocab=248320
train_step step=150 prompt_index=5 loss=1.424562833563e-5 rollout_len=20 step_seconds=5.533193
phase_summary step=150 total=5.533192 student_rollout=2.363710 infer_forward_token_logits=0.005303 infer_sync=0.497057 d2d_bridge_import=0.000592 teacher_forward_total=0.502957 student_forward=0.339447 kl_loss=0.009628 optimizer_zero_grad=0.000024 backward=2.303144 grad_clip=0.000525 optimizer_step=0.000868 post_step_cleanup=0.012803 teacher_seq_len=20 teacher_vocab=248320
train_step step=151 prompt_index=6 loss=1.238659297087e-5 rollout_len=19 step_seconds=5.992158
phase_summary step=151 total=5.992157 student_rollout=2.925743 infer_forward_token_logits=0.005535 infer_sync=0.544787 d2d_bridge_import=0.000605 teacher_forward_total=0.550933 student_forward=0.320159 kl_loss=0.009162 optimizer_zero_grad=0.000026 backward=2.172068 grad_clip=0.000533 optimizer_step=0.000476 post_step_cleanup=0.012968 teacher_seq_len=19 teacher_vocab=248320
train_step step=152 prompt_index=7 loss=1.336509285466e-5 rollout_len=17 step_seconds=4.765776
phase_summary step=152 total=4.765775 student_rollout=1.928967 infer_forward_token_logits=0.005251 infer_sync=0.407020 d2d_bridge_import=0.000077 teacher_forward_total=0.412353 student_forward=0.282206 kl_loss=0.008371 optimizer_zero_grad=0.000025 backward=2.120003 grad_clip=0.000525 optimizer_step=0.000478 post_step_cleanup=0.012779 teacher_seq_len=17 teacher_vocab=248320
train_step step=153 prompt_index=8 loss=1.245169278263e-5 rollout_len=21 step_seconds=5.670581
phase_summary step=153 total=5.670580 student_rollout=2.459267 infer_forward_token_logits=0.005313 infer_sync=0.409496 d2d_bridge_import=0.000549 teacher_forward_total=0.415365 student_forward=0.349011 kl_loss=0.009918 optimizer_zero_grad=0.000028 backward=2.423197 grad_clip=0.000533 optimizer_step=0.000480 post_step_cleanup=0.012689 teacher_seq_len=21 teacher_vocab=248320
train_step step=154 prompt_index=9 loss=1.818827513489e-5 rollout_len=18 step_seconds=4.876922
phase_summary step=154 total=4.876921 student_rollout=2.047339 infer_forward_token_logits=0.003219 infer_sync=0.434966 d2d_bridge_import=0.000039 teacher_forward_total=0.438226 student_forward=0.303431 kl_loss=0.008800 optimizer_zero_grad=0.000025 backward=2.065112 grad_clip=0.000538 optimizer_step=0.000480 post_step_cleanup=0.012886 teacher_seq_len=18 teacher_vocab=248320
train_step step=155 prompt_index=10 loss=1.784499727364e-5 rollout_len=21 step_seconds=6.451712
phase_summary step=155 total=6.451711 student_rollout=3.135261 infer_forward_token_logits=0.005347 infer_sync=0.454603 d2d_bridge_import=0.000077 teacher_forward_total=0.460033 student_forward=0.347842 kl_loss=0.009893 optimizer_zero_grad=0.000024 backward=2.483892 grad_clip=0.000540 optimizer_step=0.000854 post_step_cleanup=0.013302 teacher_seq_len=21 teacher_vocab=248320
train_step step=156 prompt_index=11 loss=1.802204315027e-5 rollout_len=19 step_seconds=6.359529
phase_summary step=156 total=6.359528 student_rollout=3.166845 infer_forward_token_logits=0.005323 infer_sync=0.551178 d2d_bridge_import=0.000550 teacher_forward_total=0.557055 student_forward=0.323576 kl_loss=0.009368 optimizer_zero_grad=0.000028 backward=2.287764 grad_clip=0.000544 optimizer_step=0.000835 post_step_cleanup=0.013412 teacher_seq_len=19 teacher_vocab=248320
train_step step=157 prompt_index=12 loss=1.908763806568e-5 rollout_len=22 step_seconds=7.245890
phase_summary step=157 total=7.245889 student_rollout=3.583700 infer_forward_token_logits=0.005354 infer_sync=0.611673 d2d_bridge_import=0.000601 teacher_forward_total=0.617633 student_forward=0.366397 kl_loss=0.010466 optimizer_zero_grad=0.000027 backward=2.653188 grad_clip=0.000605 optimizer_step=0.000549 post_step_cleanup=0.013233 teacher_seq_len=22 teacher_vocab=248320
train_step step=158 prompt_index=13 loss=1.557576251798e-5 rollout_len=18 step_seconds=5.062599
phase_summary step=158 total=5.062598 student_rollout=2.144138 infer_forward_token_logits=0.005258 infer_sync=0.443017 d2d_bridge_import=0.000639 teacher_forward_total=0.448918 student_forward=0.306323 kl_loss=0.009134 optimizer_zero_grad=0.000024 backward=2.139754 grad_clip=0.000537 optimizer_step=0.000480 post_step_cleanup=0.013171 teacher_seq_len=18 teacher_vocab=248320
train_step step=159 prompt_index=14 loss=1.209207766806e-5 rollout_len=20 step_seconds=5.428558
phase_summary step=159 total=5.428558 student_rollout=2.369204 infer_forward_token_logits=0.005273 infer_sync=0.404551 d2d_bridge_import=0.000549 teacher_forward_total=0.410378 student_forward=0.336792 kl_loss=0.009690 optimizer_zero_grad=0.000028 backward=2.288525 grad_clip=0.000522 optimizer_step=0.000474 post_step_cleanup=0.012846 teacher_seq_len=20 teacher_vocab=248320
train_step step=160 prompt_index=15 loss=1.322177195107e-5 rollout_len=21 step_seconds=5.952197
phase_summary step=160 total=5.952196 student_rollout=2.440661 infer_forward_token_logits=0.005393 infer_sync=0.483300 d2d_bridge_import=0.000082 teacher_forward_total=0.488780 student_forward=0.349134 kl_loss=0.009920 optimizer_zero_grad=0.000026 backward=2.649461 grad_clip=0.000532 optimizer_step=0.000843 post_step_cleanup=0.012750 teacher_seq_len=21 teacher_vocab=248320
train_step step=161 prompt_index=0 loss=1.271078872378e-5 rollout_len=21 step_seconds=6.802572
phase_summary step=161 total=6.802571 student_rollout=3.474573 infer_forward_token_logits=0.005408 infer_sync=0.408891 d2d_bridge_import=0.000078 teacher_forward_total=0.414383 student_forward=0.346551 kl_loss=0.010265 optimizer_zero_grad=0.000029 backward=2.542838 grad_clip=0.000531 optimizer_step=0.000501 post_step_cleanup=0.012812 teacher_seq_len=21 teacher_vocab=248320
train_step step=162 prompt_index=1 loss=1.411432003806e-5 rollout_len=20 step_seconds=5.578817
phase_summary step=162 total=5.578816 student_rollout=2.374833 infer_forward_token_logits=0.005357 infer_sync=0.414664 d2d_bridge_import=0.000600 teacher_forward_total=0.420626 student_forward=0.330138 kl_loss=0.009484 optimizer_zero_grad=0.000028 backward=2.429869 grad_clip=0.000519 optimizer_step=0.000473 post_step_cleanup=0.012754 teacher_seq_len=20 teacher_vocab=248320
train_step step=163 prompt_index=2 loss=1.709486605250e-5 rollout_len=19 step_seconds=5.190669
phase_summary step=163 total=5.190668 student_rollout=2.188549 infer_forward_token_logits=0.005248 infer_sync=0.392861 d2d_bridge_import=0.000084 teacher_forward_total=0.398199 student_forward=0.317797 kl_loss=0.009165 optimizer_zero_grad=0.000025 backward=2.262901 grad_clip=0.000528 optimizer_step=0.000482 post_step_cleanup=0.012958 teacher_seq_len=19 teacher_vocab=248320
train_step step=164 prompt_index=3 loss=1.072937084245e-5 rollout_len=15 step_seconds=4.246243
phase_summary step=164 total=4.246242 student_rollout=1.642202 infer_forward_token_logits=0.005117 infer_sync=0.404518 d2d_bridge_import=0.000081 teacher_forward_total=0.409722 student_forward=0.253139 kl_loss=0.007623 optimizer_zero_grad=0.000026 backward=1.919780 grad_clip=0.000516 optimizer_step=0.000473 post_step_cleanup=0.012695 teacher_seq_len=15 teacher_vocab=248320
train_step step=165 prompt_index=4 loss=1.480092942074e-5 rollout_len=21 step_seconds=5.882428
phase_summary step=165 total=5.882428 student_rollout=2.434808 infer_forward_token_logits=0.005372 infer_sync=0.487172 d2d_bridge_import=0.000640 teacher_forward_total=0.493190 student_forward=0.345370 kl_loss=0.009920 optimizer_zero_grad=0.000023 backward=2.584905 grad_clip=0.000554 optimizer_step=0.000863 post_step_cleanup=0.012703 teacher_seq_len=21 teacher_vocab=248320
train_step step=166 prompt_index=5 loss=1.420409262209e-5 rollout_len=20 step_seconds=6.430045
phase_summary step=166 total=6.430044 student_rollout=3.346287 infer_forward_token_logits=0.005711 infer_sync=0.412733 d2d_bridge_import=0.000547 teacher_forward_total=0.418996 student_forward=0.330946 kl_loss=0.009530 optimizer_zero_grad=0.000022 backward=2.310339 grad_clip=0.000538 optimizer_step=0.000491 post_step_cleanup=0.012826 teacher_seq_len=20 teacher_vocab=248320
train_step step=167 prompt_index=6 loss=1.233187686012e-5 rollout_len=19 step_seconds=5.187137
phase_summary step=167 total=5.187136 student_rollout=2.226488 infer_forward_token_logits=0.005252 infer_sync=0.403872 d2d_bridge_import=0.000604 teacher_forward_total=0.409732 student_forward=0.318173 kl_loss=0.009171 optimizer_zero_grad=0.000026 backward=2.209499 grad_clip=0.000537 optimizer_step=0.000490 post_step_cleanup=0.012949 teacher_seq_len=19 teacher_vocab=248320
train_step step=168 prompt_index=7 loss=1.327000245510e-5 rollout_len=17 step_seconds=4.644458
phase_summary step=168 total=4.644458 student_rollout=1.926673 infer_forward_token_logits=0.005419 infer_sync=0.420818 d2d_bridge_import=0.000594 teacher_forward_total=0.426837 student_forward=0.286174 kl_loss=0.008369 optimizer_zero_grad=0.000024 backward=1.982371 grad_clip=0.000545 optimizer_step=0.000473 post_step_cleanup=0.012889 teacher_seq_len=17 teacher_vocab=248320
train_step step=169 prompt_index=8 loss=1.244016857527e-5 rollout_len=21 step_seconds=5.928885
phase_summary step=169 total=5.928884 student_rollout=2.465854 infer_forward_token_logits=0.005316 infer_sync=0.494352 d2d_bridge_import=0.000049 teacher_forward_total=0.499722 student_forward=0.354234 kl_loss=0.009972 optimizer_zero_grad=0.000027 backward=2.584771 grad_clip=0.000540 optimizer_step=0.000844 post_step_cleanup=0.012836 teacher_seq_len=21 teacher_vocab=248320
train_step step=170 prompt_index=9 loss=1.809997229429e-5 rollout_len=18 step_seconds=6.068571
phase_summary step=170 total=6.068570 student_rollout=3.061946 infer_forward_token_logits=0.005382 infer_sync=0.465409 d2d_bridge_import=0.000553 teacher_forward_total=0.471348 student_forward=0.303152 kl_loss=0.008760 optimizer_zero_grad=0.000023 backward=2.208839 grad_clip=0.000541 optimizer_step=0.000855 post_step_cleanup=0.013042 teacher_seq_len=18 teacher_vocab=248320
train_step step=171 prompt_index=10 loss=1.710455944703e-5 rollout_len=21 step_seconds=6.487523
phase_summary step=171 total=6.487523 student_rollout=3.180866 infer_forward_token_logits=0.005398 infer_sync=0.524010 d2d_bridge_import=0.000600 teacher_forward_total=0.530013 student_forward=0.348784 kl_loss=0.009926 optimizer_zero_grad=0.000027 backward=2.404079 grad_clip=0.000539 optimizer_step=0.000483 post_step_cleanup=0.012713 teacher_seq_len=21 teacher_vocab=248320
train_step step=172 prompt_index=11 loss=1.789097404981e-5 rollout_len=19 step_seconds=5.306639
phase_summary step=172 total=5.306638 student_rollout=2.209369 infer_forward_token_logits=0.005339 infer_sync=0.458918 d2d_bridge_import=0.000589 teacher_forward_total=0.464851 student_forward=0.317254 kl_loss=0.009196 optimizer_zero_grad=0.000021 backward=2.292132 grad_clip=0.000528 optimizer_step=0.000476 post_step_cleanup=0.012727 teacher_seq_len=19 teacher_vocab=248320
train_step step=173 prompt_index=12 loss=1.899753988255e-5 rollout_len=22 step_seconds=5.928857
phase_summary step=173 total=5.928857 student_rollout=2.605290 infer_forward_token_logits=0.005317 infer_sync=0.444946 d2d_bridge_import=0.000592 teacher_forward_total=0.450859 student_forward=0.363453 kl_loss=0.010288 optimizer_zero_grad=0.000024 backward=2.484740 grad_clip=0.000524 optimizer_step=0.000837 post_step_cleanup=0.012749 teacher_seq_len=22 teacher_vocab=248320
train_step step=174 prompt_index=13 loss=1.549839180370e-5 rollout_len=18 step_seconds=5.729047
phase_summary step=174 total=5.729046 student_rollout=2.960184 infer_forward_token_logits=0.005287 infer_sync=0.386835 d2d_bridge_import=0.000551 teacher_forward_total=0.392677 student_forward=0.299206 kl_loss=0.008810 optimizer_zero_grad=0.000022 backward=2.054368 grad_clip=0.000534 optimizer_step=0.000473 post_step_cleanup=0.012680 teacher_seq_len=18 teacher_vocab=248320
train_step step=175 prompt_index=14 loss=1.203757346957e-5 rollout_len=20 step_seconds=5.364988
phase_summary step=175 total=5.364987 student_rollout=2.302264 infer_forward_token_logits=0.005252 infer_sync=0.436269 d2d_bridge_import=0.000551 teacher_forward_total=0.442075 student_forward=0.330926 kl_loss=0.009552 optimizer_zero_grad=0.000026 backward=2.265789 grad_clip=0.000524 optimizer_step=0.000841 post_step_cleanup=0.012920 teacher_seq_len=20 teacher_vocab=248320
train_step step=176 prompt_index=15 loss=1.320100818702e-5 rollout_len=21 step_seconds=6.607092
phase_summary step=176 total=6.607092 student_rollout=3.313127 infer_forward_token_logits=0.008883 infer_sync=0.556344 d2d_bridge_import=0.000078 teacher_forward_total=0.565309 student_forward=0.346522 kl_loss=0.009918 optimizer_zero_grad=0.000025 backward=2.358412 grad_clip=0.000543 optimizer_step=0.000490 post_step_cleanup=0.012685 teacher_seq_len=21 teacher_vocab=248320
train_step step=177 prompt_index=0 loss=1.264785896637e-5 rollout_len=21 step_seconds=5.755076
phase_summary step=177 total=5.755075 student_rollout=2.483082 infer_forward_token_logits=0.005236 infer_sync=0.489108 d2d_bridge_import=0.000076 teacher_forward_total=0.494425 student_forward=0.346439 kl_loss=0.009929 optimizer_zero_grad=0.000026 backward=2.407444 grad_clip=0.000550 optimizer_step=0.000475 post_step_cleanup=0.012637 teacher_seq_len=21 teacher_vocab=248320
train_step step=178 prompt_index=1 loss=1.403151236445e-5 rollout_len=20 step_seconds=6.008300
phase_summary step=178 total=6.008299 student_rollout=2.906026 infer_forward_token_logits=0.005292 infer_sync=0.403892 d2d_bridge_import=0.000611 teacher_forward_total=0.409801 student_forward=0.337661 kl_loss=0.009534 optimizer_zero_grad=0.000026 backward=2.331351 grad_clip=0.000533 optimizer_step=0.000493 post_step_cleanup=0.012805 teacher_seq_len=20 teacher_vocab=248320
train_step step=179 prompt_index=2 loss=1.704478927422e-5 rollout_len=19 step_seconds=5.140511
phase_summary step=179 total=5.140510 student_rollout=2.203007 infer_forward_token_logits=0.005242 infer_sync=0.395430 d2d_bridge_import=0.000039 teacher_forward_total=0.400714 student_forward=0.315621 kl_loss=0.009143 optimizer_zero_grad=0.000023 backward=2.198095 grad_clip=0.000537 optimizer_step=0.000477 post_step_cleanup=0.012808 teacher_seq_len=19 teacher_vocab=248320
train_step step=180 prompt_index=3 loss=1.071173664968e-5 rollout_len=15 step_seconds=3.968841
phase_summary step=180 total=3.968840 student_rollout=1.617057 infer_forward_token_logits=0.005281 infer_sync=0.345970 d2d_bridge_import=0.000639 teacher_forward_total=0.351895 student_forward=0.252269 kl_loss=0.007625 optimizer_zero_grad=0.000023 backward=1.725269 grad_clip=0.000526 optimizer_step=0.000470 post_step_cleanup=0.013616 teacher_seq_len=15 teacher_vocab=248320
train_step step=181 prompt_index=4 loss=1.474684177083e-5 rollout_len=21 step_seconds=5.686755
phase_summary step=181 total=5.686754 student_rollout=2.412770 infer_forward_token_logits=0.005537 infer_sync=0.466605 d2d_bridge_import=0.000549 teacher_forward_total=0.472695 student_forward=0.349153 kl_loss=0.009903 optimizer_zero_grad=0.000025 backward=2.428126 grad_clip=0.000538 optimizer_step=0.000836 post_step_cleanup=0.012597 teacher_seq_len=21 teacher_vocab=248320
train_step step=182 prompt_index=5 loss=1.416056420567e-5 rollout_len=20 step_seconds=6.360093
phase_summary step=182 total=6.360093 student_rollout=3.249788 infer_forward_token_logits=0.005297 infer_sync=0.435856 d2d_bridge_import=0.000550 teacher_forward_total=0.441708 student_forward=0.328430 kl_loss=0.009571 optimizer_zero_grad=0.000024 backward=2.316475 grad_clip=0.000530 optimizer_step=0.000514 post_step_cleanup=0.012966 teacher_seq_len=20 teacher_vocab=248320
train_step step=183 prompt_index=6 loss=1.227365009981e-5 rollout_len=19 step_seconds=5.153456
phase_summary step=183 total=5.153455 student_rollout=2.231654 infer_forward_token_logits=0.005236 infer_sync=0.402381 d2d_bridge_import=0.000598 teacher_forward_total=0.408219 student_forward=0.319113 kl_loss=0.009709 optimizer_zero_grad=0.000026 backward=2.170874 grad_clip=0.000531 optimizer_step=0.000497 post_step_cleanup=0.012780 teacher_seq_len=19 teacher_vocab=248320
train_step step=184 prompt_index=7 loss=1.316959969699e-5 rollout_len=17 step_seconds=4.652590
phase_summary step=184 total=4.652589 student_rollout=1.926487 infer_forward_token_logits=0.005290 infer_sync=0.438730 d2d_bridge_import=0.000077 teacher_forward_total=0.444101 student_forward=0.285552 kl_loss=0.008426 optimizer_zero_grad=0.000025 backward=1.974096 grad_clip=0.000530 optimizer_step=0.000466 post_step_cleanup=0.012821 teacher_seq_len=17 teacher_vocab=248320
train_step step=185 prompt_index=8 loss=1.242790494871e-5 rollout_len=21 step_seconds=5.729033
phase_summary step=185 total=5.729033 student_rollout=2.475844 infer_forward_token_logits=0.005285 infer_sync=0.504732 d2d_bridge_import=0.000593 teacher_forward_total=0.510613 student_forward=0.349975 kl_loss=0.009952 optimizer_zero_grad=0.000025 backward=2.368912 grad_clip=0.000538 optimizer_step=0.000472 post_step_cleanup=0.012611 teacher_seq_len=21 teacher_vocab=248320
train_step step=186 prompt_index=9 loss=1.801172220439e-5 rollout_len=18 step_seconds=4.869181
phase_summary step=186 total=4.869180 student_rollout=2.076739 infer_forward_token_logits=0.005269 infer_sync=0.398822 d2d_bridge_import=0.000074 teacher_forward_total=0.404170 student_forward=0.301519 kl_loss=0.008734 optimizer_zero_grad=0.000024 backward=2.064035 grad_clip=0.000539 optimizer_step=0.000474 post_step_cleanup=0.012861 teacher_seq_len=18 teacher_vocab=248320
train_step step=187 prompt_index=10 loss=1.700344546407e-5 rollout_len=21 step_seconds=5.625133
phase_summary step=187 total=5.625133 student_rollout=2.441355 infer_forward_token_logits=0.005257 infer_sync=0.417106 d2d_bridge_import=0.000077 teacher_forward_total=0.422445 student_forward=0.347893 kl_loss=0.010059 optimizer_zero_grad=0.000027 backward=2.389233 grad_clip=0.000546 optimizer_step=0.000830 post_step_cleanup=0.012652 teacher_seq_len=21 teacher_vocab=248320
train_step step=188 prompt_index=11 loss=1.496468848927e-5 rollout_len=19 step_seconds=6.017275
phase_summary step=188 total=6.017274 student_rollout=3.025873 infer_forward_token_logits=0.005150 infer_sync=0.495018 d2d_bridge_import=0.000554 teacher_forward_total=0.500727 student_forward=0.315547 kl_loss=0.009127 optimizer_zero_grad=0.000024 backward=2.152082 grad_clip=0.000525 optimizer_step=0.000480 post_step_cleanup=0.012801 teacher_seq_len=19 teacher_vocab=248320
train_step step=189 prompt_index=12 loss=1.890406565508e-5 rollout_len=22 step_seconds=5.930530
phase_summary step=189 total=5.930529 student_rollout=2.595540 infer_forward_token_logits=0.005326 infer_sync=0.435266 d2d_bridge_import=0.000600 teacher_forward_total=0.441197 student_forward=0.366024 kl_loss=0.010279 optimizer_zero_grad=0.000024 backward=2.503355 grad_clip=0.000544 optimizer_step=0.000473 post_step_cleanup=0.012998 teacher_seq_len=22 teacher_vocab=248320
train_step step=190 prompt_index=13 loss=1.541612618894e-5 rollout_len=18 step_seconds=5.076170
phase_summary step=190 total=5.076169 student_rollout=2.113326 infer_forward_token_logits=0.005346 infer_sync=0.455929 d2d_bridge_import=0.000640 teacher_forward_total=0.461920 student_forward=0.302662 kl_loss=0.008772 optimizer_zero_grad=0.000024 backward=2.175700 grad_clip=0.000526 optimizer_step=0.000460 post_step_cleanup=0.012690 teacher_seq_len=18 teacher_vocab=248320
train_step step=191 prompt_index=14 loss=1.197984965984e-5 rollout_len=20 step_seconds=5.374630
phase_summary step=191 total=5.374629 student_rollout=2.315708 infer_forward_token_logits=0.005326 infer_sync=0.452041 d2d_bridge_import=0.000550 teacher_forward_total=0.457920 student_forward=0.332551 kl_loss=0.009543 optimizer_zero_grad=0.000023 backward=2.245071 grad_clip=0.000534 optimizer_step=0.000479 post_step_cleanup=0.012706 teacher_seq_len=20 teacher_vocab=248320
train_step step=192 prompt_index=15 loss=1.317961414316e-5 rollout_len=21 step_seconds=5.871967
phase_summary step=192 total=5.871966 student_rollout=2.458128 infer_forward_token_logits=0.003689 infer_sync=0.500440 d2d_bridge_import=0.000082 teacher_forward_total=0.504217 student_forward=0.349180 kl_loss=0.009981 optimizer_zero_grad=0.000030 backward=2.536021 grad_clip=0.000532 optimizer_step=0.000848 post_step_cleanup=0.012941 teacher_seq_len=21 teacher_vocab=248320
train_step step=193 prompt_index=0 loss=1.258161046280e-5 rollout_len=21 step_seconds=6.749322
phase_summary step=193 total=6.749321 student_rollout=3.447025 infer_forward_token_logits=0.005360 infer_sync=0.554103 d2d_bridge_import=0.000079 teacher_forward_total=0.559548 student_forward=0.347504 kl_loss=0.009941 optimizer_zero_grad=0.000025 backward=2.371290 grad_clip=0.000548 optimizer_step=0.000485 post_step_cleanup=0.012866 teacher_seq_len=21 teacher_vocab=248320
train_step step=194 prompt_index=1 loss=1.394644550601e-5 rollout_len=20 step_seconds=5.440932
phase_summary step=194 total=5.440931 student_rollout=2.363568 infer_forward_token_logits=0.005440 infer_sync=0.422005 d2d_bridge_import=0.000589 teacher_forward_total=0.428039 student_forward=0.336699 kl_loss=0.009541 optimizer_zero_grad=0.000027 backward=2.289099 grad_clip=0.000533 optimizer_step=0.000466 post_step_cleanup=0.012868 teacher_seq_len=20 teacher_vocab=248320
train_step step=195 prompt_index=2 loss=1.732799137244e-5 rollout_len=19 step_seconds=5.158296
phase_summary step=195 total=5.158296 student_rollout=2.186855 infer_forward_token_logits=0.005304 infer_sync=0.462876 d2d_bridge_import=0.000074 teacher_forward_total=0.468259 student_forward=0.318080 kl_loss=0.009288 optimizer_zero_grad=0.000028 backward=2.161724 grad_clip=0.000533 optimizer_step=0.000477 post_step_cleanup=0.012965 teacher_seq_len=19 teacher_vocab=248320
train_step step=196 prompt_index=3 loss=1.069345125870e-5 rollout_len=15 step_seconds=4.057557
phase_summary step=196 total=4.057556 student_rollout=1.654939 infer_forward_token_logits=0.005416 infer_sync=0.387731 d2d_bridge_import=0.000056 teacher_forward_total=0.393207 student_forward=0.254634 kl_loss=0.007745 optimizer_zero_grad=0.000023 backward=1.733102 grad_clip=0.000544 optimizer_step=0.000476 post_step_cleanup=0.012830 teacher_seq_len=15 teacher_vocab=248320
train_step step=197 prompt_index=4 loss=1.469027210987e-5 rollout_len=21 step_seconds=6.116017
phase_summary step=197 total=6.116016 student_rollout=2.697040 infer_forward_token_logits=0.005373 infer_sync=0.495887 d2d_bridge_import=0.000587 teacher_forward_total=0.501852 student_forward=0.347541 kl_loss=0.009865 optimizer_zero_grad=0.000024 backward=2.545840 grad_clip=0.000530 optimizer_step=0.000512 post_step_cleanup=0.012715 teacher_seq_len=21 teacher_vocab=248320
train_step step=198 prompt_index=5 loss=1.411534321960e-5 rollout_len=20 step_seconds=5.611746
phase_summary step=198 total=5.611746 student_rollout=2.376320 infer_forward_token_logits=0.005370 infer_sync=0.431425 d2d_bridge_import=0.000550 teacher_forward_total=0.437349 student_forward=0.332140 kl_loss=0.009652 optimizer_zero_grad=0.000025 backward=2.442297 grad_clip=0.000531 optimizer_step=0.000481 post_step_cleanup=0.012861 teacher_seq_len=20 teacher_vocab=248320
train_step step=199 prompt_index=6 loss=1.221216643899e-5 rollout_len=19 step_seconds=5.127573
phase_summary step=199 total=5.127572 student_rollout=2.210413 infer_forward_token_logits=0.003237 infer_sync=0.375994 d2d_bridge_import=0.000601 teacher_forward_total=0.379837 student_forward=0.322377 kl_loss=0.009208 optimizer_zero_grad=0.000027 backward=2.191807 grad_clip=0.000524 optimizer_step=0.000479 post_step_cleanup=0.012816 teacher_seq_len=19 teacher_vocab=248320
train_step step=200 prompt_index=7 loss=1.306508693233e-5 rollout_len=17 step_seconds=4.592184
phase_summary step=200 total=4.592184 student_rollout=1.897497 infer_forward_token_logits=0.005293 infer_sync=0.434928 d2d_bridge_import=0.000593 teacher_forward_total=0.440819 student_forward=0.285540 kl_loss=0.008456 optimizer_zero_grad=0.000026 backward=1.945952 grad_clip=0.000540 optimizer_step=0.000474 post_step_cleanup=0.012814 teacher_seq_len=17 teacher_vocab=248320
eval_summary step=200 train_kl=1.484889571657e-5 heldout_kl=1.702545500848e-5 eval_seconds=7.797318
training_summary total_steps=200 total_wall_seconds=1155.176026 mean_step_seconds=5.658789 median_step_seconds=5.728152 first_loss=1.676253305050e-5 final_loss=1.306508693233e-5 sampled_loss_reduction_pct=22.057801
