Key: blocks.1.moe.experts.6.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.8.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.4.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.2.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.10.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.10.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.9.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.4.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.5.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.11.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.7.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.8.c_fc.weight, Shape: [512, 128]
Key: blocks.2.attn.q_proj.weight, Shape: [128, 128]
Key: blocks.3.attn.v_proj.weight, Shape: [128, 128]
Key: blocks.4.moe.experts.1.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.0.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.3.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.2.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.9.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.1.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.8.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.4.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.1.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.2.c_fc.bias, Shape: [512]
Key: blocks.1.ln2.bias, Shape: [128]
Key: blocks.1.moe.experts.8.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.10.c_fc.weight, Shape: [512, 128]
Key: blocks.3.ln2.bias, Shape: [128]
Key: blocks.2.moe.experts.5.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.6.c_proj.weight, Shape: [128, 512]
Key: blocks.4.moe.experts.6.c_proj.weight, Shape: [128, 512]
Key: blocks.3.moe.experts.10.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.6.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.5.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.8.c_fc.bias, Shape: [512]
Key: lm_head.weight, Shape: [8000, 128]
Key: blocks.1.attn.q_proj.weight, Shape: [128, 128]
Key: blocks.3.moe.experts.8.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.0.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.8.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.7.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.9.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.4.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.5.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.10.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.3.c_fc.bias, Shape: [512]
Key: blocks.0.attn.o_proj.weight, Shape: [128, 128]
Key: blocks.2.moe.experts.3.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.3.c_fc.bias, Shape: [512]
Key: blocks.1.moe.experts.10.c_fc.bias, Shape: [512]
Key: pos_embed.weight, Shape: [128, 128]
Key: blocks.1.moe.experts.4.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.3.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.2.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.5.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.0.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.6.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.7.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.11.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.gate.weight, Shape: [12, 128]
Key: blocks.2.moe.experts.2.c_proj.weight, Shape: [128, 512]
Key: blocks.4.attn.k_proj.weight, Shape: [128, 128]
Key: blocks.1.moe.experts.0.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.2.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.3.c_proj.weight, Shape: [128, 512]
Key: blocks.4.moe.experts.8.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.7.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.10.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.7.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.3.c_proj.weight, Shape: [128, 512]
Key: blocks.4.ln2.weight, Shape: [128]
Key: blocks.3.moe.experts.3.c_proj.weight, Shape: [128, 512]
Key: blocks.4.moe.experts.0.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.5.c_fc.bias, Shape: [512]
Key: blocks.1.moe.experts.2.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.8.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.6.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.0.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.5.c_proj.weight, Shape: [128, 512]
Key: blocks.2.attn.k_proj.weight, Shape: [128, 128]
Key: blocks.0.moe.experts.8.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.3.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.9.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.1.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.9.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.11.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.0.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.10.c_proj.bias, Shape: [128]
Key: blocks.4.ln1.weight, Shape: [128]
Key: blocks.3.moe.experts.1.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.3.c_proj.weight, Shape: [128, 512]
Key: blocks.1.ln1.weight, Shape: [128]
Key: blocks.2.moe.experts.9.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.11.c_proj.weight, Shape: [128, 512]
Key: blocks.4.moe.experts.1.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.6.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.4.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.5.c_proj.bias, Shape: [128]
Key: blocks.0.ln1.bias, Shape: [128]
Key: blocks.1.moe.experts.4.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.4.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.5.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.7.c_fc.weight, Shape: [512, 128]
Key: blocks.2.ln2.weight, Shape: [128]
Key: blocks.3.moe.experts.0.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.2.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.9.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.10.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.7.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.0.c_fc.bias, Shape: [512]
Key: blocks.3.attn.k_proj.weight, Shape: [128, 128]
Key: blocks.2.moe.gate.weight, Shape: [12, 128]
Key: blocks.1.moe.experts.3.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.5.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.2.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.1.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.8.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.2.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.0.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.11.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.4.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.7.c_proj.weight, Shape: [128, 512]
Key: ln_f.weight, Shape: [128]
Key: blocks.3.moe.experts.11.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.10.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.5.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.8.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.1.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.11.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.0.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.6.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.6.c_fc.bias, Shape: [512]
Key: blocks.1.moe.experts.0.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.6.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.9.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.10.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.2.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.7.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.7.c_fc.bias, Shape: [512]
Key: blocks.1.moe.experts.11.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.11.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.10.c_fc.weight, Shape: [512, 128]
Key: blocks.2.ln1.weight, Shape: [128]
Key: blocks.4.moe.experts.4.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.11.c_proj.bias, Shape: [128]
Key: blocks.2.ln2.bias, Shape: [128]
Key: blocks.0.moe.experts.10.c_proj.weight, Shape: [128, 512]
Key: blocks.3.moe.experts.5.c_fc.bias, Shape: [512]
Key: blocks.0.attn.k_proj.weight, Shape: [128, 128]
Key: blocks.3.moe.experts.2.c_fc.bias, Shape: [512]
Key: blocks.3.moe.gate.weight, Shape: [12, 128]
Key: blocks.0.moe.experts.8.c_proj.weight, Shape: [128, 512]
Key: embed.weight, Shape: [8000, 128]
Key: blocks.4.ln1.bias, Shape: [128]
Key: blocks.2.moe.experts.0.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.7.c_proj.weight, Shape: [128, 512]
Key: blocks.0.attn.q_proj.weight, Shape: [128, 128]
Key: blocks.2.moe.experts.0.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.6.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.3.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.0.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.3.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.1.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.10.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.2.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.0.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.9.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.7.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.1.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.9.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.6.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.4.c_proj.weight, Shape: [128, 512]
Key: blocks.0.ln2.bias, Shape: [128]
Key: blocks.2.moe.experts.8.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.1.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.6.c_proj.bias, Shape: [128]
Key: blocks.1.moe.experts.7.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.4.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.1.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.10.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.11.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.11.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.9.c_proj.weight, Shape: [128, 512]
Key: blocks.1.attn.k_proj.weight, Shape: [128, 128]
Key: blocks.1.moe.experts.10.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.1.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.9.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.1.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.5.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.6.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.5.c_proj.bias, Shape: [128]
Key: blocks.4.moe.experts.8.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.7.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.7.c_proj.weight, Shape: [128, 512]
Key: blocks.0.ln1.weight, Shape: [128]
Key: blocks.3.ln1.bias, Shape: [128]
Key: blocks.3.moe.experts.4.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.5.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.9.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.5.c_proj.weight, Shape: [128, 512]
Key: blocks.4.moe.experts.5.c_fc.bias, Shape: [512]
Key: blocks.3.attn.q_proj.weight, Shape: [128, 128]
Key: blocks.2.moe.experts.8.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.11.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.11.c_proj.weight, Shape: [128, 512]
Key: blocks.4.attn.v_proj.weight, Shape: [128, 128]
Key: blocks.1.moe.experts.6.c_proj.bias, Shape: [128]
Key: blocks.2.attn.o_proj.weight, Shape: [128, 128]
Key: blocks.4.attn.q_proj.weight, Shape: [128, 128]
Key: blocks.4.ln2.bias, Shape: [128]
Key: blocks.4.moe.experts.3.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.0.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.9.c_proj.weight, Shape: [128, 512]
Key: ln_f.bias, Shape: [128]
Key: blocks.3.moe.experts.6.c_proj.weight, Shape: [128, 512]
Key: blocks.3.moe.experts.9.c_fc.bias, Shape: [512]
Key: blocks.3.ln1.weight, Shape: [128]
Key: blocks.3.ln2.weight, Shape: [128]
Key: blocks.3.moe.experts.7.c_fc.weight, Shape: [512, 128]
Key: blocks.0.moe.experts.1.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.6.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.10.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.7.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.6.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.10.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.7.c_fc.weight, Shape: [512, 128]
Key: blocks.4.attn.o_proj.weight, Shape: [128, 128]
Key: blocks.2.moe.experts.11.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.1.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.2.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.1.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.1.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.8.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.0.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.3.c_proj.bias, Shape: [128]
Key: blocks.3.moe.experts.8.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.6.c_fc.bias, Shape: [512]
Key: blocks.1.moe.experts.9.c_proj.weight, Shape: [128, 512]
Key: blocks.0.ln2.weight, Shape: [128]
Key: blocks.2.moe.experts.4.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.11.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.11.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.11.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.4.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.9.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.2.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.3.c_proj.weight, Shape: [128, 512]
Key: blocks.3.moe.experts.5.c_proj.weight, Shape: [128, 512]
Key: blocks.1.ln2.weight, Shape: [128]
Key: blocks.1.moe.experts.7.c_proj.bias, Shape: [128]
Key: blocks.0.moe.gate.weight, Shape: [12, 128]
Key: blocks.0.moe.experts.11.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.10.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.3.c_fc.bias, Shape: [512]
Key: blocks.4.moe.experts.10.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.5.c_fc.bias, Shape: [512]
Key: blocks.1.moe.experts.2.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.4.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.4.c_fc.bias, Shape: [512]
Key: blocks.3.attn.o_proj.weight, Shape: [128, 128]
Key: blocks.1.attn.o_proj.weight, Shape: [128, 128]
Key: blocks.2.attn.v_proj.weight, Shape: [128, 128]
Key: blocks.3.moe.experts.4.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.1.c_proj.bias, Shape: [128]
Key: blocks.0.attn.v_proj.weight, Shape: [128, 128]
Key: blocks.2.moe.experts.2.c_fc.weight, Shape: [512, 128]
Key: blocks.1.attn.v_proj.weight, Shape: [128, 128]
Key: blocks.0.moe.experts.8.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.1.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.3.c_proj.bias, Shape: [128]
Key: blocks.0.moe.experts.3.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.0.c_fc.bias, Shape: [512]
Key: blocks.0.moe.experts.5.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.2.c_proj.weight, Shape: [128, 512]
Key: blocks.1.ln1.bias, Shape: [128]
Key: blocks.3.moe.experts.8.c_fc.weight, Shape: [512, 128]
Key: blocks.3.moe.experts.9.c_proj.weight, Shape: [128, 512]
Key: blocks.0.moe.experts.2.c_proj.weight, Shape: [128, 512]
Key: blocks.1.moe.experts.6.c_proj.weight, Shape: [128, 512]
Key: blocks.2.moe.experts.2.c_fc.bias, Shape: [512]
Key: blocks.3.moe.experts.11.c_fc.weight, Shape: [512, 128]
Key: blocks.2.moe.experts.4.c_fc.weight, Shape: [512, 128]
Key: blocks.4.moe.experts.9.c_proj.bias, Shape: [128]
Key: blocks.2.moe.experts.9.c_fc.bias, Shape: [512]
Key: blocks.2.moe.experts.7.c_proj.weight, Shape: [128, 512]
Key: blocks.2.ln1.bias, Shape: [128]
Key: blocks.0.moe.experts.3.c_fc.weight, Shape: [512, 128]
Key: blocks.1.moe.experts.0.c_proj.bias, Shape: [128]
Key: blocks.1.moe.gate.weight, Shape: [12, 128]
Key: blocks.4.moe.experts.4.c_fc.weight, Shape: [512, 128]
