{
  "source_model": "/home/ckl/.cache/modelscope/hub/Qwen/Qwen3___5-9B",
  "tq_model": "/home/ckl/.cache/modelscope/hub/Qwen/Qwen3___5-9B-TQ4",
  "tensor_base": "model.language_model.layers.1.mlp.gate_proj",
  "row_start": 0,
  "row_count": 8,
  "shape": [
    8,
    4096
  ],
  "bits": 4,
  "group_size": 128,
  "centroids_config_vs_cuda": {
    "max_abs": 1.4901161193847656e-08,
    "mean_abs": 3.3760443329811096e-09,
    "config": [
      -0.23777656257152557,
      -0.18096588551998138,
      -0.14193911850452423,
      -0.11041539162397385,
      -0.08293881267309189,
      -0.05785765498876572,
      -0.03420550003647804,
      -0.011320935562252998,
      0.011320935562252998,
      0.03420550003647804,
      0.05785765498876572,
      0.08293881267309189,
      0.11041539162397385,
      0.14193911850452423,
      0.18096588551998138,
      0.23777656257152557
    ],
    "cuda": [
      -0.23777654767036438,
      -0.18096588551998138,
      -0.14193911850452423,
      -0.11041539162397385,
      -0.08293882012367249,
      -0.057857658714056015,
      -0.03420550003647804,
      -0.011320936493575573,
      0.011320936493575573,
      0.03420550003647804,
      0.057857658714056015,
      0.08293882012367249,
      0.11041539162397385,
      0.14193911850452423,
      0.18096588551998138,
      0.23777654767036438
    ]
  },
  "metrics": [
    {
      "lhs": "python_faithful_dequant",
      "rhs": "bf16_source",
      "elements": 32768,
      "max_abs": 0.005407883320003748,
      "mean_abs": 0.0008539303671568632,
      "rmse": 0.0010728667257353663,
      "rhs_rms": 0.011161223985254765,
      "rmse_over_rhs_rms": 0.09612446870995012,
      "max_rel": 1238.516845703125,
      "mean_rel": 0.5199573040008545,
      "rel_p50": 0.0958661288022995,
      "rel_p90": 0.5863989591598511,
      "rel_p95": 1.1582262516021729,
      "rel_p99": 5.039450645446777,
      "rel_p999": 45.884220123291016,
      "top1pct_rel_mean": 27.755062103271484,
      "top1pct_rel_max": 1238.516845703125
    },
    {
      "lhs": "arle_cuda_dequant",
      "rhs": "bf16_source",
      "elements": 32768,
      "max_abs": 0.097412109375,
      "mean_abs": 0.009253103286027908,
      "rmse": 0.015700144693255424,
      "rhs_rms": 0.011161223985254765,
      "rmse_over_rhs_rms": 1.4066687232508805,
      "max_rel": 1235.123779296875,
      "mean_rel": 1.3987815380096436,
      "rel_p50": 1.4106343984603882,
      "rel_p90": 2.160231590270996,
      "rel_p95": 2.409989356994629,
      "rel_p99": 5.584445476531982,
      "rel_p999": 46.695430755615234,
      "top1pct_rel_mean": 28.100252151489258,
      "top1pct_rel_max": 1235.123779296875
    },
    {
      "lhs": "arle_cuda_dequant",
      "rhs": "python_faithful_dequant",
      "elements": 32768,
      "max_abs": 0.09718066453933716,
      "mean_abs": 0.008819670416414738,
      "rmse": 0.015668567270040512,
      "rhs_rms": 0.011117699556052685,
      "rmse_over_rhs_rms": 1.4093353747367863,
      "max_rel": 2.0038509368896484,
      "mean_rel": 1.0006495714187622,
      "rel_p50": 0.18905194103717804,
      "rel_p90": 2.001624584197998,
      "rel_p95": 2.002197742462158,
      "rel_p99": 2.0030250549316406,
      "rel_p999": 2.0036303997039795,
      "top1pct_rel_mean": 2.00331449508667,
      "top1pct_rel_max": 2.0038509368896484
    }
  ],
  "fwht_bug_control": {
    "cuda_vs_good_python_max_abs": 0.09718066453933716,
    "cuda_vs_good_python_rmse": 0.015668567270040512,
    "cuda_vs_bug_emulation_max_abs": 0.00012020766735076904,
    "cuda_vs_bug_emulation_rmse": 1.8410868506180122e-05,
    "cuda_vs_bug_emulation_bf16_max_abs": 1.52587890625e-05,
    "cuda_vs_bug_emulation_bf16_rmse": 9.424609714869803e-08,
    "interpretation": "ARLE CUDA output matches the current kernel FWHT sign convention after BF16 rounding; it does not match scripts/turboquant_weights.py faithful FWHT."
  },
  "decision": "cuda_dequant_differs_from_python"
}
