# default
# Launches the release `infer` binary against /root/DeepSeek-V4-Flash on
# port 18178 with a single slot. The ARLE_* variables are set only for this
# one process (command-prefix assignments) and are not exported to the shell.
ARLE_DSV4_MOE_BACKEND=deepep \
ARLE_DSV4_INCREMENTAL_KV=1 \
ARLE_DSV4_FUSED_DISPATCH_PAYLOAD=1 \
ARLE_CUDA_DISABLE_MARLIN_W4_FP8=1 \
/root/arle/target/release/infer \
  --model-path /root/DeepSeek-V4-Flash \
  --port 18178 \
  --num-slots 1 \
  --max-seq-len 4096 \
  --mem-fraction-static 0.10 \
  --kv-cache-dtype fp8 \
  --deepseek-distributed-layers 43
# route_grouped_pair
# Same launch as the "default" command in this file, except that
# ARLE_DSV4_ROUTE_GROUPED_EXPERTS=1 is additionally set and the server
# listens on port 18179 instead of 18178. The ARLE_* variables are set only
# for this one process (command-prefix assignments).
ARLE_DSV4_MOE_BACKEND=deepep \
ARLE_DSV4_INCREMENTAL_KV=1 \
ARLE_DSV4_FUSED_DISPATCH_PAYLOAD=1 \
ARLE_DSV4_ROUTE_GROUPED_EXPERTS=1 \
ARLE_CUDA_DISABLE_MARLIN_W4_FP8=1 \
/root/arle/target/release/infer \
  --model-path /root/DeepSeek-V4-Flash \
  --port 18179 \
  --num-slots 1 \
  --max-seq-len 4096 \
  --mem-fraction-static 0.10 \
  --kv-cache-dtype fp8 \
  --deepseek-distributed-layers 43
