# Gate-0 spike capture
# Host: Lambda gpu_1x_gh200 (us-east-3), arm64 GH200, ConnectX-7 RoCE
# Date: 2026-05-22

## uname
Linux 192-222-50-117 6.8.0-1013-nvidia-64k #14~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Aug 21 01:03:36 UTC 2 aarch64 aarch64 aarch64 GNU/Linux

## ibstat
CA 'mlx5_0'
	CA type: MT4126
	Number of ports: 1
	Firmware version: 32.42.1000
	Hardware version: 1
	Node GUID: 0x<redacted-guid>
	System image GUID: 0x<redacted-guid>
	Port 1:
		State: Active
		Physical state: LinkUp
		Rate: 100
		Base lid: 0
		LMC: 0
		SM lid: 0
		Capability mask: 0x00010000
		Port GUID: 0x<redacted-guid>
		Link layer: Ethernet

## /sys/class/infiniband (devices + link_layer)
total 0
lrwxrwxrwx 1 root root 0 May 22 12:00 mlx5_0 -> ../../devices/pci0000:e5/0000:e5:00.0/0000:e6:00.0/infiniband/mlx5_0
/sys/class/infiniband/mlx5_0/ports/1/ link_layer=Ethernet state=4: ACTIVE

## port counters available
VL15_dropped
excessive_buffer_overrun_errors
link_downed
link_error_recovery
local_link_integrity_errors
multicast_rcv_packets
multicast_xmit_packets
port_rcv_constraint_errors
port_rcv_data
port_rcv_errors
port_rcv_packets
port_rcv_remote_physical_errors
port_rcv_switch_relay_errors
port_xmit_constraint_errors
port_xmit_data
port_xmit_discards
port_xmit_packets
port_xmit_wait
symbol_error
unicast_rcv_packets
unicast_xmit_packets

## tracepoint:ib_* (management plane only)
tracepoint:ib_mad:ib_mad_create_agent
tracepoint:ib_mad:ib_mad_error_handler
tracepoint:ib_mad:ib_mad_handle_ib_smi
tracepoint:ib_mad:ib_mad_handle_opa_smi
tracepoint:ib_mad:ib_mad_handle_out_ib_smi
tracepoint:ib_mad:ib_mad_handle_out_opa_smi
tracepoint:ib_mad:ib_mad_ib_send_mad
tracepoint:ib_mad:ib_mad_recv_done_agent
tracepoint:ib_mad:ib_mad_recv_done_handler
tracepoint:ib_mad:ib_mad_send_done_agent
tracepoint:ib_mad:ib_mad_send_done_handler
tracepoint:ib_mad:ib_mad_send_done_resend
tracepoint:ib_mad:ib_mad_unregister_agent
tracepoint:ib_umad:ib_umad_read_recv
tracepoint:ib_umad:ib_umad_read_send
tracepoint:ib_umad:ib_umad_write

## rdma_core tracepoints (CQ/MR lifecycle + KERNEL-mode cq_poll only; `enable` and `filter` below are tracefs control files, not events)
cq_alloc
cq_alloc_error
cq_drain_complete
cq_free
cq_modify
cq_poll
cq_process
cq_reschedule
cq_schedule
enable
filter
mr_alloc
mr_dereg
mr_integ_alloc

## rdma_cma tracepoints (connection mgmt; `enable` and `filter` below are tracefs control files, not events)
cm_add_one
cm_disconnect
cm_event_done
cm_event_handler
cm_id_attach
cm_id_destroy
cm_qp_create
cm_qp_destroy
cm_remove_one
cm_req_handler
cm_send_mra
cm_send_rej
cm_send_rep
cm_send_req
cm_send_rtu
cm_send_sidr_rep
cm_send_sidr_req
cm_sent_drep
cm_sent_dreq
enable
filter

## any data-path WC/CQE-with-status tracepoint? (expect none)
NONE - confirms userspace ibv_poll_cq path

## libibverbs / librdmacm (uprobe targets)
	librdmacm.so.1 (libc6,AArch64) => /lib/aarch64-linux-gnu/librdmacm.so.1
	librdmacm.so (libc6,AArch64) => /lib/aarch64-linux-gnu/librdmacm.so
	libibverbs.so.1 (libc6,AArch64) => /lib/aarch64-linux-gnu/libibverbs.so.1
	libibverbs.so (libc6,AArch64) => /lib/aarch64-linux-gnu/libibverbs.so

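## DECISION: use userspace uprobes for the data path — a uretprobe on the libibverbs `ibv_poll_cq` path to read work-completion (WC) status, plus the verbs async-event fd for QP/port events. Kernel tracepoints cover only the control plane (MAD, connection management, CQ/MR lifecycle) and kernel-mode CQ polling; the D3 sysfs counter poller remains the independent, measured-counter core.
NOTE(review): in rdma-core, `ibv_poll_cq` is a static inline in `<infiniband/verbs.h>` that dispatches through `context->ops.poll_cq`, so it may not exist as a probeable symbol in libibverbs.so. Confirm the actual attach point (likely the provider's poll_cq implementation, e.g. in libmlx5, which is not in the library list above) before building on this.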
