All files / src/utils graphAlgorithms.ts

93.13% Statements 190/204
70.24% Branches 85/121
100% Functions 22/22
99.39% Lines 165/166

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441                                                                                            80x 421x 80x 658x 657x 657x 657x 657x 658x 657x 657x     80x                                 8711x 8617x                   25x 25x 31x 31x 31x 31x 45x 26x 26x       25x       16x 16x 16x 16x 20x 19x 19x     16x 25x 25x   16x                     63x 348x 63x 597x 597x 597x   63x                                                                       191x 191x 191x 191x 2041x 2041x 2041x   191x 191x 191x       1013x 2776x 2776x 2776x 2697x   1013x       822x 822x 822x 822x       10x 10x                   6174x 6174x 6174x 6174x 6174x 5352x       191x 191x 1013x 1013x 1013x 1013x 1013x       1013x 1013x 6174x 6174x 6174x           1013x 1013x 1013x 1013x 832x 832x                   191x 2041x 191x 1013x 1013x 1013x 1013x               35x 35x 35x 191x   35x 191x 191x 191x     35x 35x       32x 185x   35x                                                               70x     70x 390x 390x 344x 344x 344x 133x 133x     70x         35x 195x 35x               35x 35x 35x 195x     35x 35x 35x 70x 35x 35x 35x 35x   35x         344x 344x   344x 344x 629x 351x 351x     344x               344x 344x 1268x 1268x 1268x   344x                                           32x 301x 32x 31x 31x   31x 301x 297x         31x 31x 175x 175x 175x 1977x 907x 1977x     31x    
/**
 * @fileoverview Graph algorithms for MEP relationship-network OSINT.
 *
 * Pure, deterministic helpers consumed by `network_analysis`:
 * - {@link buildAdjacency} — adjacency map from a weighted edge list.
 * - {@link bfsLimited} — depth-bounded BFS ego-network extraction.
 * - {@link weightedDegree} — weighted-degree centrality per node.
 * - {@link betweennessCentrality} — Brandes' algorithm (weighted, Dijkstra-based, O(V²·log V) with the current array-backed priority queue).
 * - {@link labelPropagation} — deterministic asynchronous label-propagation community detection.
 * - {@link modularity} — Newman's modularity Q for a partition.
 *
 * Algorithms are deterministic — every iteration orders nodes/edges by string
 * comparison so identical input always produces identical output, satisfying
 * the OSINT reproducibility requirement.
 *
 * ISMS Policy: SC-002 (Input Validation), AC-003 (Least Privilege).
 *
 * @module utils/graphAlgorithms
 * @since 1.4.0
 */
 
/** Simple weighted undirected edge representation. */
export interface WeightedEdge {
  /** Stable identifier of the first endpoint. */
  sourceId: string;
  /** Stable identifier of the second endpoint. */
  targetId: string;
  /** Edge weight in `[0, 1]`. */
  weight: number;
}
 
/** Adjacency representation: `adj.get(node)` → neighbour → weight. */
export type AdjacencyMap = Map<string, Map<string, number>>;
 
/**
 * Build an undirected adjacency map from a weighted edge list.
 *
 * Self-loops are dropped. Parallel edges between the same pair are merged
 * by taking the **maximum** weight (so combined networks remain
 * well-defined when both committee and voting edges describe the same pair).
 *
 * @param nodeIds - Full node universe; nodes without edges still appear as empty maps.
 * @param edges - Weighted edges (order-independent).
 * @returns Adjacency map keyed by node id.
 */
export function buildAdjacency(nodeIds: readonly string[], edges: readonly WeightedEdge[]): AdjacencyMap {
  const adj: AdjacencyMap = new Map();
  for (const id of nodeIds) adj.set(id, new Map());
  for (const e of edges) {
    if (e.sourceId === e.targetId) continue;
    const a = adj.get(e.sourceId);
    const b = adj.get(e.targetId);
    Iif (a === undefined || b === undefined) continue;
    const existing = a.get(e.targetId) ?? 0;
    if (e.weight > existing) {
      a.set(e.targetId, e.weight);
      b.set(e.sourceId, e.weight);
    }
  }
  return adj;
}
 
/**
 * Breadth-first traversal capped at `maxDepth` hops from one or more seeds.
 *
 * Returns the set of node ids reachable within `maxDepth` hops (inclusive).
 * `maxDepth=1` returns only the seeds plus their direct neighbours;
 * `maxDepth=0` returns only the seeds themselves.
 *
 * @param adj - Adjacency map produced by {@link buildAdjacency}.
 * @param seeds - One or more starting node ids.
 * @param maxDepth - Non-negative hop limit.
 * @returns Set of reachable node ids.
 */
/** Lex-compare helper for deterministic ordering. */
function lexCompare(a: string, b: string): number {
  if (a < b) return -1;
  Eif (a > b) return 1;
  return 0;
}
 
/** Expand one BFS frontier into the next within an adjacency map. */
function expandFrontier(
  adj: AdjacencyMap,
  frontier: readonly string[],
  visited: Set<string>
): string[] {
  const next: string[] = [];
  for (const node of frontier) {
    const neighbours = adj.get(node);
    Iif (neighbours === undefined) continue;
    const sortedNeighbours = [...neighbours.keys()].sort(lexCompare);
    for (const n of sortedNeighbours) {
      if (!visited.has(n)) {
        visited.add(n);
        next.push(n);
      }
    }
  }
  return next;
}
 
export function bfsLimited(adj: AdjacencyMap, seeds: readonly string[], maxDepth: number): Set<string> {
  const visited = new Set<string>();
  Iif (maxDepth < 0) return visited;
  let frontier: string[] = [];
  for (const s of seeds) {
    if (adj.has(s) && !visited.has(s)) {
      visited.add(s);
      frontier.push(s);
    }
  }
  for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) {
    frontier.sort(lexCompare);
    frontier = expandFrontier(adj, frontier, visited);
  }
  return visited;
}
 
/**
 * Weighted-degree centrality: sum of incident edge weights per node.
 *
 * @param nodeIds - Node universe (so isolates return 0 rather than be omitted).
 * @param edges - Weighted edges.
 * @returns Map from node id → summed incident weight.
 */
export function weightedDegree(nodeIds: readonly string[], edges: readonly WeightedEdge[]): Map<string, number> {
  const out = new Map<string, number>();
  for (const id of nodeIds) out.set(id, 0);
  for (const e of edges) {
    Iif (e.sourceId === e.targetId) continue;
    Eif (out.has(e.sourceId)) out.set(e.sourceId, (out.get(e.sourceId) ?? 0) + e.weight);
    Eif (out.has(e.targetId)) out.set(e.targetId, (out.get(e.targetId) ?? 0) + e.weight);
  }
  return out;
}
 
/**
 * Brandes' betweenness centrality on a weighted undirected graph.
 *
 * Implements Dijkstra-based shortest-path counting from each source as
 * described in U. Brandes, *A Faster Algorithm for Betweenness Centrality*
 * (2001). Edge weights are interpreted as **similarity** — internally
 * converted to distance `1 / weight` so high-weight edges are "shorter"
 * and therefore preferred routes for brokers.
 *
 * Complexity: `O(V² · log V)` per source with the current array-backed
 * priority queue (each `popClosest` performs a full `Array.prototype.sort`
 * and `relaxShorter` uses `Array.prototype.includes`). A binary-heap
 * priority queue would lower this to the textbook `O(V · (V + E) · log V)`
 * bound but is not required for the small graphs this module is designed
 * for (V is capped at a few hundred nodes by the calling tool).
 *
 * The returned scores are normalised by `1 / ((V-1)(V-2))` for V≥3 (undirected
 * graph — each unordered pair is traversed twice during Brandes) so they
 * land in `[0, 1]`. For V<3, raw scores (all zero) are returned.
 *
 * @param nodeIds - Node universe (deterministic order is enforced internally).
 * @param edges - Weighted edges (similarity in `(0, 1]`).
 * @returns Map from node id → normalised betweenness in `[0, 1]`.
 */
/** Brandes per-source Dijkstra state. */
interface BrandesState {
  stack: string[];
  pred: Map<string, string[]>;
  sigma: Map<string, number>;
  dist: Map<string, number>;
}
 
function initBrandesState(nodes: readonly string[], source: string): BrandesState {
  const pred = new Map<string, string[]>();
  const sigma = new Map<string, number>();
  const dist = new Map<string, number>();
  for (const v of nodes) {
    pred.set(v, []);
    sigma.set(v, 0);
    dist.set(v, Number.POSITIVE_INFINITY);
  }
  sigma.set(source, 1);
  dist.set(source, 0);
  return { stack: [], pred, sigma, dist };
}
 
function popClosest(queue: string[], dist: Map<string, number>): string | undefined {
  queue.sort((a, b) => {
    const da = dist.get(a) ?? Number.POSITIVE_INFINITY;
    const db = dist.get(b) ?? Number.POSITIVE_INFINITY;
    if (da !== db) return da - db;
    return lexCompare(a, b);
  });
  return queue.shift();
}
 
function relaxShorter(state: BrandesState, queue: string[], v: string, w: string, alt: number): void {
  state.dist.set(w, alt);
  state.sigma.set(w, state.sigma.get(v) ?? 0);
  state.pred.set(w, [v]);
  Eif (!queue.includes(w)) queue.push(w);
}
 
function relaxEqual(state: BrandesState, v: string, w: string): void {
  state.sigma.set(w, (state.sigma.get(w) ?? 0) + (state.sigma.get(v) ?? 0));
  state.pred.get(w)?.push(v);
}
 
function relaxEdge(
  state: BrandesState,
  queue: string[],
  v: string,
  w: string,
  weight: number
): void {
  Iif (weight <= 0) return;
  const dv = state.dist.get(v) ?? Number.POSITIVE_INFINITY;
  const alt = dv + 1 / weight;
  const dw = state.dist.get(w) ?? Number.POSITIVE_INFINITY;
  if (alt < dw - 1e-12) relaxShorter(state, queue, v, w, alt);
  else if (Math.abs(alt - dw) < 1e-12) relaxEqual(state, v, w);
}
 
function runDijkstra(adj: AdjacencyMap, state: BrandesState, source: string): void {
  const queue: string[] = [source];
  while (queue.length > 0) {
    const v = popClosest(queue, state.dist);
    Iif (v === undefined) break;
    state.stack.push(v);
    const neighbours = adj.get(v);
    Iif (neighbours === undefined) continue;
    // Iterate neighbours in lex-sorted order so tie-breaking is fully
    // deterministic regardless of the underlying Map insertion order
    // (which depends on the input edge-list ordering).
    const sortedNeighbours = [...neighbours.keys()].sort(lexCompare);
    for (const w of sortedNeighbours) {
      const weight = neighbours.get(w);
      Iif (weight === undefined) continue;
      relaxEdge(state, queue, v, w, weight);
    }
  }
}
 
function propagateDelta(state: BrandesState, w: string, delta: Map<string, number>): void {
  const sw = state.sigma.get(w) ?? 0;
  const dw = delta.get(w) ?? 0;
  Iif (sw <= 0) return;
  for (const v of state.pred.get(w) ?? []) {
    const sv = state.sigma.get(v) ?? 0;
    delta.set(v, (delta.get(v) ?? 0) + (sv / sw) * (1 + dw));
  }
}
 
function accumulateBetweenness(
  state: BrandesState,
  source: string,
  nodes: readonly string[],
  cb: Map<string, number>
): void {
  const delta = new Map<string, number>();
  for (const v of nodes) delta.set(v, 0);
  while (state.stack.length > 0) {
    const w = state.stack.pop();
    Iif (w === undefined) break;
    propagateDelta(state, w, delta);
    if (w !== source) cb.set(w, (cb.get(w) ?? 0) + (delta.get(w) ?? 0));
  }
}
 
export function betweennessCentrality(
  nodeIds: readonly string[],
  edges: readonly WeightedEdge[]
): Map<string, number> {
  const sortedNodes = [...nodeIds].sort(lexCompare);
  const adj = buildAdjacency(sortedNodes, edges);
  const cb = new Map<string, number>();
  for (const v of sortedNodes) cb.set(v, 0);
 
  for (const s of sortedNodes) {
    const state = initBrandesState(sortedNodes, s);
    runDijkstra(adj, state, s);
    accumulateBetweenness(state, s, sortedNodes, cb);
  }
 
  const n = sortedNodes.length;
  if (n >= 3) {
    // Undirected graph: each unordered pair is traversed twice by Brandes
    // (once from each endpoint as source), so the normalisation factor is
    // 1 / ((n-1)(n-2)) rather than the directed 2 / ((n-1)(n-2)).
    const norm = 1 / ((n - 1) * (n - 2));
    for (const [k, v] of cb) cb.set(k, Math.round(v * norm * 10000) / 10000);
  }
  return cb;
}
 
/**
 * Deterministic asynchronous label propagation for community detection.
 *
 * Each node is initialised with its own label (its id). On every iteration
 * each node — visited in sorted id order — adopts the label that maximises
 * the **sum of edge weights** to neighbours sharing that label, with ties
 * broken by lexicographic label order. Labels are updated **in place** so
 * downstream nodes in the same sweep already see their predecessors'
 * decisions. This asynchronous variant avoids the oscillation that pure
 * synchronous label propagation exhibits on symmetric / bipartite graphs.
 *
 * As an additional safety bound the algorithm fingerprints the full label
 * vector after each sweep and stops if either:
 * - no label changed during the sweep (true convergence), or
 * - the fingerprint matches the previous-previous sweep (2-cycle).
 *
 * Sorted-order traversal plus deterministic tie-breaking guarantees
 * reproducible clusters across runs.
 *
 * @param nodeIds - Node universe.
 * @param edges - Weighted edges.
 * @param maxIterations - Safety bound (default 30, typically converges in <10).
 * @returns Map from node id → community label.
 */
function runLabelIteration(
  sorted: readonly string[],
  adj: AdjacencyMap,
  labels: Map<string, string>
): boolean {
  let changed = false;
  // Asynchronous: update labels in place so each node in `sorted` sees the
  // most recent labels assigned earlier in the same sweep.
  for (const node of sorted) {
    const neighbours = adj.get(node);
    if (neighbours === undefined || neighbours.size === 0) continue;
    const score = aggregateNeighbourLabels(neighbours, labels);
    const bestLabel = pickBestLabel(score);
    if (bestLabel !== undefined && bestLabel !== labels.get(node)) {
      labels.set(node, bestLabel);
      changed = true;
    }
  }
  return changed;
}
 
/** Build a deterministic fingerprint of the current label vector. */
function fingerprintLabels(sorted: readonly string[], labels: ReadonlyMap<string, string>): string {
  const parts: string[] = [];
  for (const id of sorted) parts.push(`${id}=${labels.get(id) ?? ''}`);
  return parts.join('|');
}
 
export function labelPropagation(
  nodeIds: readonly string[],
  edges: readonly WeightedEdge[],
  maxIterations = 30
): Map<string, string> {
  const sorted = [...nodeIds].sort(lexCompare);
  const adj = buildAdjacency(sorted, edges);
  const labels = new Map<string, string>();
  for (const id of sorted) labels.set(id, id);
  // Track the previous two fingerprints to detect 2-cycles (label vector
  // alternating between two states without converging).
  let prevPrint = '';
  let prevPrevPrint = '';
  for (let iter = 0; iter < maxIterations; iter++) {
    if (!runLabelIteration(sorted, adj, labels)) break;
    const print = fingerprintLabels(sorted, labels);
    Iif (print === prevPrevPrint) break;
    prevPrevPrint = prevPrint;
    prevPrint = print;
  }
  return labels;
}
 
/** Pick the label with the highest aggregated weight; ties broken lex-smallest. */
function pickBestLabel(score: Map<string, number>): string | undefined {
  Iif (score.size === 0) return undefined;
  const sortedScores = [...score.entries()].sort((a, b) => lexCompare(a[0], b[0]));
  let bestLabel: string | undefined;
  let bestScore = -Infinity;
  for (const [lab, s] of sortedScores) {
    if (s > bestScore + 1e-12) {
      bestScore = s;
      bestLabel = lab;
    }
  }
  return bestLabel;
}
 
/** Aggregate weighted neighbour labels for a single node during label propagation. */
function aggregateNeighbourLabels(
  neighbours: Map<string, number>,
  labels: Map<string, string>
): Map<string, number> {
  const score = new Map<string, number>();
  for (const [n, w] of neighbours) {
    const lab = labels.get(n);
    Iif (lab === undefined) continue;
    score.set(lab, (score.get(lab) ?? 0) + w);
  }
  return score;
}
 
/**
 * Newman's modularity Q for a weighted partition.
 *
 * `Q = (1 / 2m) Σ_{ij} [A_ij - k_i k_j / 2m] δ(c_i, c_j)`
 *
 * where `A_ij` is edge weight, `k_i` weighted degree, and `δ` the Kronecker
 * indicator for same-community. Q ∈ `[-0.5, 1]`; values >0.3 indicate
 * meaningful community structure.
 *
 * @param nodeIds - Node universe.
 * @param edges - Weighted edges.
 * @param labels - Community label per node (from {@link labelPropagation}).
 * @returns Modularity Q rounded to 4 decimal places (`0` when the graph is empty).
 */
export function modularity(
  nodeIds: readonly string[],
  edges: readonly WeightedEdge[],
  labels: Map<string, string>
): number {
  let m2 = 0;
  for (const e of edges) m2 += 2 * e.weight;
  if (m2 === 0) return 0;
  const deg = weightedDegree(nodeIds, edges);
  let q = 0;
  // Edge contributions (A_ij).
  for (const e of edges) {
    if (labels.get(e.sourceId) === labels.get(e.targetId)) {
      q += 2 * e.weight; // undirected pair counted both directions
    }
  }
  // Expected-degree subtraction over all same-community pairs (including self).
  // Iterate in sorted order for deterministic floating-point accumulation.
  const sortedNodeIds = [...nodeIds].sort();
  for (const i of sortedNodeIds) {
    const li = labels.get(i);
    const ki = deg.get(i) ?? 0;
    for (const j of sortedNodeIds) {
      if (labels.get(j) !== li) continue;
      const kj = deg.get(j) ?? 0;
      q -= (ki * kj) / m2;
    }
  }
  return Math.round((q / m2) * 10000) / 10000;
}