1use std::process::Command;
4use std::time::Instant;
5
6use crate::config::{Builder, FleetConfig};
7use crate::error::{NixError, Result};
8
9#[derive(Debug, Clone)]
11pub struct HealthStatus {
12 pub builder: Builder,
14 pub healthy: bool,
16 pub latency_ms: Option<u64>,
18 pub error: Option<String>,
20}
21
22pub fn check_builders(config: &FleetConfig) -> Vec<HealthStatus> {
24 config
26 .builders
27 .values()
28 .map(|builder| check_builder(builder))
29 .collect()
30}
31
32pub fn check_builder(builder: &Builder) -> HealthStatus {
34 let start = Instant::now();
35
36 let ssh_result = Command::new("ssh")
38 .args([
39 "-oStrictHostKeyChecking=no",
40 "-oPasswordAuthentication=no",
41 "-oConnectTimeout=5",
42 "-oBatchMode=yes",
43 "-l",
44 &builder.user,
45 &builder.host,
46 "true",
47 ])
48 .output();
49
50 match ssh_result {
51 Err(e) => HealthStatus {
52 builder: builder.clone(),
53 healthy: false,
54 latency_ms: None,
55 error: Some(format!("SSH exec failed: {}", e)),
56 },
57 Ok(output) if !output.status.success() => {
58 let stderr = String::from_utf8_lossy(&output.stderr);
59 HealthStatus {
60 builder: builder.clone(),
61 healthy: false,
62 latency_ms: None,
63 error: Some(format!("SSH connection failed: {}", stderr.trim())),
64 }
65 }
66 Ok(_) => {
67 let ssh_latency = start.elapsed().as_millis() as u64;
68
69 let store_uri = format!("{}://{}@{}", builder.protocol, builder.user, builder.host);
71 let store_result = Command::new("nix")
72 .args(["store", "ping", "--store", &store_uri])
73 .output();
74
75 match store_result {
76 Err(e) => HealthStatus {
77 builder: builder.clone(),
78 healthy: false,
79 latency_ms: Some(ssh_latency),
80 error: Some(format!("nix store ping failed: {}", e)),
81 },
82 Ok(output) if !output.status.success() => {
83 let stderr = String::from_utf8_lossy(&output.stderr);
84 HealthStatus {
85 builder: builder.clone(),
86 healthy: false,
87 latency_ms: Some(ssh_latency),
88 error: Some(format!("nix store unreachable: {}", stderr.trim())),
89 }
90 }
91 Ok(_) => HealthStatus {
92 builder: builder.clone(),
93 healthy: true,
94 latency_ms: Some(start.elapsed().as_millis() as u64),
95 error: None,
96 },
97 }
98 }
99 }
100}
101
102pub fn healthy_builders(statuses: &[HealthStatus]) -> Vec<&HealthStatus> {
104 let mut healthy: Vec<_> = statuses.iter().filter(|s| s.healthy).collect();
105 healthy.sort_by(|a, b| b.builder.speed_factor.cmp(&a.builder.speed_factor));
106 healthy
107}
108
109pub fn warm_connections(builders: &[&HealthStatus], control_path: &str) -> Result<()> {
111 for status in builders {
112 let b = &status.builder;
113 let output = Command::new("ssh")
114 .args([
115 "-oStrictHostKeyChecking=no",
116 "-oPasswordAuthentication=no",
117 "-oControlMaster=auto",
118 &format!("-oControlPath={}", control_path),
119 "-oControlPersist=10m",
120 "-oBatchMode=yes",
121 "-fN", "-l",
123 &b.user,
124 &b.host,
125 ])
126 .output()
127 .map_err(|e| NixError::SshFailed {
128 host: b.host.clone(),
129 message: format!("failed to warm connection: {}", e),
130 })?;
131
132 if !output.status.success() {
133 eprintln!(
135 "warning: failed to warm SSH connection to {}: {}",
136 b.host,
137 String::from_utf8_lossy(&output.stderr).trim()
138 );
139 }
140 }
141
142 Ok(())
143}