@@ -296,7 +296,14 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
296
296
health_checks = [
297
297
(
298
298
"postgres" ,
299
- lambda h : h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" ),
299
+ lambda h : (
300
+ # First check if PostgreSQL is running
301
+ h .run ("sudo systemctl is-active postgresql" ),
302
+ # Then check if the socket directory exists and has correct permissions
303
+ h .run ("sudo ls -la /run/postgresql" ),
304
+ # Then try pg_isready
305
+ h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" )
306
+ ),
300
307
),
301
308
(
302
309
"adminapi" ,
@@ -322,23 +329,56 @@ def is_healthy(host, instance_ip, ssh_identity_file) -> bool:
322
329
323
330
for service , check in health_checks :
324
331
try :
325
- cmd = check (host )
326
- if cmd .failed is True :
327
- logger .warning (f"{ service } not ready" )
328
- logger .error (f"{ service } command failed with rc={ cmd .rc } " )
329
- logger .error (f"{ service } stdout: { cmd .stdout } " )
330
- logger .error (f"{ service } stderr: { cmd .stderr } " )
332
+ if service == "postgres" :
333
+ # For PostgreSQL, we need to check multiple things
334
+ systemd_status , socket_check , pg_isready = check (host )
335
+
336
+ if systemd_status .failed :
337
+ logger .error ("PostgreSQL systemd service is not active" )
338
+ logger .error (f"systemd status: { systemd_status .stdout } " )
339
+ logger .error (f"systemd error: { systemd_status .stderr } " )
340
+
341
+ # Check init script logs
342
+ logger .error ("Init script logs:" )
343
+ host .run ("sudo journalctl -u cloud-init --no-pager" )
344
+
345
+ # Check cloud-init logs
346
+ logger .error ("Cloud-init logs:" )
347
+ host .run ("sudo cat /var/log/cloud-init-output.log" )
348
+
349
+ # Check if init script exists and its contents
350
+ logger .error ("Init script status:" )
351
+ host .run ("ls -la /tmp/init.sh" )
352
+ host .run ("cat /tmp/init.sh" )
353
+
354
+ if socket_check .failed :
355
+ logger .error ("PostgreSQL socket directory check failed" )
356
+ logger .error (f"socket check: { socket_check .stdout } " )
357
+ logger .error (f"socket error: { socket_check .stderr } " )
358
+
359
+ if pg_isready .failed :
360
+ logger .error ("pg_isready check failed" )
361
+ logger .error (f"pg_isready output: { pg_isready .stdout } " )
362
+ logger .error (f"pg_isready error: { pg_isready .stderr } " )
331
363
332
- # For PostgreSQL, also check the logs and systemd status
333
- if service == "postgres" :
334
- logger .error ("PostgreSQL logs:" )
335
- host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
336
- logger .error ("PostgreSQL systemd status:" )
337
- host .run ("sudo systemctl status postgresql" )
338
- logger .error ("PostgreSQL journal logs:" )
339
- host .run ("sudo journalctl -u postgresql --no-pager" )
364
+ # Check PostgreSQL logs for startup issues
365
+ logger .error ("PostgreSQL logs:" )
366
+ host .run ("sudo cat /var/log/postgresql/postgresql-*.log" )
367
+ logger .error ("PostgreSQL systemd status:" )
368
+ host .run ("sudo systemctl status postgresql" )
369
+ logger .error ("PostgreSQL journal logs:" )
370
+ host .run ("sudo journalctl -u postgresql --no-pager" )
340
371
341
- return False
372
+ if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
373
+ return False
374
+ else :
375
+ cmd = check (host )
376
+ if cmd .failed is True :
377
+ logger .warning (f"{ service } not ready" )
378
+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
379
+ logger .error (f"{ service } stdout: { cmd .stdout } " )
380
+ logger .error (f"{ service } stderr: { cmd .stderr } " )
381
+ return False
342
382
except Exception as e :
343
383
logger .warning (
344
384
f"Connection failed during { service } check, attempting reconnect..."
0 commit comments