@@ -348,171 +348,41 @@ def gzip_then_base64_encode(s: str) -> str:
348
348
349
349
def is_healthy (ssh ) -> bool :
350
350
health_checks = [
351
- < << << << HEAD
352
351
("postgres" , "sudo -u postgres /usr/bin/pg_isready -U postgres" ),
353
352
("adminapi" , f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: { supabase_admin_key } '" ),
354
353
("postgrest" , "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready" ),
355
354
("gotrue" , "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health" ),
356
355
("kong" , "sudo kong health" ),
357
356
("fail2ban" , "sudo fail2ban-client status" ),
358
- == == == =
359
- (
360
- "postgres" ,
361
- lambda h : (
362
- # First check if PostgreSQL is running
363
- h .run ("sudo systemctl is-active postgresql" ),
364
- # Then check if the socket directory exists and has correct permissions
365
- h .run ("sudo ls -la /run/postgresql" ),
366
- # Then try pg_isready
367
- h .run ("sudo -u postgres /usr/bin/pg_isready -U postgres" )
368
- ),
369
- ),
370
- (
371
- "adminapi" ,
372
- lambda h : h .run (
373
- f"curl -sf -k --connect-timeout 30 --max-time 60 https://localhost:8085/health -H 'apikey: { supabase_admin_key } '"
374
- ),
375
- ),
376
- (
377
- "postgrest" ,
378
- lambda h : h .run (
379
- "curl -sf --connect-timeout 30 --max-time 60 http://localhost:3001/ready"
380
- ),
381
- ),
382
- (
383
- "gotrue" ,
384
- lambda h : h .run (
385
- "curl -sf --connect-timeout 30 --max-time 60 http://localhost:8081/health"
386
- ),
387
- ),
388
- ("kong" , lambda h : h .run ("sudo kong health" )),
389
- ("fail2ban" , lambda h : h .run ("sudo fail2ban-client status" )),
390
- > >> >> >> 2 bd7b6d9 (test : more logging for healthcheck )
391
357
]
392
358
393
359
for service , command in health_checks :
394
360
try :
395
- << < << < < HEAD
396
361
result = run_ssh_command (ssh , command )
397
362
if not result ['succeeded' ]:
398
363
logger .warning (f"{ service } not ready" )
399
364
logger .error (f"{ service } command failed with rc={ cmd .rc } " )
400
365
logger .error (f"{ service } stdout: { cmd .stdout } " )
401
366
logger .error (f"{ service } stderr: { cmd .stderr } " )
402
- == == == =
403
- if service == "postgres" :
404
- # For PostgreSQL, we need to check multiple things
405
- systemd_status , socket_check , pg_isready = check (host )
406
- >> >> >> > 2 bd7b6d9 (test : more logging for healthcheck )
407
367
408
- # Log Nix profile setup checks
409
- logger .info ("Checking Nix profile setup:" )
410
- nix_profile_result = host .run ("ls -la /home/postgres/.nix-profile" )
411
- logger .info (f"Nix profile directory:\n { nix_profile_result .stdout } \n { nix_profile_result .stderr } " )
412
-
413
- nix_bin_result = host .run ("ls -la /home/postgres/.nix-profile/bin" )
414
- logger .info (f"Nix profile bin directory:\n { nix_bin_result .stdout } \n { nix_bin_result .stderr } " )
415
-
416
- nix_script_result = host .run ("test -x /home/postgres/.nix-profile/bin/switch_pg_cron_version" )
417
- logger .info (f"Switch script executable check: { 'success' if not nix_script_result .failed else 'failed' } " )
418
-
419
- nix_script_output = host .run ("/home/postgres/.nix-profile/bin/switch_pg_cron_version" )
420
- logger .info (f"Switch script output:\n { nix_script_output .stdout } \n { nix_script_output .stderr } " )
421
-
422
368
if systemd_status .failed :
423
369
logger .error ("PostgreSQL systemd service is not active" )
424
370
logger .error (f"systemd status: { systemd_status .stdout } " )
425
371
logger .error (f"systemd error: { systemd_status .stderr } " )
426
-
427
- # Check systemd service unit file
428
- logger .error ("PostgreSQL systemd service unit file:" )
429
- result = host .run ("sudo systemctl cat postgresql" )
430
- logger .error (f"service unit file:\n { result .stdout } \n { result .stderr } " )
431
-
432
- # Check systemd service environment
433
- logger .error ("PostgreSQL systemd service environment:" )
434
- result = host .run ("sudo systemctl show postgresql" )
435
- logger .error (f"service environment:\n { result .stdout } \n { result .stderr } " )
436
-
437
- # Check systemd service dependencies
438
- logger .error ("PostgreSQL systemd service dependencies:" )
439
- result = host .run ("sudo systemctl list-dependencies postgresql" )
440
- logger .error (f"service dependencies:\n { result .stdout } \n { result .stderr } " )
441
-
442
- # Check if service is enabled
443
- logger .error ("PostgreSQL service enabled status:" )
444
- result = host .run ("sudo systemctl is-enabled postgresql" )
445
- logger .error (f"service enabled status:\n { result .stdout } \n { result .stderr } " )
446
-
447
- # Check systemd journal for service execution logs
448
- logger .error ("Systemd journal entries for PostgreSQL service execution:" )
449
- result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager" )
450
- logger .error (f"systemd journal:\n { result .stdout } \n { result .stderr } " )
451
-
452
- # Check systemd journal specifically for ExecStartPre and ExecStart
453
- logger .error ("Systemd journal entries for ExecStartPre and ExecStart:" )
454
- result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager | grep -E 'ExecStartPre|ExecStart'" )
455
- logger .error (f"execution logs:\n { result .stdout } \n { result .stderr } " )
456
-
457
- # Check systemd journal for any errors
458
- logger .error ("Systemd journal entries with error level:" )
459
- result = host .run ("sudo journalctl -u postgresql -n 100 --no-pager -p err" )
460
- logger .error (f"error logs:\n { result .stdout } \n { result .stderr } " )
461
-
462
- # Check pre-start script output
463
- logger .error ("Checking pre-start script output:" )
464
- result = host .run ("sudo -u postgres /usr/local/bin/postgres_prestart.sh" )
465
- logger .error (f"pre-start script output:\n { result .stdout } \n { result .stderr } " )
466
372
467
- # Check PostgreSQL logs directory
468
- logger .error ("Checking PostgreSQL logs directory:" )
469
- result = host .run ("sudo ls -la /var/log/postgresql/" )
470
- logger .error (f"log directory contents:\n { result .stdout } \n { result .stderr } " )
471
-
472
- # Check any existing PostgreSQL logs
473
- logger .error ("Checking existing PostgreSQL logs:" )
474
- result = host .run ("sudo cat /var/log/postgresql/*.log" )
475
- logger .error (f"postgresql logs:\n { result .stdout } \n { result .stderr } " )
476
-
477
- # Try starting PostgreSQL directly with pg_ctl and capture output
478
- logger .error ("Attempting to start PostgreSQL directly with pg_ctl:" )
479
- startup_log = "/tmp/postgres-start.log"
480
- result = host .run (f"sudo -u postgres /usr/lib/postgresql/bin/pg_ctl -D /var/lib/postgresql/data start -l { startup_log } " )
481
- logger .error (f"pg_ctl start attempt:\n { result .stdout } \n { result .stderr } " )
482
-
483
- # Check the startup log
484
- logger .error ("PostgreSQL startup log:" )
485
- result = host .run (f"sudo cat { startup_log } " )
486
- logger .error (f"startup log contents:\n { result .stdout } \n { result .stderr } " )
487
-
488
- # Clean up the startup log
489
- result = host .run (f"sudo rm -f { startup_log } " )
490
-
491
- # Check PostgreSQL configuration
492
- logger .error ("PostgreSQL configuration:" )
493
- result = host .run ("sudo cat /etc/postgresql/postgresql.conf" )
494
- logger .error (f"postgresql.conf:\n { result .stdout } \n { result .stderr } " )
495
-
496
- # Check PostgreSQL authentication configuration
497
- logger .error ("PostgreSQL authentication configuration:" )
498
- result = host .run ("sudo cat /etc/postgresql/pg_hba.conf" )
499
- logger .error (f"pg_hba.conf:\n { result .stdout } \n { result .stderr } " )
500
-
501
- # Check PostgreSQL environment
502
- logger .error ("PostgreSQL environment:" )
503
- result = host .run ("sudo -u postgres env | grep POSTGRES" )
504
- logger .error (f"postgres environment:\n { result .stdout } \n { result .stderr } " )
373
+ # Run detailed checks since we know we have a working connection
374
+ run_detailed_checks (host )
505
375
506
376
if any (cmd .failed for cmd in [systemd_status , socket_check , pg_isready ]):
507
377
return False
508
- else :
509
- cmd = check (host )
510
- if cmd .failed is True :
511
- logger .warning (f"{ service } not ready" )
512
- logger .error (f"{ service } command failed with rc={ cmd .rc } " )
513
- logger .error (f"{ service } stdout: { cmd .stdout } " )
514
- logger .error (f"{ service } stderr: { cmd .stderr } " )
515
- return False
378
+ else :
379
+ cmd = check (host )
380
+ if cmd .failed is True :
381
+ logger .warning (f"{ service } not ready" )
382
+ logger .error (f"{ service } command failed with rc={ cmd .rc } " )
383
+ logger .error (f"{ service } stdout: { cmd .stdout } " )
384
+ logger .error (f"{ service } stderr: { cmd .stderr } " )
385
+ return False
516
386
except Exception as e :
517
387
logger .warning (
518
388
f"Connection failed during { service } check, attempting reconnect..."
0 commit comments