Skip to content

feat: add pg_egress_collect service #486

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions ansible/files/admin_api_scripts/pg_egress_collect.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/env perl

# This script receives tcpdump output through STDIN and does the following:
#
# 1. extracts the length of outgoing TCP packets on ports 5432 and 6543, on all devices
# 2. sums the lengths over one interval (one minute by default)
# 3. saves the total length to a file (default is /tmp/pg_egress_collect.txt) once per interval
#
# Usage:
#
# tcpdump -s 128 -Q out -i any -nn -tt -vv -p -l 'tcp and (port 5432 or port 6543)' | perl pg_egress_collect.pl -o /tmp/output.txt
#

use POSIX;
use List::Util qw(sum);
use Getopt::Long 'HelpMessage';
use IO::Async::Loop;
use IO::Async::Stream;
use IO::Async::Timer::Periodic;

use strict;
use warnings;

# Running total of captured packet lengths in the current time frame.
# Incremented by extract_packet_length() for every matching tcpdump line and
# reset to 0 by the periodic writer in main() after each write to the output file.
my $captured_len = 0;

# extract tcp packet length captured by tcpdump
#
# Sample input lines:
#
# 1674013833.940253 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
# 10.112.101.122.5432 > 220.235.16.223.62599: Flags [S.], cksum 0x5de3 (incorrect -> 0x63da), seq 2314200657, ack 2071735457, win 62643, options [mss 8961,sackOK,TS val 3358598837 ecr 1277499190,nop,wscale 7], length 0
# 1674013833.989257 IP (tos 0x0, ttl 64, id 24975, offset 0, flags [DF], proto TCP (6), length 52)
# 10.112.101.122.5432 > 220.235.16.223.62599: Flags [.], cksum 0x5ddb (incorrect -> 0xa25b), seq 1, ack 9, win 490, options [nop,nop,TS val 3358598885 ecr 1277499232], length 0
sub extract_packet_length {
    # Takes one line of tcpdump output. If the line is an indented
    # "<src ip>.<port> > ..." detail line ending in ", length <n>", the
    # number <n> is added to the file-level $captured_len; any other line
    # is ignored.
    my $tcpdump_line = shift;

    #print("debug: >> " . $tcpdump_line);

    # NOTE(review): the pattern requires leading whitespace followed by a
    # dotted-quad address, so only lines of that shape are counted -- confirm
    # whether other address formats (e.g. IPv6) need to be accounted for.
    $tcpdump_line =~ /^\s+\d+\.\d+\.\d+\.\d+\..*, length (\d+)$/
        and $captured_len += $1;
}

# Write the current total captured length to the output file.
#
# Arguments:
#   $output - path of the file to (over)write
#
# Logs a timestamped line to STDOUT, then replaces the content of $output
# with the value of the file-level $captured_len (no trailing newline).
# Dies if the file cannot be opened, written or closed.
#
# NOTE(review): the file is truncated on open and filled afterwards, so a
# reader polling it at that moment may see an empty file -- confirm the
# consumer tolerates that, or switch to write-to-temp-then-rename.
sub write_file {
    my ($output) = @_;

    my $now = strftime "%F %T", localtime time;
    print "[$now] write captured len $captured_len to $output\n";

    # write-only mode is sufficient: the file is never read back here
    open(my $fh, ">", $output) or die "Could not open file '$output' $!";
    print {$fh} "$captured_len" or die "Could not write file '$output' $!";
    # buffered write errors only surface at close, so check it as well
    close($fh) or die "Could not write file '$output' $!";
}

# main
#
# Parses the command-line options, then runs an IO::Async event loop that:
#
#   - reads tcpdump output from STDIN line by line and feeds every line to
#     extract_packet_length(), which accumulates the total in $captured_len
#   - every $interval seconds calls write_file($output) and resets
#     $captured_len to 0
#
# Options:
#   --interval <seconds>  write interval, default 60, must be positive
#   --output <path>       output file, default /tmp/pg_egress_collect.txt
#   --help                print usage and exit
#
# Returns when STDIN reaches end-of-file (i.e. tcpdump has gone away), so the
# process exits and a supervisor can restart the whole pipeline.
sub main {
    # STDOUT is block-buffered when it is not a terminal (e.g. when run as a
    # service); unbuffer it so log lines show up as they are printed.
    local $| = 1;

    # get arguments; "=" makes the value mandatory, so a bare --interval or
    # --output is rejected instead of silently becoming 0 or ""
    GetOptions(
        "interval=i" => \(my $interval = 60),
        "output=s"   => \(my $output = "/tmp/pg_egress_collect.txt"),
        "help"       => sub { HelpMessage(0) },
    ) or HelpMessage(1);

    if ($interval <= 0) {
        warn "interval must be a positive number of seconds\n";
        HelpMessage(1);
    }
    if ($output eq '') {
        warn "output file path must not be empty\n";
        HelpMessage(1);
    }

    my $loop = IO::Async::Loop->new;

    # tcpdump extractor
    my $extractor = IO::Async::Stream->new_for_stdin(
        on_read => sub {
            my ($self, $buffref, $eof) = @_;

            # consume every complete line currently in the buffer; a trailing
            # partial line stays there until the rest of it arrives
            while ($$buffref =~ s/^(.*\n)//) {
                my $line = $1;
                extract_packet_length($line);
            }

            return 0;
        },
        # Without this the periodic timer would keep the loop alive after the
        # producer has exited, and the script would write 0 forever.
        on_read_eof => sub {
            print "pg_egress_collect: STDIN reached end-of-file, stopping.\n";
            $loop->stop;
        },
    );

    # schedule file writer per interval
    my $writer = IO::Async::Timer::Periodic->new(
        interval => $interval,
        on_tick  => sub {
            write_file($output);

            # reset total captured length
            $captured_len = 0;
        },
    );
    $writer->start;

    print "pg_egress_collect started, egress data will be saved to $output at interval $interval seconds.\n";

    $loop->add($extractor);
    $loop->add($writer);
    $loop->run;
}

# script entry point: parse the command-line options and run the collector's event loop
main();

__END__

=head1 NAME

pg_egress_collect.pl - collect egress from tcpdump output, extract TCP packet length, aggregate in specified interval and write to output file.

=head1 SYNOPSIS

pg_egress_collect.pl [-i interval] [-o output]

Options:

-i, --interval interval
output file write interval, in seconds, default is 60 seconds

-o, --output output
output file path, default is /tmp/pg_egress_collect.txt

-h, --help
print this help message

=cut
13 changes: 13 additions & 0 deletions ansible/files/pg_egress_collect.service.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# systemd unit that runs the Postgres egress collector: tcpdump output is
# piped into pg_egress_collect.pl, which aggregates packet lengths.
[Unit]
Description=Postgres Egress Collector

[Service]
Type=simple
# tcpdump flags: -s 128 limits the capture to the first 128 bytes of each
# packet, -Q out keeps only outgoing packets, -i any listens on all
# interfaces, -nn disables host/port name resolution, -tt prints Unix
# timestamps, -vv is verbose output, -p disables promiscuous mode and -l
# line-buffers stdout so the script receives lines as they are produced.
# The script is started without options, so it uses its defaults
# (60 second interval, /tmp/pg_egress_collect.txt).
# NOTE(review): the main process is bash running a pipeline, so
# Restart=always only triggers once the whole pipeline has exited -- confirm
# the Perl side terminates when tcpdump goes away.
ExecStart=/bin/bash -c "tcpdump -s 128 -Q out -i any -nn -tt -vv -p -l 'tcp and (port 5432 or port 6543)' | perl /root/pg_egress_collect.pl"
User=root
Slice=services.slice
# restart the collector 3 seconds after it exits, whatever the reason
Restart=always
RestartSec=3

[Install]
WantedBy=multi-user.target
1 change: 1 addition & 0 deletions ansible/tasks/internal/admin-api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- { file: "pg_upgrade_initiate.sh" }
- { file: "pg_upgrade_prepare.sh" }
- { file: "pg_upgrade_pgsodium_getkey.sh" }
- { file: "pg_egress_collect.pl" }

- name: give adminapi user permissions
copy:
Expand Down
15 changes: 15 additions & 0 deletions ansible/tasks/internal/pg_egress_collect.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Installs what the pg_egress_collect service needs and registers its systemd unit.

# tcpdump produces the capture; libio-async-perl provides the IO::Async
# modules used by pg_egress_collect.pl.
- name: pg_egress_collect - install tcpdump and perl async lib
apt:
pkg:
- tcpdump
- libio-async-perl

# Render the unit template into systemd's unit directory.
- name: pg_egress_collect - create service file
template:
src: files/pg_egress_collect.service.j2
dest: /etc/systemd/system/pg_egress_collect.service

# Make systemd pick up the newly written unit file.
# NOTE(review): no task in this file enables or starts the service --
# presumably that happens elsewhere; confirm.
- name: pg_egress_collect - reload systemd
systemd:
daemon_reload: yes

3 changes: 3 additions & 0 deletions ansible/tasks/setup-supabase-internal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,6 @@

- name: Init nftabless
import_tasks: internal/setup-nftables.yml

- name: Install pg_egress_collect
import_tasks: internal/pg_egress_collect.yml
2 changes: 1 addition & 1 deletion common.vars.pkr.hcl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
postgres-version = "15.1.0.26"
postgres-version = "15.1.0.30"