Unverified Commit 5b6711d7 authored by Gavin Brown's avatar Gavin Brown
Browse files

deprecate StatsFile

parent 86484270
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
use Config::Simple; use Config::Simple;
use DBI; use DBI;
use Getopt::Long; use Getopt::Long;
use IO::File;
use Net::DNS; use Net::DNS;
use POSIX qw(setsid strftime floor); use POSIX qw(setsid strftime floor);
use Pod::Usage; use Pod::Usage;
...@@ -278,21 +279,21 @@ sub load_config { ...@@ -278,21 +279,21 @@ sub load_config {
# rdnsd received a SIGHUP: # rdnsd received a SIGHUP:
# #
$debug = $opts->{'debug'} || $config->param('Debug') eq 'true' || undef; $debug = $opts->{'debug'} || $config->param('Debug') eq 'true' || undef;
$loop = $opts->{'loop'} || $config->param('Loop') || 3; $multithreaded = $opts->{'threads'} || $config->param('MultiThreaded') eq 'true' || 1;
$pidfile = $opts->{'pidfile'} || $config->param('PidFile') || '/var/run/rdnsd/rdnsd.pid'; $pidfile = $opts->{'pidfile'} || $config->param('PidFile') || '/var/run/rdnsd/rdnsd.pid';
$database = $opts->{'database'} || $config->param('Database') || '/var/run/rdnsd/rdnsd.sqlite';
$percentile = $opts->{'percentile'} || $config->param('Percentile') || undef;
$family = $opts->{'family'} || $config->param('AddressFamily') || 4; $family = $opts->{'family'} || $config->param('AddressFamily') || 4;
$proto = $opts->{'proto'} || $config->param('Protocol') || 'udp'; $proto = $opts->{'proto'} || $config->param('Protocol') || 'udp';
$question = $opts->{'question'} || $config->param('Question') || '. A IN'; $loop = $opts->{'loop'} || $config->param('Loop') || 3;
$timeout = $opts->{'timeout'} || $config->param('Timeout') || 1; $timeout = $opts->{'timeout'} || $config->param('Timeout') || 1;
$recurse = $opts->{'recurse'} || $config->param('Recurse') eq 'true' || undef; $recurse = $opts->{'recurse'} || $config->param('Recurse') eq 'true' || undef;
$statsfile = $opts->{'statsfile'} || $config->param('StatsFile') || '/var/run/rdnsd/rdnsd.log'; $question = $opts->{'question'} || $config->param('Question') || '. A IN';
$servers = $opts->{'servers'} || $config->param('Servers') || undef; $servers = $opts->{'servers'} || $config->param('Servers') || undef;
$domains = $opts->{'domains'} || $config->param('Domains') || undef; $domains = $opts->{'domains'} || $config->param('Domains') || undef;
$percentile = $opts->{'percentile'} || $config->param('Percentile') || undef;
$optimistic = $opts->{'optimistic'} || $config->param('Optimistic') eq 'true' || undef; $optimistic = $opts->{'optimistic'} || $config->param('Optimistic') eq 'true' || undef;
$update = $opts->{'update'} || $config->param('UpdateInterval') || 290; $update = $opts->{'update'} || $config->param('UpdateInterval') || 290;
$multithreaded = $opts->{'threads'} || $config->param('MultiThreaded') eq 'true' || undef; $statsfile = $opts->{'statsfile'} || $config->param('StatsFile') || undef;
$database = $opts->{'database'} || $config->param('Database') || undef;
# #
# configure question packet # configure question packet
...@@ -346,101 +347,98 @@ sub update_serverlist { ...@@ -346,101 +347,98 @@ sub update_serverlist {
} }
#
# Write the statistics gathered since the last update to the configured
# outputs, then reset the counters.
#
# Outputs (both optional, both driven by file-level settings):
#   $statsfile - legacy plain-text file, one line per server:
#                "HOST RATE TIME[ PERCENTILE_TIME]"
#   $database  - SQLite database, one row per server in the "rdnsd" table
#
# Takes no arguments and returns nothing useful. The counters in $stats
# are only cleared (and $updated/$need_update only touched) if at least
# one output was available, so data is not thrown away when nothing could
# be written.
#
sub update_stats {
	my $fh;
	if ($statsfile) {
		$fh = IO::File->new($statsfile, 'w');

		#
		# a stats file that cannot be opened must not stop the database
		# from being updated, but the failure should at least be visible
		#
		debug("error opening '%s': %s", $statsfile, $!) if (!$fh);
	}

	if ($database) {
		#
		# (re)connect only if there is no handle yet, or if the existing
		# handle is no longer usable
		#
		if (!$dbh || !$dbh->ping) {
			$dbh = DBI->connect('dbi:SQLite:dbname='.$database, '', '', { 'RaiseError' => 1, 'AutoCommit' => 0 });

			$dbh->do("CREATE TABLE IF NOT EXISTS rdnsd (
				id INTEGER NOT NULL PRIMARY KEY,
				date DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
				host VARCHAR(255) NOT NULL,
				family INTEGER NOT NULL DEFAULT 4,
				proto CHAR(3) NOT NULL DEFAULT 'udp',
				rate DECIMAL(3,2) NOT NULL,
				time INTEGER NOT NULL,
				percentile_time INTEGER DEFAULT NULL
			)");

			$dbh->do('CREATE INDEX IF NOT EXISTS host_idx ON rdnsd(host)');
			$dbh->do('CREATE INDEX IF NOT EXISTS date_idx ON rdnsd(date)');
			$dbh->do('CREATE INDEX IF NOT EXISTS family_idx ON rdnsd(family)');
			$dbh->do('CREATE INDEX IF NOT EXISTS proto_idx ON rdnsd(proto)');

			$sth = $dbh->prepare('INSERT INTO rdnsd (host, family, proto, rate, time, percentile_time) VALUES (?, ?, ?, ?, ?, ?)');

			debug('initialised database in %s', $database);
		}

	} elsif ($dbh) {
		#
		# database output is no longer configured: drop the connection
		#
		$dbh->disconnect;
		undef($dbh);
		undef($sth);

	}

	foreach my $ns (sort(@servers)) {
		my ($rate, $time, $percentile_time);

		if ($stats->{$ns}->{'count'} < 1) {
			#
			# no probes recorded for this server yet
			#
			$stats->{$ns}->{'time'} = 0;

			if ($optimistic) {
				#
				# optimistic, treat server as up
				#
				$stats->{$ns}->{'count'} = 1;
				$stats->{$ns}->{'success'} = 1;

			} else {
				#
				# pessimistic, treat server as down
				#
				$stats->{$ns}->{'count'} = 1;
				$stats->{$ns}->{'success'} = 0;

			}
		}

		$rate = ($stats->{$ns}->{'success'} / $stats->{$ns}->{'count'});
		$time = floor(1000 * $stats->{$ns}->{'time'} / $stats->{$ns}->{'count'});

		my $line = sprintf('%s %0.2f %d', $ns, $rate, $time);

		if ($percentile) {
			#
			# sort in ascending *numeric* order (a bare sort() compares
			# the values as strings), tolerating a server that has no
			# recorded times at all
			#
			my @times = sort { $a <=> $b } @{$stats->{$ns}->{'times'} || []};

			$percentile_time = 0;
			if (scalar(@times) > 0) {
				#
				# find the position which corresponds to the percentile,
				# clamped to the bounds of the list so that a small sample
				# cannot produce a negative index (which would silently
				# select the largest value)
				#
				my $pos = floor(scalar(@times) * $percentile / 100) - 1;
				$pos = 0 if ($pos < 0);
				$pos = $#times if ($pos > $#times);

				$percentile_time = floor(1000 * $times[$pos]);
			}

			$line .= sprintf(' %d', $percentile_time);
		}

		$fh->print($line."\n") if ($fh);
		$sth->execute($ns, $family, $proto, $rate, $time, $percentile_time) if ($sth);
	}

	if ($fh) {
		$fh->close;
		debug("wrote stats to '%s'", $statsfile);
	}

	if ($dbh) {
		$dbh->commit;
		debug("updated '%s'", $database);
	}

	#
	# only reset the counters if the stats actually went somewhere
	#
	if ($fh || $dbh) {
		$updated = time();
		$need_update = undef;
		$stats = {};
	}
}
...@@ -506,67 +504,67 @@ of this file. ...@@ -506,67 +504,67 @@ of this file.
=item * C<--debug> =item * C<--debug>
Enable debug mode. Set C<Debug> option.
=item * C<--loop=LOOP> =item * C<--multithreaded>
Set loop duration. Set C<Multithreaded> option.
=item * C<--pidfile=FILE> =item * C<--pidfile=FILE>
Specify pid file. Set C<PidFile> option.
=item * C<--database=FILE>
Set C<Database> option.
=item * C<--percentile=PERCENTILE>
Set C<Percentile> option.
=item * C<--family=(4|6)> =item * C<--family=(4|6)>
Specify IP version. Set C<AddressFamily> option.
=item * C<--proto=QUESTION> =item * C<--proto=(udp|tcp)>
Specify protocol. Set C<Protocol> option.
=item * C<--question=QUESTION> =item * C<--loop=LOOP>
Specify question. Set C<Loop> option.
=item * C<--timeout=TIMEOUT> =item * C<--timeout=TIMEOUT>
Specify timeout. Set C<Timeout> option.
=item * C<--recurse> =item * C<--recurse>
Enable recursion. Set C<Recurse> option.
=item * C<--servers=SERVERS> =item * C<--question=QUESTION>
Specify servers to check.
=item * C<--statsfile=FILE>
Specify stats file. Set C<Question> option.
=item * C<--percentile=PERCENTILE> =item * C<--servers=SERVERS>
Specify a percentile to use when generating statistics. Set C<Servers> option.
=item * C<--domains=DOMAINS> =item * C<--domains=DOMAINS>
Specify domain names to query for a list of servers. Set C<Domains> option.
=item * C<--optimistic> =item * C<--optimistic>
Enable Optimistic mode. Set C<Optimistic> option.
=item * C<--update=TIME> =item * C<--update=TIME>
Specify automatic stats update interval. Set C<UpdateInterval> option.
=item * C<--multithreaded>
Run in multithreaded mode.
=item * C<--database=FILE> =item * C<--statsfile=FILE>
Specify SQLite database. Set C<StatsFile> option.
=back =back
...@@ -579,11 +577,11 @@ The format is very simple. Here is an example: ...@@ -579,11 +577,11 @@ The format is very simple. Here is an example:
MultiThreaded true MultiThreaded true
PidFile /var/run/rdnsd/rdnsd.pid PidFile /var/run/rdnsd/rdnsd.pid
Database /var/run/rdnsd/rdnsd.db Database /var/run/rdnsd/rdnsd.db
StatsFile /var/run/rdnsd/rdnsd.log
Percentile 95 Percentile 95
AddressFamily 4 AddressFamily 4
Protocol udp Protocol udp
Loop 3 Loop 3
Timeout 1
Recurse false Recurse false
Question . A IN Question . A IN
Servers ns1.example.com,ns2.example.net Servers ns1.example.com,ns2.example.net
...@@ -599,24 +597,56 @@ configuration file. ...@@ -599,24 +597,56 @@ configuration file.
=item * C<Debug (true|false)> =item * C<Debug (true|false)>
Default: false Default: C<false>
Normally, C<rdnsd> will daemonise once started. If the C<Debug> parameter Normally, C<rdnsd> will daemonise once started. If the C<Debug> parameter
is C<true>, C<rdnsd> will stay in the foreground and spam your terminal is C<true>, C<rdnsd> will stay in the foreground and spam your terminal
with debugging information. with debugging information.
=item * C<Multithreaded (true|false)>
Default: C<true>
This parameter enables multithreaded mode. In this mode, C<rdnsd> will
probe servers in parallel inside separate threads. Otherwise, it probes
them in serial, one after the other. Use of multithreaded mode resolves
some issues with monitoring large numbers of servers, at the cost of
higher CPU load.
=item * C<PidFile /path/to/pid/file> =item * C<PidFile /path/to/pid/file>
Default: /var/run/rdnsd/rdnsd.pid Default: C</var/run/rdnsd/rdnsd.pid>
The file where C<rdnsd> will write its pid. The file where C<rdnsd> will write its pid.
=item * C<StatsFile /path/to/stats/file> =item * C<Database FILE>
Default: /var/run/rdnsd/rdnsd.log Default: C</var/run/rdnsd/rdnsd.sqlite>
The file where C<rdnsd> will write statistics to when signalled. See If set, C<rdnsd> will create an SQLite database at the specified file
L<OBTAINING STATISTICS> for further information. and write statistics to it. The database will contain a single table
named C<rdnsd>, which will contain the following columns:
=over
=item * C<id> - unique row ID
=item * C<date> - date/time the row was inserted
=item * C<host> - hostname
=item * C<family> - IP version (4 or 6)
=item * C<proto> - transport protocol (UDP or TCP)
=item * C<rate> - response rate as a decimal (0.00 - 1.00)
=item * C<time> - average RTT in milliseconds
=item * C<percentile_time> - average RTT in milliseconds at the
configured percentile.
=back
=item * C<Percentile PERCENTILE> =item * C<Percentile PERCENTILE>
...@@ -632,27 +662,34 @@ IPv4. ...@@ -632,27 +662,34 @@ IPv4.
=item * C<Protocol (udp|tcp)> =item * C<Protocol (udp|tcp)>
Default: udp Default: C<udp>
Specify the transport protocol (UDP or TCP) to use. Specify the transport protocol (UDP or TCP) to use.
=item * C<Loop SECONDS> =item * C<Loop SECONDS>
Default: 2 Default: C<3>
This specifies the length of the main loop. If this is set to 2, then This specifies the length of the main loop. If this is set to 2, then
each server will be checked every 2 seconds. This value can be a decimal each server will be checked every 2 seconds. This value can be a decimal
fraction, eg 0.25. fraction, eg 0.25.
=item * C<Timeout SECONDS>
Default: C<1>
This specifies the timeout for DNS queries. A server will be considered
down if it does not respond within this amount of time.
=item * C<Recurse (true|false)> =item * C<Recurse (true|false)>
Default: false Default: C<false>
Enable recursion. Enable recursion.
=item * C<Question QUESTION> =item * C<Question QUESTION>
Default: example.com. IN A Default: C<example.com. IN A>
Specify the DNS question. The format is "QNAME QCLASS QTYPE". Specify the DNS question. The format is "QNAME QCLASS QTYPE".
...@@ -677,7 +714,7 @@ changes, you will need to restart C<rdnsd>. ...@@ -677,7 +714,7 @@ changes, you will need to restart C<rdnsd>.
=item * C<Optimistic (true|false)> =item * C<Optimistic (true|false)>
Default: false Default: C<false>
This parameter controls what happens when C<rdnsd> outputs statistics but This parameter controls what happens when C<rdnsd> outputs statistics but
finds a server in its list that it has not yet had time to send a finds a server in its list that it has not yet had time to send a
...@@ -686,49 +723,19 @@ if false, it will be reported as down. ...@@ -686,49 +723,19 @@ if false, it will be reported as down.
=item * C<UpdateInterval TIME> =item * C<UpdateInterval TIME>
Default: 290 Default: C<290>
This parameter tells C<rdnsd> to automatically update the statistics file This parameter tells C<rdnsd> to automatically update the statistics file
every C<TIME> seconds. every C<TIME> seconds.
=item * C<Multithreaded (true|false)> =item * C<StatsFile /path/to/stats/file>
Default: false
This parameter enables multithreaded mode. In this mode, C<rdnsd> will
probe servers in parallel inside separate threads. Otherwise, it probes
them in serial, one after the other. Use of multithreaded mode resolves
some issues with monitoring large numbers of servers, at the cost of
higher CPU load.
=item * C<Database FILE>
Default: none Default: none
If set, C<rdnsd> will create an SQLite database at the specified file B<Note:> this is a legacy option to provide backwards compatibility.
and write statistics to it. The database will contain a single table
named C<rdnsd>, which will contain the following columns:
=over This specifies the file where C<rdnsd> will write statistics to when
signalled. See L<OBTAINING STATISTICS> for further information.
=item * C<id> - unique row ID
=item * C<date> - date/time the row was inserted
=item * C<host> - hostname
=item * C<family> - IP version (4 or 6)
=item * C<proto> - transport protocol (UDP or TCP)
=item * C<rate> - response rate as a decimal (0.00 - 1.00)
=item * C<time> - average RTT in milliseconds
=item * C<percentile_time> - average RTT in milliseconds at the
configured percentile.
=back
=back =back
...@@ -743,34 +750,34 @@ new options added to the configuration file. ...@@ -743,34 +750,34 @@ new options added to the configuration file.
=head1 OBTAINING STATISTICS =head1 OBTAINING STATISTICS
Every C<UpdateInterval> seconds, C<rdnsd> will write stats to the file Every C<UpdateInterval> seconds, C<rdnsd> will write stats to the SQLite
specified by C<StatsFile>, and, if set, the SQLite database specified by database specified by C<Database>, and, if set, the file specified by
C<Database>. C<StatsFile>.
The recommended way to obtain statistics is to query the SQLite database
specified by the C<Database> directive.
If C<UpdateInterval> is unset, automatic updates will not occur, so to If C<UpdateInterval> is unset, automatic updates will not occur, so to
get statistics out of C<rdnsd>, you must send it a C<USR1> signal: get statistics out of C<rdnsd>, you must send it a C<USR1> signal:
$ kill -USR1 `cat /path/to/pid/file` $ kill -USR1 `cat /path/to/pid/file`
B<NOTE:> if you have C<N> servers and a C<Loop> value of C<M>, you must B<NOTE:> if multithreaded mode is disabled, and you have C<N> servers
be careful not to send the USR1 signal to C<rdnsd> more often than every and a C<Loop> value of C<M>, you must be careful not to send the USR1
C<N x M> seconds, otherwise C<rdnsd> will not have enough time to test signal to C<rdnsd> more often than every C<N x M> seconds, otherwise
every server. You probably want to send the signal about every C<3 x N x M> C<rdnsd> will not have enough time to test every server. You probably
seconds if you want reliable statistics. want to send the signal about every C<3 x N x M> seconds if you want
reliable statistics when not running in multithreaded mode.
If C<rdnsd> is running in multithreaded mode, then you can send the C<USR1> If C<rdnsd> I<is> running in multithreaded mode, then you can send the
signal much more often (once every C<Loop x Timeout> seconds). C<USR1> signal much more often (once every C<Loop x Timeout> seconds).
Note that C<rdnsd> will not I<immediately> update the file upon receiving =head2 (LEGACY) STATISTICS FILE FORMAT
the C<USR1> signal. You may need to wait up to C<Loop> seconds for the
current loop iteration to complete before the stats file is updated.
=head2 STATISTICS FILE FORMAT
The statistics file will contain one line for each server that is being The (legacy) statistics file will contain one line for each server that
checked. Each line contains the nameserver checked, the response rate as is being checked. Each line contains the nameserver checked, the response
a decimal fraction, and the average response time (in milliseconds), for rate as a decimal fraction, and the average response time (in milliseconds),
example: for example:
ns0.example.com 1.00 25 ns0.example.com 1.00 25
...@@ -786,6 +793,10 @@ percentile. ...@@ -786,6 +793,10 @@ percentile.