Skip to content

Commit 8c272a7

Browse files
committed
improve process_perfdata.pl in gearman mode
This patch addresses some issues in gearman worker mode:
- when forking a child, the forked copy of %children contains all children created so far, so if that child receives a SIGINT, it will kill its siblings.
- check for exited children in a loop in case multiple children exited at once.
- do not remove the pidfile when a child receives a SIGINT.
- fix an issue with gearman jobs having a timeout; for details see:
  - gearman/gearmand#301
  - ConSol-Monitoring/omd#107
1 parent 6d8c483 commit 8c272a7

File tree

1 file changed

+23
-12
lines changed

1 file changed

+23
-12
lines changed

scripts/process_perfdata.pl.in

+23-12
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,7 @@ sub open_template {
648648
my $xmlfile = shift;
649649
$delayed_write = 0;
650650
if( -e $xmlfile ){
651-
my $mtime = (stat($xmlfile))[9];
651+
my $mtime = (stat(_))[9];
652652
my $t = time();
653653
my $age = ($t - $mtime);
654654
if ( $age < $conf{'XML_UPDATE_DELAY'} ){
@@ -1285,17 +1285,22 @@ sub handle_signal {
12851285
#
12861286
if ( defined ( $opt_gm ) ){
12871287
if($signal eq "CHLD" && defined($opt_gm) ){
1288-
my $pid = waitpid(-1, &WNOHANG);
1289-
if($pid == -1){
1290-
print_log( "### no hanging child ###", 1 );
1291-
} elsif ( WIFEXITED($?)) {
1292-
print_log( "### child $pid exited ###", 1 );
1293-
$children--;
1294-
} else {
1295-
print_log( "### wrong signal ###", 1 );
1296-
$children--;
1288+
while(my $pid = waitpid(-1, &WNOHANG)) {
1289+
if($pid == -1){
1290+
print_log( "### no hanging child ###", 1 );
1291+
last;
1292+
} elsif ( WIFEXITED($?)) {
1293+
print_log( "### child $pid exited ###", 1 );
1294+
delete $children{$pid};
1295+
$children--;
1296+
} else {
1297+
print_log( "### wrong signal ###", 1 );
1298+
delete $children{$pid};
1299+
$children--;
1300+
}
1301+
$SIG{'CHLD'} = \&handle_signal;
12971302
}
1298-
$SIG{'CHLD'} = \&handle_signal;
1303+
return;
12991304
}
13001305
if($signal eq "INT" || $signal eq "TERM"){
13011306
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
@@ -1499,14 +1504,20 @@ sub new_child {
14991504
# Child can *not* return from this subroutine.
15001505
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before
15011506

1507+
# empty children list so we won't kill siblings if one children dies
1508+
%children = ();
1509+
1510+
# do not delete pidfile if child exits
1511+
undef $opt_pidfile;
1512+
15021513
# unblock signals
15031514
sigprocmask(SIG_UNBLOCK, $sigset)
15041515
or die "Can't unblock SIGINT for fork: $!\n";
15051516

15061517
my $worker = Gearman::Worker->new();
15071518
my @job_servers = split(/,/, $conf{'GEARMAN_HOST'}); # allow multiple gearman job servers
15081519
$worker->job_servers(@job_servers);
1509-
$worker->register_function("perfdata", 2, sub { return main(@_); });
1520+
$worker->register_function("perfdata", undef, sub { return main(@_); });
15101521
my %opt = (
15111522
on_complete => sub { $req++; },
15121523
stop_if => sub { if ( $req > $conf{'REQUESTS_PER_CHILD'} ) { return 1;}; }

0 commit comments

Comments
 (0)