#!/usr/bin/perl -w =head1 NAME Cron - cron-like scheduler for Perl subroutines =head1 SYNOPSIS use Schedule::Cron; # Subroutines to be called sub dispatcher { print "ID: ",shift,"\n"; print "Args: ","@_","\n"; } sub check_links { # do something... } # Create new object with default dispatcher my $cron = new Schedule::Cron(\&dispatcher); # Load a crontab file $cron->load_crontab("/var/spool/cron/perl"); # Add dynamically crontab entries $cron->add_entry("3 4 * * *",ROTATE => "apache","sendmail"); $cron->add_entry("0 11 * * Mon-Fri",\&check_links); # Run scheduler $cron->run(detach=>1); =head1 DESCRIPTION This module provides a simple but complete cron like scheduler. I.e this module can be used for periodically executing Perl subroutines. The dates and parameters for the subroutines to be called are specified with a format known as crontab entry (see L<"METHODS">, C and L) The philosophy behind C is to call subroutines periodically from within one single Perl program instead of letting C trigger several (possibly different) Perl scripts. Everything under one roof. Furthermore, C provides mechanism to create crontab entries dynamically, which isn't that easy with C. C knows about all extensions (well, at least all extensions I'm aware of, i.e those of the so called "Vixie" cron) for crontab entries like ranges including 'steps', specification of month and days of the week by name, or coexistence of lists and ranges in the same field. It even supports a bit more (like lists and ranges with symbolic names). =head1 METHODS =over 4 =cut #' package Schedule::Cron; use Time::ParseDate; use Data::Dumper; use strict; use vars qw($VERSION $DEBUG); use subs qw(dbg); my $HAS_POSIX; BEGIN { eval { require POSIX; import POSIX ":sys_wait_h"; }; $HAS_POSIX = $@ ? 0 : 1; } $VERSION = "1.02_3"; our $DEBUG = 0; my %STARTEDCHILD = (); my @WDAYS = qw( Sunday Monday Tuesday Wednesday Thursday Friday Saturday Sunday ); my @ALPHACONV = ( { }, { }, { }, { qw(jan 1 feb 2 mar 3 apr 4 may 5 jun 6 jul 7 aug 8 sep 9 oct 10 nov 11 dec 12) }, { qw(sun 0 mon 1 tue 2 wed 3 thu 4 fri 5 sat 6)}, { } ); my @RANGES = ( [ 0,59 ], [ 0,23 ], [ 0,31 ], [ 0,12 ], [ 0,7 ], [ 0,59 ] ); my @LOWMAP = ( {}, {}, { 0 => 1}, { 0 => 1}, { 7 => 0}, {}, ); # Currently, there are two ways for reaping. One, which only waits explicitly # on PIDs it forked on its own, and one which waits on all PIDs (even on those # it doesn't forked itself). The later has been proved to work on Win32 with # the 64 threads limit (RT #56926), but not when one creates forks on ones # own. The specific reaper works for RT #55741. # It tend to use the specific one, if it also resolves RT #56926. Both are left # here for reference until a decision has been done for 1.01 sub REAPER { &_reaper_all(); } # Specific reaper sub _reaper_specific { local ($!,%!,$?); if ($HAS_POSIX) { foreach my $pid (keys %STARTEDCHILD) { if ($STARTEDCHILD{$pid}) { my $res = $HAS_POSIX ? waitpid($pid, WNOHANG) : waitpid($pid,0); if ($res > 0) { # We reaped a truly running process $STARTEDCHILD{$pid} = 0; dbg "Reaped child $res" if $DEBUG; } } } } else { my $waitedpid = 0; while($waitedpid != -1) { $waitedpid = wait; } } } # Catch all reaper sub _reaper_all { #local ($!,%!,$?,${^CHILD_ERROR_NATIVE}); # Localizing ${^CHILD_ERROR_NATIVE} breaks signalhander.t which checks that # chained SIGCHLD handlers are called. I don't know why, though, hence I # leave it out for now. See #69916 for some discussion why this handler # might be needed. local ($!,%!,$?); my $kid; do { # Only on POSIX systems the wait will return immediately # if there are no finished child processes. Simple 'wait' # waits blocking on childs. $kid = $HAS_POSIX ? waitpid(-1, WNOHANG) : wait; dbg "Kid: $kid" if $DEBUG; if ($kid != 0 && $kid != -1 && defined $STARTEDCHILD{$kid}) { # We don't delete the hash entry here to avoid an issue # when modifying global hash from multiple threads $STARTEDCHILD{$kid} = 0; dbg "Reaped child $kid" if $DEBUG; } } while ($kid != 0 && $kid != -1); # Note to myself: Is the %STARTEDCHILD hash really necessary if we use -1 # for waiting (i.e. for waiting on any child ?). In the current # implementation, %STARTEDCHILD is not used at all. It would be only # needed if we iterate over it to wait on pids specifically. } # Cleaning is done in extra method called from the main # process in order to avoid event handlers modifying this # global hash which can lead to memory errors. # See RT #55741 for more details on this. # This method is called in strategic places. sub _cleanup_process_list { my ($self, $cfg) = @_; # Cleanup processes even on those systems, where the SIGCHLD is not # propagated. Only do this for POSIX, otherwise this call would block # until all child processes would have been finished. # See RT #56926 for more details. # Do not cleanup if nofork because jobs that fork will do their own reaping. &REAPER() if $HAS_POSIX && !$cfg->{nofork}; # Delete entries from this global hash only from within the main # thread/process. Hence, this method must not be called from within # a signalhandler for my $k (keys %STARTEDCHILD) { delete $STARTEDCHILD{$k} unless $STARTEDCHILD{$k}; } } =item $cron = new Schedule::Cron($dispatcher,[extra args]) Creates a new C object. C<$dispatcher> is a reference to a subroutine, which will be called by default. C<$dispatcher> will be invoked with the arguments parameter provided in the crontab entry if no other subroutine is specified. This can be either a single argument containing the argument parameter literally has string (default behavior) or a list of arguments when using the C option described below. The date specifications must be either provided via a crontab like file or added explicitly with C (L<"add_entry">). I can be a hash or hash reference for additional arguments. The following parameters are recognized: =over =item file => Load the crontab entries from =item eval => 1 Eval the argument parameter in a crontab entry before calling the subroutine (instead of literally calling the dispatcher with the argument parameter as string) =item nofork => 1 Don't fork when starting the scheduler. Instead, the jobs are executed within current process. In your executed jobs, you have full access to the global variables of your script and hence might influence other jobs running at a different time. This behaviour is fundamentally different to the 'fork' mode, where each jobs gets its own process and hence a B of the process space, independent of each other job and the main process. This is due to the nature of the C system call. =item nostatus => 1 Do not update status in $0. Set this if you don't want ps to reveal the internals of your application, including job argument lists. Default is 0 (update status). =item skip => 1 Skip any pending jobs whose time has passed. This option is only useful in combination with C where a job might block the execution of the following jobs for quite some time. By default, any pending job is executed even if its scheduled execution time has already passed. With this option set to true all pending which would have been started in the meantime are skipped. =item catch => 1 Catch any exception raised by a job. This is especially useful in combination with the C option to avoid stopping the main process when a job raises an exception (dies). =item after_job => \&after_sub Call a subroutine after a job has been run. The first argument is the return value of the dispatched job, the reminding arguments are the arguments with which the dispatched job has been called. Example: my $cron = new Schedule::Cron(..., after_job => sub { my ($ret,@args) = @_; print "Return value: ",$ret," - job arguments: (",join ":",@args,")\n"; }); =item log => \&log_sub Install a logging subroutine. The given subroutine is called for several events during the lifetime of a job. This method is called with two arguments: A log level of 0 (info),1 (warning) or 2 (error) depending on the importance of the message and the message itself. For example, you could use I (L) for logging purposes for example like in the following code snippet: use Log::Log4perl; use Log::Log4perl::Level; my $log_method = sub { my ($level,$msg) = @_; my $DBG_MAP = { 0 => $INFO, 1 => $WARN, 2 => $ERROR }; my $logger = Log::Log4perl->get_logger("My::Package"); $logger->log($DBG_MAP->{$level},$msg); } my $cron = new Schedule::Cron(.... , log => $log_method); =item loglevel => <-1,0,1,2> Restricts logging to the specified severity level or below. Use 0 to have all messages generated, 1 for only warnings and errors and 2 for errors only. Default is 0 (all messages). A loglevel of -1 (debug) will include job argument lists (also in $0) in the job start message logged with a level of 0 or above. You may have security concerns with this. Unless you are debugging, use 0 or higher. A value larger than 2 will disable logging completely. Although you can filter in your log routine, generating the messages can be expensive, for example if you pass arguments pointing to large hashes. Specifying a loglevel avoids formatting data that your routine would discard. =item processprefix => Cron::Schedule sets the process' name (i.e. C<$0>) to contain some informative messages like when the next job executes or with which arguments a job is called. By default, the prefix for this labels is C. With this option you can set it to something different. You can e.g. use C<$0> to include the original process name. You can inhibit this with the C option, and prevent the argument display by setting C to zero or higher. =item sleep => \&hook If specified, &hook will be called instead of sleep(), with the time to sleep in seconds as first argument and the Schedule::Cron object as second. This hook allows you to use select() instead of sleep, so that you can handle IO, for example job requests from a network connection. e.g. $cron->run( { sleep => \&sleep_hook, nofork => 1 } ); sub sleep_hook { my ($time, $cron) = @_; my ($rin, $win, $ein) = ('','',''); my ($rout, $wout, $eout); vec($rin, fileno(STDIN), 1) = 1; my ($nfound, $ttg) = select($rout=$rin, $wout=$win, $eout=$ein, $time); if ($nfound) { handle_io($rout, $wout, $eout); } return; } =back =cut sub new { my $class = shift; my $dispatcher = shift || die "No dispatching sub provided"; die "Dispatcher not a ref to a subroutine" unless ref($dispatcher) eq "CODE"; my $cfg = ref($_[0]) eq "HASH" ? $_[0] : { @_ }; $cfg->{processprefix} = "Schedule::Cron" unless $cfg->{processprefix}; my $timeshift = $cfg->{timeshift} || 0; my $self = { cfg => $cfg, dispatcher => $dispatcher, timeshift => $timeshift, queue => [ ], map => { } }; bless $self,(ref($class) || $class); $self->load_crontab if $cfg->{file}; $self; } =item $cron->load_crontab($file) =item $cron->load_crontab(file=>$file,[eval=>1]) Loads and parses the crontab file C<$file>. The entries found in this file will be B to the current time table with C<$cron-Eadd_entry>. The format of the file consists of cron commands containing of lines with at least 5 columns, whereas the first 5 columns specify the date. The rest of the line (i.e columns 6 and greater) contains the argument with which the dispatcher subroutine will be called. By default, the dispatcher will be called with one single string argument containing the rest of the line literally. Alternatively, if you call this method with the optional argument C1> (you must then use the second format shown above), the rest of the line will be evaled before used as argument for the dispatcher. For the format of the first 5 columns, please see L<"add_entry">. Blank lines and lines starting with a C<#> will be ignored. There's no way to specify another subroutine within the crontab file. All calls will be made to the dispatcher provided at construction time. If you want to start up fresh, you should call C<$cron-Eclean_timetable()> before. Example of a crontab fiqw(le:) # The following line runs on every Monday at 2:34 am 34 2 * * Mon "make_stats" # The next line should be best read in with an eval=>1 argument * * 1 1 * { NEW_YEAR => '1',HEADACHE => 'on' } =cut #' sub load_crontab { my $self = shift; my $cfg = shift; if ($cfg) { if (@_) { $cfg = ref($cfg) eq "HASH" ? $cfg : { $cfg,@_ }; } elsif (!ref($cfg)) { my $new_cfg = { }; $new_cfg->{file} = $cfg; $cfg = $new_cfg; } } my $file = $cfg->{file} || $self->{cfg}->{file} || die "No filename provided"; my $eval = $cfg->{eval} || $self->{cfg}->{eval}; open(F,$file) || die "Cannot open schedule $file : $!"; my $line = 0; while () { $line++; # Strip off trailing comments and ignore empty # or pure comments lines: s/#.*$//; next if /^\s*$/; next if /^\s*#/; chomp; s/\s*(.*)\s*$/$1/; my ($min,$hour,$dmon,$month,$dweek,$rest) = split (/\s+/,$_,6); my $time = [ $min,$hour,$dmon,$month,$dweek ]; # Try to check, whether an optional 6th column specifying seconds # exists: my $args; if ($rest) { my ($col6,$more_args) = split(/\s+/,$rest,2); if ($col6 =~ /^[\d\-\*\,\/]+$/) { push @$time,$col6; dbg "M: $more_args"; $args = $more_args; } else { $args = $rest; } } $self->add_entry($time,{ 'args' => $args, 'eval' => $eval}); } close F; } =item $cron->add_entry($timespec,[arguments]) Adds a new entry to the list of scheduled cron jobs. B