Index: infosystem/cluster.pl =================================================================== --- infosystem/cluster.pl (Revision 6229) +++ infosystem/cluster.pl (Revision 6671) @@ -254,18 +254,29 @@ } #nordugrid-cluster-runtimeenvironment -if ($config{runtimedir}){ - if (opendir DIR, $config{runtimedir}) { - @runtimeenvironment = `find $config{runtimedir} -type f ! -name ".*" ! -name "*~"` ; - closedir DIR; - foreach my $listentry (@runtimeenvironment) { - chomp($listentry); - $listentry=~s/$config{runtimedir}\/*//; - } - }else{ - # $listentry=""; - warning("Can't acess $config{runtimedir}"); - } +if (defined $parsedconfig{janitor}) { + eval ' + use lib $parsedconfig{janitor}{basedir}; + use RuntimeEnvironments qw(list_of_rte); + @runtimeenvironment = list_of_rte; + '; + if ($@) { + warning("Can't get list of available RTEs from Janitor: $@\n"); + } +} else { + if ($config{runtimedir}){ + if (opendir DIR, $config{runtimedir}) { + @runtimeenvironment = `find $config{runtimedir} -type f ! -name ".*" ! -name "*~"` ; + closedir DIR; + foreach my $listentry (@runtimeenvironment) { + chomp($listentry); + $listentry=~s/^\Q$config{runtimedir}\E\/*//; # strip the literal directory prefix; \Q..\E keeps regex metacharacters in the path from being interpreted + } + }else{ + # $listentry=""; + warning("Can't access $config{runtimedir}"); + } + } } ############################################################### Index: doc/arc.conf.template =================================================================== --- doc/arc.conf.template (Revision 6229) +++ doc/arc.conf.template (Revision 6671) @@ -1494,3 +1494,59 @@ # registration blockname. Don't set it unless you want to # overwrite the default. registrantsuffix="nordugrid-se-name=sename:myhost.org,Mds-Vo-name=local,o=grid" + + +#################################################################### +# +# [janitor] +# Configures and enables the Janitor. 
+ +[janitor] + +# basedir - where the Janitor is installed +basedir="/opt/janitor" + +# logconf - place of the log4perl config file +logconf="/opt/janitor/log.conf" + +# registrationdir - the directory used by the janitor to store state +# information +registrationdir="/var/lib/nordugrid/janitor" + +# installationdir - the directory where DREs are installed. Must be +# available at the same place on all the worker nodes +installationdir="/grid/runtime/janitor" + +# downloaddir - a directory used for temporarily storing downloaded files +downloaddir="/tmp" + +# jobexpirytime - jobs older than this are considered dead by the Janitor. +# This is used by the --force option of sweep. +jobexpirytime="10000000" + +# uid - the uid to use for installing software +uid="janitor" + +# gid - the gid to use for installing software +gid="janitor" + +# allow_base, allow_rte - patterns selecting which base systems and REs are allowed. Can +# be used multiple times. +allow_base="*tar*" +allow_rte="*" + +# deny_base, deny_rte - see above +deny_base="*" +deny_rte="*" + + +#################################################################### +# +# [janitor/NAME] +# Configures a catalog source. "NAME" is an arbitrary name. + +[janitor/NAME] + +# catalog - place of the catalog file. Currently only local files are +# supported. +catalog="/opt/janitor/catalog.rdf" Index: grid-manager/LRMS/pbs/submit-pbs-job.in =================================================================== --- grid-manager/LRMS/pbs/submit-pbs-job.in (Revision 6229) +++ grid-manager/LRMS/pbs/submit-pbs-job.in (Revision 6671) @@ -81,7 +81,19 @@ queue_node_string=$CONFIG_queue_node_string ############################################################## +# and now check if we want to use the janitor +############################################################## +WITH_JANITOR= +config_update_from_section "janitor" +if [ $? 
= 0 ]; then + WITH_JANITOR=yes + JANITOR=${CONFIG_basedir}/rjanitor + JANITOR_RTE=$(${JANITOR} info ${joboption_gridid} | sed -n "s/^uses: *\([^ ]*\) *$/\1/p") +fi + +############################################################## + if [ -z "$joboption_controldir" ] ; then joboption_controldir=`dirname "$arg_file"` if [ "$joboption_controldir" = '.' ] ; then @@ -118,25 +130,43 @@ ############################################################## # Zero stage of runtime environments ############################################################## -joboption_num=0 -eval "var_is_set=\${joboption_runtime_$joboption_num+yes}" -while [ ! -z "${var_is_set}" ] ; do - eval "var_value=\${joboption_runtime_$joboption_num}" - if [ -r "$RUNTIME_CONFIG_DIR/${var_value}" ] ; then - . "$RUNTIME_CONFIG_DIR/${var_value}" "0" - if [ $? -ne '0' ] ; then - echo "ERROR: runtime script ${var_value} failed" 1>&2 - echo "Submission: runtime script ${var_value} failed.">>"$failures_file" +if [ "x$WITH_JANITOR" = "xyes" ]; then + for RTE in ${JANITOR_RTE} + do + if [ -r "$RTE" ] ; then + . "$RTE" "0" + if [ $? -ne '0' ] ; then + echo "ERROR: runtime script ${$RTE} failed" 1>&2 + echo "Submission: runtime script ${$RTE} failed.">>"$failures_file" + exit 1 + fi + else + echo "ERROR: runtime script ${RTE} is missing" 1>&2 + echo "Submission: runtime script ${RTE} is missing.">>"$failures_file" exit 1 fi - else - echo "ERROR: runtime script ${var_value} is missing" 1>&2 - echo "Submission: runtime script ${var_value} is missing.">>"$failures_file" - exit 1 - fi - joboption_num=$(( joboption_num + 1 )) + done +else + joboption_num=0 eval "var_is_set=\${joboption_runtime_$joboption_num+yes}" -done + while [ ! -z "${var_is_set}" ] ; do + eval "var_value=\${joboption_runtime_$joboption_num}" + if [ -r "$RUNTIME_CONFIG_DIR/${var_value}" ] ; then + . "$RUNTIME_CONFIG_DIR/${var_value}" "0" + if [ $? 
-ne '0' ] ; then + echo "ERROR: runtime script ${var_value} failed" 1>&2 + echo "Submission: runtime script ${var_value} failed.">>"$failures_file" + exit 1 + fi + else + echo "ERROR: runtime script ${var_value} is missing" 1>&2 + echo "Submission: runtime script ${var_value} is missing.">>"$failures_file" + exit 1 + fi + joboption_num=$(( joboption_num + 1 )) + eval "var_is_set=\${joboption_runtime_$joboption_num+yes}" + done +fi PBS_QSUB='qsub -r n -S /bin/sh -m n ' if [ ! -z "$PBS_BIN_PATH" ] ; then @@ -527,28 +557,47 @@ ############################################################## echo "# Running runtime scripts" >> $PBS_JOB_SCRIPT echo "RUNTIME_CONFIG_DIR=\${RUNTIME_CONFIG_DIR:-$RUNTIME_CONFIG_DIR}" >> $PBS_JOB_SCRIPT -i=0 -eval "var_is_set=\${joboption_runtime_$i+yes}" -echo "runtimeenvironments=" >> $PBS_JOB_SCRIPT -while [ ! -z "${var_is_set}" ] ; do - if [ "$i" = '0' ] ; then - echo "if [ ! -z \"\$RUNTIME_CONFIG_DIR\" ] ; then" >> $PBS_JOB_SCRIPT +if [ "x$WITH_JANITOR" = "xyes" ]; then + # XXX This is NOT exactly the same as in the non-janitor case. + # XXX We are missing the different RUNTIME_CONFIG_DIR-feature + # XXX and the collection of actually available RTEs in + # XXX $runtimeenvironments + echo "runtimeenvironments=" >> $PBS_JOB_SCRIPT + for RTE in ${JANITOR_RTE} + do + echo " if [ -r \"$RTE\" ] ; then" >> $PBS_JOB_SCRIPT + echo " source \"$RTE\" 1 " >> $PBS_JOB_SCRIPT + echo " if [ \$? -ne '0' ] ; then " >> $PBS_JOB_SCRIPT + echo " echo \"Runtime script $RTE failed \" 1>&2 " >> $PBS_JOB_SCRIPT + echo " echo \"Runtime script $RTE failed \" 1>\"\$RUNTIME_JOB_DIAG\" " >> $PBS_JOB_SCRIPT + echo " RESULT=1 " >> $PBS_JOB_SCRIPT + echo " fi " >> $PBS_JOB_SCRIPT + echo " fi" >> $PBS_JOB_SCRIPT + done +else + i=0 + eval "var_is_set=\${joboption_runtime_$i+yes}" + echo "runtimeenvironments=" >> $PBS_JOB_SCRIPT + while [ ! -z "${var_is_set}" ] ; do + if [ "$i" = '0' ] ; then + echo "if [ ! 
-z \"\$RUNTIME_CONFIG_DIR\" ] ; then" >> $PBS_JOB_SCRIPT + fi + eval " var_value=\"\${joboption_runtime_$i}\"" + echo " if [ -r \"\${RUNTIME_CONFIG_DIR}/${var_value}\" ] ; then" >> $PBS_JOB_SCRIPT + echo " runtimeenvironments=\"\${runtimeenvironments}${var_value};\"" >> $PBS_JOB_SCRIPT + echo " source \${RUNTIME_CONFIG_DIR}/${var_value} 1 " >> $PBS_JOB_SCRIPT + echo " if [ \$? -ne '0' ] ; then " >> $PBS_JOB_SCRIPT + echo " echo \"Runtime ${var_value} script failed \" 1>&2 " >> $PBS_JOB_SCRIPT + echo " echo \"Runtime ${var_value} script failed \" 1>\"\$RUNTIME_JOB_DIAG\" " >> $PBS_JOB_SCRIPT + echo " RESULT=1 " >> $PBS_JOB_SCRIPT + echo " fi " >> $PBS_JOB_SCRIPT + echo " fi" >> $PBS_JOB_SCRIPT + i=$(( i + 1 )) + eval "var_is_set=\${joboption_runtime_$i+yes}" + done + if [ ! "$i" = '0' ] ; then + echo "fi" >> $PBS_JOB_SCRIPT fi - eval " var_value=\"\${joboption_runtime_$i}\"" - echo " if [ -r \"\${RUNTIME_CONFIG_DIR}/${var_value}\" ] ; then" >> $PBS_JOB_SCRIPT - echo " runtimeenvironments=\"\${runtimeenvironments}${var_value};\"" >> $PBS_JOB_SCRIPT - echo " source \${RUNTIME_CONFIG_DIR}/${var_value} 1 " >> $PBS_JOB_SCRIPT - echo " if [ \$? -ne '0' ] ; then " >> $PBS_JOB_SCRIPT - echo " echo \"Runtime ${var_value} script failed \" 1>&2 " >> $PBS_JOB_SCRIPT - echo " echo \"Runtime ${var_value} script failed \" 1>\"\$RUNTIME_JOB_DIAG\" " >> $PBS_JOB_SCRIPT - echo " RESULT=1 " >> $PBS_JOB_SCRIPT - echo " fi " >> $PBS_JOB_SCRIPT - echo " fi" >> $PBS_JOB_SCRIPT - i=$(( i + 1 )) - eval "var_is_set=\${joboption_runtime_$i+yes}" -done -if [ ! "$i" = '0' ] ; then - echo "fi" >> $PBS_JOB_SCRIPT fi echo "" >> $PBS_JOB_SCRIPT @@ -612,21 +661,32 @@ ############################################################## # Runtime (post)configuration at computing node ############################################################## -i=0 -eval "var_is_set=\${joboption_runtime_$i+yes}" -while [ ! -z "${var_is_set}" ] ; do - if [ "$i" = '0' ] ; then - echo "if [ ! 
-z \"\$RUNTIME_CONFIG_DIR\" ] ; then" >> $PBS_JOB_SCRIPT +if [ "x$WITH_JANITOR" = "xyes" ]; then + # XXX This is NOT exactly the same as in the non-janitor case. + # XXX We are missing the different RUNTIME_CONFIG_DIR-feature. + for RTE in ${JANITOR_RTE} + do + echo " if [ -r \"$RTE\" ] ; then" >> $PBS_JOB_SCRIPT + echo " source \"$RTE\" 2 " >> $PBS_JOB_SCRIPT + echo " fi" >> $PBS_JOB_SCRIPT + done +else + i=0 + eval "var_is_set=\${joboption_runtime_$i+yes}" + while [ ! -z "${var_is_set}" ] ; do + if [ "$i" = '0' ] ; then + echo "if [ ! -z \"\$RUNTIME_CONFIG_DIR\" ] ; then" >> $PBS_JOB_SCRIPT + fi + eval "var_value=\"\${joboption_runtime_$i}\"" + echo " if [ -r \"\${RUNTIME_CONFIG_DIR}/${var_value}\" ] ; then" >> $PBS_JOB_SCRIPT + echo " source \${RUNTIME_CONFIG_DIR}/${var_value} 2 " >> $PBS_JOB_SCRIPT + echo " fi" >> $PBS_JOB_SCRIPT + i=$(( i + 1 )) + eval "var_is_set=\${joboption_runtime_$i+yes}" + done + if [ ! "$i" = '0' ] ; then + echo "fi" >> $PBS_JOB_SCRIPT fi - eval "var_value=\"\${joboption_runtime_$i}\"" - echo " if [ -r \"\${RUNTIME_CONFIG_DIR}/${var_value}\" ] ; then" >> $PBS_JOB_SCRIPT - echo " source \${RUNTIME_CONFIG_DIR}/${var_value} 2 " >> $PBS_JOB_SCRIPT - echo " fi" >> $PBS_JOB_SCRIPT - i=$(( i + 1 )) - eval "var_is_set=\${joboption_runtime_$i+yes}" -done -if [ ! "$i" = '0' ] ; then - echo "fi" >> $PBS_JOB_SCRIPT fi echo "" >> $PBS_JOB_SCRIPT Index: grid-manager/jobs/job.h =================================================================== --- grid-manager/jobs/job.h (Revision 6229) +++ grid-manager/jobs/job.h (Revision 6671) @@ -91,6 +91,9 @@ /* uid and gid of job's owner */ uid_t job_uid; gid_t job_gid; + /* pointer to janitor child process. used during prepare & finish */ + RunElement* janitor; + bool done_downloader, done_janitor, have_janitor, cleanup_janitor, deploy_needed; public: /* external utility beeing run to perform tasks like stage-in/our, submit/cancel. 
(todo - move to private) */ Index: grid-manager/jobs/states.cc =================================================================== --- grid-manager/jobs/states.cc (Revision 6229) +++ grid-manager/jobs/states.cc (Revision 6671) @@ -14,6 +14,7 @@ #include "../jobs/job_request.h" #include "../run/run_parallel.h" #include "../config/environment.h" +#include "../../arclib/arc/configcore.h" #include "../misc/inttostring.h" #include "../misc/stringtoint.h" #include "../mail/send_mail.h" @@ -356,7 +357,7 @@ }; } -bool JobsList::state_loading(const JobsList::iterator &i,bool &state_changed,bool up) { +bool JobsList::state_loading_downloader(const JobsList::iterator &i,bool &state_changed,bool up) { /* RSL was analyzed/parsed - now run child process downloader to download job input files and to wait for user uploaded ones */ if(i->child == NULL) { /* no child started */ @@ -539,6 +540,91 @@ return true; } + +//invoke the janitor. steps: 1=accepting, 2=preparing 3=finishing +//returns return code +int JobsList::state_janitor(const JobsList::iterator &i, bool &state_changed, int which_step) { + if(i->janitor == NULL) { /* janitor is not running (yet) */ + odlog(VERBOSE)<job_id<<": state: ACCEPTING/PREPARING/FINISHING: starting janitor"< arglist; + std::string cmd = nordugrid_libexec_loc+"/janitor"; + arglist.push_back(cmd); + + if(which_step==1) arglist.push_back(std::string("register")); + else if(which_step==2) arglist.push_back(std::string("deploy")); + else if(which_step==3) arglist.push_back(std::string("remove")); + + arglist.push_back(i->job_id); + + if(which_step==1) { + //create grami which contains rte list + const char *local_transfer_s = NULL; + if(use_local_transfer) { + local_transfer_s="joboption_localtransfer=yes"; + }; + if(!write_grami(*i,*user,local_transfer_s)) { + olog<job_id<<": State: ACCEPTING: failed creating grami for janitor"<AddFailure("Failed creating grami for janitor (accepting)"); + state_changed=true; + return -1; + }; + + //open grami + char 
buf[256]; + std::string fgrami = user->ControlDir() + "/job." + i->job_id + ".grami"; + std::ifstream f(fgrami.c_str()); + if(!f.is_open()) + { + olog<job_id<<": State: ACCEPTING: failed to load grami for janitor"<AddFailure("Failed to load grami for janitor (accepting)"); + state_changed=true; + return -1; + } + + //we need list of rte's + const char* runtime_line = "joboption_runtime_"; + int runtime_line_len = strlen(runtime_line); + + for(;!f.eof();) { + istream_readline(f,buf,sizeof(buf)); + if(strncmp(runtime_line,buf,runtime_line_len)) continue; + if(!strchr(buf,'='))continue; + std::string str(strchr(buf,'=')+1); + arglist.push_back(str); + }; + f.close(); + } + + int argn = arglist.size(); + char ** args = new char* [argn+1]; + for(int iii=0;iiiCachePrivate() || user->StrictSession()); + if(!RunParallel::run(*user,*i,args,&(i->janitor),switch_user)) { + olog<job_id<<": Failed to run janitor process."<AddFailure("Failed to run janitor (accepting)"); + else if(which_step==2) i->AddFailure("Failed to run janitor (preparing)"); + else if(which_step==3) i->AddFailure("Failed to run janitor (finishing)"); + state_changed=true; + return -1; + }; + } else { + if(i->janitor->get_pid() != -1) { + odlog(VERBOSE)<job_id<<": State: ACCEPTING/PREPARING/FINISHING: janitor is running"<janitor->get_exit_code(); + odlog(VERBOSE)<job_id<<": State: ACCEPTING/PREPARING/FINISHING janitor finished with exit code: "<janitor); i->janitor=NULL; + state_changed=true; + return exit_code; + } + //JobFailStateRemember(i,JOB_STATE_PREPARING); + return 0; +} + bool JobsList::JobPending(JobsList::iterator &i) { if(i->job_pending) return true; i->job_pending=true; @@ -723,9 +809,50 @@ job_error=true; return; /* go to next job */ }; + + odlog(VERBOSE)<job_id<<": State: ACCEPTED: i->done_janitor: "<done_janitor<done_janitor) { + int janitor_return = -1; + //check if we are supposed to use the janitor + if(!i->janitor) { + try { + ReadConfig(nordugrid_config_loc).FindConfGrp("janitor",""); + 
i->have_janitor = true; + i->done_janitor = false; + i->deploy_needed = true; + } catch(ConfigError) { + //there is no janitor configuration block, so nothing to do here + i->have_janitor = false; + i->done_janitor = false; + i->deploy_needed = false; + } + } + + if (i->have_janitor) { + janitor_return = state_janitor(i,i->done_janitor,1); + + if(i->done_janitor) { + if(janitor_return < 0) { + i->AddFailure("error spawning janitor"); + job_error=true; + return; + } else if(janitor_return > 1) { + i->AddFailure("janitor says RTE not possible"); + job_error=true; + return; + } else { + i->deploy_needed = (janitor_return == 1); + } + } + return; //block further processing of this job while janitor is running + } + } + + // XXX I'm not sure about all the conditions. This is just a try. if((max_jobs_processing == -1) || - (use_local_transfer) || - (i->local->downloads == 0) || + (((use_local_transfer) || + (i->local->downloads == 0)) && ((!i->have_janitor) || (!i->deploy_needed))) || (JOB_NUM_PROCESSING < max_jobs_processing) || ((JOB_NUM_FINISHING >= max_jobs_processing) && (JOB_NUM_PREPARING < max_jobs_processing_emergency))) { @@ -761,24 +888,75 @@ /* preparing state - means job is parsed and we are going to download or already downloading input files. process downloader is run for that. 
it also checks for files user interface have to upload itself*/ + + if(i->job_pending) { + if((JOB_NUM_RUNNINGjob_state = JOB_STATE_SUBMITING; + state_changed=true; once_more=true; + } else { + state_changed=false; + JobPending(i); + } + return; + } + odlog(VERBOSE)<job_id<<": State: PREPARING"<job_pending || state_loading(i,state_changed,false)) { - if(i->job_pending || state_changed) { - if((JOB_NUM_RUNNINGjob_state = JOB_STATE_SUBMITING; - state_changed=true; once_more=true; + + if( i->child==NULL && i->janitor==NULL ) { + odlog(VERBOSE)<job_id<<": State: PREPARING: we just entered, set janitor and downloader to NOT DONE"<done_downloader = false; + i->done_janitor = false; + i->cleanup_janitor = false; + if(!i->have_janitor || !i->deploy_needed) { + odlog(VERBOSE)<job_id<<": State: PREPARING: skipping janitor"<done_janitor = true; + } + } + + // is the downloader still running? + if (!i->done_downloader) { + bool success_downloader = state_loading_downloader(i,i->done_downloader,false); + if(!success_downloader) { + i->AddFailure("downloader failed (preparing)"); + if(!i->done_janitor && i->janitor) { + //janitor is running .. wait then cleanup + i->cleanup_janitor = true; } else { - state_changed=false; - JobPending(i); - }; - }; + JobFailStateRemember(i,JOB_STATE_PREPARING); + job_error = state_changed = true; + return; + } + } + } + + // is the janitor still running? + if (!i->done_janitor) { + int return_janitor = state_janitor(i,i->done_janitor,2); + if( i->done_janitor ) { + if(return_janitor!=0) { + i->AddFailure("janitor failed (preparing)"); + i->cleanup_janitor = true; + } + + if(i->cleanup_janitor) { + odlog(VERBOSE)<job_id<<": State: PREPARING: something failed. cleaning up janitor"<done_janitor,3); // XXX maybe i is already freed when i->done_janitor is written... 
+ RunParallel::release(i->janitor); i->janitor=NULL; + + JobFailStateRemember(i,JOB_STATE_PREPARING); + job_error = state_changed = true; + return; + } + } + } + + if(i->done_downloader && i->done_janitor) { + JobPending(i); + state_changed=false; + once_more=true; } else { - if(i->GetFailure().length() == 0) - i->AddFailure("downloader failed (pre-processing)"); - job_error=true; - return; /* go to next job */ - }; - return; + state_changed=false; + } } void JobsList::ActJobSubmiting(JobsList::iterator &i,bool hard_job, @@ -877,20 +1055,62 @@ bool& once_more,bool& delete_job, bool& job_error,bool& state_changed) { odlog(VERBOSE)<job_id<<": State: FINISHING"<job_state = JOB_STATE_FINISHED; - once_more=true; hard_job=true; - }; - } else { - // i->job_state = JOB_STATE_FINISHED; - state_changed=true; /* to send mail */ + + if( i->child==NULL && i->janitor==NULL ) { + odlog(VERBOSE)<job_id<<": State: FINISHING: we just entered, set downloader and janitor to NOT DONE"<done_downloader = false; + i->done_janitor = false; + i->cleanup_janitor = false; + if(!i->have_janitor) i->done_janitor = true; + } + + /* is the downloader still running? */ + if(!i->done_downloader) { + bool success_downloader = state_loading_downloader(i,i->done_downloader,true); + if(!success_downloader) { + odlog(VERBOSE)<job_id<<": State: FINISHING: downloader failed"<AddFailure("downloader failed (post-processing)"); + + if(!i->done_janitor && i->janitor) { + i->cleanup_janitor = true; + } else { + i->done_janitor = true; + state_changed=true; + once_more=true; + hard_job=true; + job_error = true; + return; + } + } + } + + /* is the janitor still running? 
*/ + if(!i->done_janitor) { + int return_janitor = state_janitor(i,i->done_janitor,3); + if(i->done_janitor) { + if( return_janitor!=0) { + odlog(VERBOSE)<job_id<<": State: FINISHING: janitor failed"<AddFailure("janitor failed (post-processing)"); + i->cleanup_janitor = true; + } + if(i->cleanup_janitor) { + odlog(VERBOSE)<job_id<<": State: FINISHING: something failed .. so cleanup"<done_janitor = true; + state_changed=true; + once_more=true; + hard_job=true; + job_error = true; // XXX actually, from the point of view of the Job, this is NOT an error + return; + } + } + } + + + if(i->done_downloader && i->done_janitor) { + i->job_state = JOB_STATE_FINISHED; + state_changed = true; once_more=true; hard_job=true; - if(i->GetFailure().length() == 0) - i->AddFailure("uploader failed (post-processing)"); - job_error=true; - return; /* go to next job */ - }; + } return; } @@ -1029,6 +1249,12 @@ RunParallel::release(i->child); i->child=NULL; }; + /* kill running janitor */ + if(i->janitor) { + i->janitor->kill(); + RunParallel::release(i->janitor); + i->janitor=NULL; + }; /* put some explanation */ i->AddFailure("User requested to cancel the job"); /* behave like if job failed */ Index: grid-manager/jobs/states.h =================================================================== --- grid-manager/jobs/states.h (Revision 6229) +++ grid-manager/jobs/states.h (Revision 6671) @@ -77,8 +77,12 @@ /* Perform actions necessary in case job goes to/is in SUBMITTING/CANCELING state */ bool state_submiting(const iterator &i,bool &state_changed,bool cancel=false); + /* Same for PREPARING/FINISHING */ - bool state_loading(const iterator &i,bool &state_changed,bool up); + bool state_loading_downloader(const iterator &i,bool &state_changed,bool up); + /* used during ACCEPTING/PREPARING/FINISHING to invoke the janitor */ + int state_janitor(const iterator &i,bool &state_changed,int which_step); + bool JobPending(JobsList::iterator &i); job_state_t JobFailStateGet(const iterator &i); bool 
JobFailStateRemember(const iterator &i,job_state_t state); Index: grid-manager/jobs/job.cc =================================================================== --- grid-manager/jobs/job.cc (Revision 6229) +++ grid-manager/jobs/job.cc (Revision 6671) @@ -63,7 +63,11 @@ job_state=JOB_STATE_UNDEFINED; job_pending=false; child=NULL; + janitor=NULL; local=NULL; + done_janitor = done_downloader = cleanup_janitor = false; + have_janitor = false; + deploy_needed = true; job_uid=0; job_gid=0; } @@ -76,6 +80,9 @@ keep_finished=job.keep_finished; keep_deleted=job.keep_deleted; child=NULL; + janitor=NULL; + have_janitor=job.have_janitor; deploy_needed=job.deploy_needed; + done_janitor=job.done_janitor; done_downloader=job.done_downloader; cleanup_janitor=job.cleanup_janitor; // copy every janitor/downloader flag so none is left uninitialized local=job.local; job_uid=job.job_uid; job_gid=job.job_gid; } @@ -88,12 +95,15 @@ keep_finished=DEFAULT_KEEP_FINISHED; keep_deleted=DEFAULT_KEEP_DELETED; child=NULL; + janitor=NULL; + done_janitor=done_downloader=cleanup_janitor=have_janitor=false; deploy_needed=true; /* same defaults as the default constructor; have_janitor is re-evaluated in ACCEPTED */ local=NULL; job_uid=0; job_gid=0; } JobDescription::~JobDescription(void){ /* child is not destroyed here */ + /* thus, janitor is not destroyed here either */ } bool JobDescription::GetLocalDescription(const JobUser &user) {