#!/bin/ksh

# Usage: qsubfim.frost [directory]
#
# submits FIM job via qsub
#
# If directory argument is present, assume we are running via test automation, cd
# to the specified directory, set the sync option, submit FIM job and wait
# until it finishes.
#
# All other variables are set in the FIMnamelist file.
#
# Outline of what this part of the script does:
#   1. Sources ./functions.ksh (helper functions such as fail, get_from_nl, ...).
#   2. Validates the argument count and optionally changes to the run directory.
#   3. Creates a per-process run directory (qsubfim_<pid>) and populates it.
#   4. Collects core counts, build configuration and queue time.
#   5. Prints a summary and assembles the qsub command line.

#TODO: Remove duplication with other ?subfim scripts!

CONTEXT="qsubfim.frostintel"

# Source shared-functions code & set up tracing
. ./functions.ksh # Most function definitions can be found here.
set +o xtrace     # comment out to enable verbose qsubfim trace
ksh_check         # Verify that ksh93 is running/available.

# Zero arguments: interactive use. One argument: run directory supplied
# (sync="yes"). Anything else is an error.
if [[ "$#" -eq 0 ]]; then
  sync="no"
elif [[ "$#" -eq 1 ]]; then
  test -d "$1" || fail "Run directory not found: $1"
  # Quoted so a directory name containing whitespace survives the cd.
  cd "$1" || fail "Cannot cd to $1"
  sync="yes"
else
  fail "Too many arguments"
fi

set_fimnamelist

FIMSETUP="fim_setup.ksh"

# Make sure FIMnamelist exists
test -f "$fimnamelist" || \
  fail "Please \"cp ${fimnamelist}.default $fimnamelist\" and edit the latter appropriately."

# Get SRCDIR & make absolute
get_srcdir # This must always come before any other get_* calls
cd "$SRCDIR" || fail "Cannot cd to $SRCDIR."
SRCDIR="$PWD"
# Return to the previous directory; abort if that fails, otherwise the run
# directory below would be created in the wrong place.
cd - || fail "Cannot cd back to previous directory."

# Set up run directory ($$ is this shell's process id)
rundir="qsubfim_$$"
mkdir "$rundir" || fail "Cannot make directory $rundir."
print "Made directory $rundir."
copyfiles "$PWD" "$rundir" || fail "Cannot copy contents of $PWD -> $rundir."
copyfiles "$SRCDIR/bin" "$rundir" || \
  fail "Cannot copy contents of $SRCDIR/bin -> $rundir."
cp "$SRCDIR/$FIMSETUP" "$rundir" || \
  fail "Cannot cpy $SRCDIR/$FIMSETUP -> $rundir"
cd "$rundir" || fail "Cannot cd to $rundir"

ksh_fix # Modify run scripts to use ksh93, if necessary.

# Get number of cores to ask for.
# The "cmd | read var" idiom relies on ksh running the last pipeline stage in
# the current shell, so the variable is still set after the pipeline.
./get_num_cores | grep "num_cores_batch:" | sed 's/^.*://' | read N || \
  fail "Could not get num_cores_batch."

# Find out if we'll run serial or parallel and set up appropriately
get_from_nl ComputeTasks as PES
get_from_nl Parallelism as parallelism
if [[ "$parallelism" == "parallel" ]]; then
  FIM="fim"
  ParaSuffix="$PES"
else
  FIM="fimS"
  ParaSuffix="S"
fi

# Get build configuration from $SRCDIR
# On failure the text read into FC (if any) is used as the error message.
./get_buildconfig.ksh "$SRCDIR" | read FC || fail "$FC"

# Set up run-time environment
xsource_notrace ./$FIMSETUP $FC

# Determine other runtime parameters
get_from_nl GLVL
get_from_nl NVL
./GetQueueTime | read QT || fail "GetQueueTime failed."

# Choose a run queue
# debug queue gives faster turnaround M-Th 9-5 Eastern. Time limit is 1 hour
# The second assignment wins; remove or comment it out to use the debug queue.
Q="debug"
Q="batch"

# Do COMPARE_VAR setup
compare_var_setup

# Diagnostics
check_nems
./get_num_cores | grep "num_cores_donothing:" | sed 's/^.*://' | read dnt || \
  fail "Could not get num_cores_donothing."
./get_num_cores | grep "num_nodes_wt:" | sed 's/^.*://' | read num_nodes_wt || \
  fail "Could not get num_nodes_wt."
./get_num_cores | grep "num_cores_notattached:" | sed 's/^.*://' | read num_cores_notattached || \
  fail "Could not get num_cores_notattached."

# NOTE(review): $nwt is not assigned anywhere in this part of the script;
# presumably one of the helper calls above sets it -- confirm.
print "Submitting job to queue $Q:"
print "compute tasks: $PES"
print "write tasks: $nwt (write nodes: $num_nodes_wt)"
print "do_nothing tasks: $dnt"
print "total core request: $N (no partial nodes)"
print "cores unattached: $num_cores_notattached"

# NOTE(review): $NODES and $cpn are not assigned in this part of the script;
# presumably they come from the sourced setup/helpers -- confirm.
SUBMIT_CMD="qsub"
SUBMIT_ARGS="-N fim${GLVL}_${NVL}_${ParaSuffix} \
  -q $Q \
  -lnodes=$NODES:ppn=$cpn \
  -l walltime=$QT \
  -A csc025fsc \
  -d $PWD"

# Create script for later potential submission to restart the job
# NOTE(review): the statement below is cut off in the text available for this
# review (the redirection has no target and the here-document body is missing).
# It is preserved verbatim as a comment; restore the full statement from the
# original script before relying on qsubfim.restart.
# cat > qsubfim.restart <