#!/usr/bin/perl
# tweak_hostfile - reconfigure the hostfile for particular settings
# changes are done inplace to $PBS_NODEFILE (if set in the environment)
# and to all files listed on the command line
#
# Each hostfile is read as a list of host names, one per line; the number of
# times a host appears is taken as the number of process slots it originally
# had.  The file is then rewritten so that each host appears as many times as
# the -i/-s/-e/-g/-p/-n options dictate (see print_help below).
#
use strict;
use warnings;
use Getopt::Std;

my %options = ();
getopts("n:p:g:i:s:e:hd", \%options);

# -h prints the usage text and exits.
print_help() if defined($options{h});

my $debug         = defined($options{d}) ? 1 : 0;
# Number of leading hosts that are written exactly once (IO nodes).
my $ionodes       = defined($options{i}) ? $options{i} : 0;
# Total number of processes to spread over the "ordinary" hosts (-p).
my $procset       = $options{p};
# Colon-separated per-host counts for the first (-s) and last (-e) hosts.
my $start_ionodes = $options{s};
my $end_ionodes   = $options{e};
# Fixed count per ordinary host (-n); <= 0 means "keep the original count".
my $hostspernode  = defined($options{n}) ? $options{n} : -1;
# (N, M, O, P) from "-g NxM:OxP"; empty when -g was not given.
my @grid;

if (defined($options{g})) {
    if ($options{g} =~ /\A([1-9][0-9]*)x([1-9][0-9]*):([1-9][0-9]*)x([1-9][0-9]*)\z/) {
        @grid = ($1, $2, $3, $4);
    }
    else {
        # Stop here: continuing would rewrite the machine file in place
        # without the layout the user asked for.
        print STDERR "ERROR: You specified an invalid option for -g; you must specify -g NxM:OxP where N, M, O and P are positive integers.\n";
        exit(1);
    }
}

if (defined($options{s}) && defined($options{i})) {
    print STDERR "Warning, you specified both -s and -i, which are mutually exclusive. Ignorning the -i option.\n";
    $ionodes = 0;
}
if (defined($options{e}) && defined($options{i})) {
    print STDERR "Warning, you specified both -e and -i, which could conflict. If -i and -e are needed, you should really use -s, not -i. Attempting to follow orders\n";
}

# $PBS_NODEFILE, when set, is processed first unless it was also named
# explicitly on the command line.
if (defined($ENV{"PBS_NODEFILE"})) {
    my $pbs_nodefile = $ENV{"PBS_NODEFILE"};
    print("Found MACHINE_FILE: $pbs_nodefile\n") if $debug;
    my $found = 0;
    foreach my $arg (@ARGV) {
        if ($arg eq $pbs_nodefile) {
            print("WARNING: The file listed on the command line ($arg) matches the file set in \$PBS_NODEFILE, ignorning \$PBS_NODEFILE\n");
            $found = 1;
        }
    }
    unshift(@ARGV, $pbs_nodefile) if !$found;
}

# Turn $start_ionodes and $end_ionodes into an array
my @start_ioar = ();
my $startiolen = 0;
my @end_ioar   = ();
my $endiolen   = 0;
if (defined($start_ionodes) && $start_ionodes ne "") {
    @start_ioar = split(/:/, $start_ionodes);
    $startiolen = @start_ioar;
    print "Start_io Array @start_ioar, $startiolen\n" if $debug;
}
if (defined($end_ionodes) && $end_ionodes ne "") {
    @end_ioar = split(/:/, $end_ionodes);
    $endiolen = @end_ioar;
    print "End_io Array @end_ioar, $endiolen\n" if $debug;
}

my $tmpfn = "/tmp/tweak_tmp.$$";
print("Using $tmpfn for temporary data\n") if $debug;

foreach my $hostfn (@ARGV) {
    print "file: $hostfn\n" if $debug;

    # %nodelist: host name -> number of times it appeared in the file.
    # @nodear:   distinct host names in order of first appearance.
    my %nodelist;
    my @nodear;

    open(my $in, '<', $hostfn) || die "Unable to open $hostfn: $!\n";
    while (my $line = <$in>) {
        chomp($line);
        if (!defined($nodelist{$line})) {
            $nodelist{$line} = 1;
            push(@nodear, $line);
        }
        else {
            $nodelist{$line}++;
        }
    }
    close($in);

    # Now create a new file
    open(my $out, '>', $tmpfn) || die "Unable to open temporary file $tmpfn for writing\n";

    my $pos          = 0;          # index of the next host to be written
    my $total_length = @nodear;
    my $ingrid       = 0;          # 0: grid not written, 1: write it now, 2: done
    my $procleft     = $procset;   # -p processes not yet assigned
    my $procnodes;                 # hosts still to receive -p processes
    my $step;                      # hosts consumed by the current iteration

    for (my $inode = 0; $inode <= $#nodear; $inode += $step) {
        $step = 1;
        my $k = $nodear[$inode];
        my $cnt;
        print("$k $pos $ionodes $total_length $step $inode\n") if $debug;

        if ($pos < $ionodes) {
            # -i: leading IO hosts are listed exactly once.
            $cnt = 1;
        }
        elsif ($pos < $startiolen) {
            print("Tweaking start_ioar $pos\n") if $debug;
            $cnt = $start_ioar[$pos];
        }
        elsif ($pos >= $total_length - $endiolen) {
            # -e counts are given last-host-first, hence the reversed index.
            print("Tweaking end_ioar $pos $total_length $endiolen\n") if $debug;
            $cnt = $end_ioar[$total_length - $pos - 1];
        }
        elsif ($#grid < 0 || $ingrid == 2) {
            if (defined($procset)) {
                if (!defined($procnodes)) {
                    # Distribute $procset processors over the
                    # remaining non-end (-e) nodes.
                    $procnodes = $total_length - $endiolen - $inode;
                    $procleft  = $procset;
                    print("Distribute $procset processors over $procnodes nodes ...") if $debug;
                }
                # Rounded share of what is left, so the remainder is spread
                # across hosts instead of piling up on the last one.
                $cnt = int($procleft / $procnodes + 0.5);
                $procleft -= $cnt;
                $procnodes = $procnodes - 1;
                print(" ... put $cnt on node $inode, leaving $procleft for $procnodes nodes...\n") if $debug;
            }
            elsif ($hostspernode <= 0) {
                # if specific process count is not specified, don't change it
                $cnt = $nodelist{$k};
            }
            else {
                $cnt = $hostspernode;
            }
        }
        else {
            # First ordinary host with -g given: emit the grid from here.
            $ingrid = 1;
        }

        if ($ingrid == 1) {
            # The grid consumes N*M hosts in one iteration.
            $step = $grid[0] * $grid[1];
            if ($pos + $step > $total_length - $endiolen) {
                # Do not leave a half-written temporary file behind.
                close($out);
                unlink($tmpfn);
                die "ABORT!! Tried to use more nodes than are allocated (your grid is too big). Machine file is unmodified.\n";
            }
            print("Writing $grid[0] x $grid[1] quilt of $grid[2] x $grid[3] patches starting with node $nodear[$inode]\n") if $debug;
            $pos += $step;

            for (my $j = $inode; $j < $inode + $step; $j++) {
                if ($grid[2] * $grid[3] > $nodelist{$nodear[$j]}) {
                    print("tweak_hostfile: WARNING, you are requesting that we write more hosts out for host $nodear[$j] than were originally available. Blissfully following orders....\n");
                }
            }

            # For each row of hosts, repeat the row P times, and within each
            # repetition list every host of the row O times.
            for (my $y = 0; $y < $grid[1]; $y++) {
                for (my $rep = 0; $rep < $grid[3]; $rep++) {
                    for (my $x = 0; $x < $grid[0]; $x++) {
                        my $j = $y * $grid[0] + $x + $inode;
                        print {$out} ("$nodear[$j]\n" x $grid[2]);
                    }
                }
            }
            $ingrid = 2;
        }
        else {
            $pos++;
            if ($cnt > $nodelist{$k}) {
                print("tweak_hostfile: WARNING, you are requesting that we write more hosts out for host $k than were originally available. Blissfully following orders....\n");
            }
            print("Writing $k, $cnt times\n") if $debug;
            print {$out} ("$k\n" x $cnt);
        }
    }
    # Buffered write errors only surface at close.
    close($out) || die "Unable to finish writing temporary file $tmpfn: $!\n";

    # Now, move the file to the original file.  The list form of system()
    # bypasses the shell, so odd characters in $hostfn are passed verbatim.
    system('mv', $tmpfn, $hostfn) == 0
        or die "Unable to move $tmpfn to $hostfn\n";
}

# print_help - print the usage text on stdout and exit with status 1.
# Takes no arguments and never returns.
sub print_help {
    print("\n");
    print <<'END_HELP';
  tweak_hostfile [-n <num>] [-p <num_processes>] [-i <num>] [-s <n:n:...>] [-e <n:n:...>] [-g NxM:OxP] [-d] [-h] [hostfile ...]

    -n <num>, specify an integer for the number of processes you want
        to execute on each host
    -p <num_processes>, distribute num_processes processes over the nodes
        that are not used by the -i, -s, -g or -e below.  These processes
        will occur after -i, -s and -g processors, but before -e
        processors.  If -n and -p are both specified, -p overrides and -n
        is ignored.
    -i <num>, specify and integer for the number of hosts, from the
        beginning of the list, that should be listed only once for use as
        IO nodes.
    -s <n:n:...>, specify integers separated by colons for each host
        starting with the first on how many cores will be used for each
        host.  For example, "-s 1" is the same as "-i 1".  If you wanted
        to skip use of the first 4 hosts, you could do "-s 0:0:0:0".  If
        you wanted to schedule only 2 processes on the first 3 hosts, you
        could do "-s 2:2:2".
    -e <n:n:...>, specify integers separated by colons for each host
        starting with the last to specify number of cores.  Similar to the
        "-s" option, but this starts with the last host and counts
        backwards.  For example, "-e 1" specifies that only one process
        will be on the last host.  Using "-e 1:2", speifies the last host
        has one process, and the 2nd to last has 2 processes.  Note that
        with this option, the hosts are identified from last moving
        forward.
    -g NxM:OxP, creates an NxM quilt of nodes, each of which contains an
        OxP patch of processes.  Any nodes left over after this will be
        handled by -n or -e.  For use with WRF nprocs_x and nprocs_y.  Set
        nprocs_x=N*O and nprocs_y=M*P.
    -h  This message
    -d  Debugging information
END_HELP
    exit(1);
}