Changes needed to port OSG modifications to GRAM5 jobmanager-condor

Overview

OSG uses a patched/augmented version of jobmanager-condor (condor.in -> condor.pm). The jobmanager-condor changed slightly between GT2 and GRAM5, so we had to merge the two.

-- IgorSfiligoi - 2009/11/16

The changes implemented to the UCSD GT2 condor.pm file from the GRAM5 Beta1 condor.in file are as follows (bear in mind, this is not currently a working file and still requires further patching).

- (line 9)

use Config;

-/+ (line 16) my ($condor_submit, $condor_rm, $condor_config);

+ (line 22)

# Settings given as @...@ placeholders; presumably substituted when condor.in
# is turned into condor.pm -- TODO confirm against the build system.
$condor_config = '@CONDOR_CONFIG@';
$condor_check_vanilla_files
= '@CONDOR_CHECK_VANILLA_FILES@';
$condor_mpi_script = '@CONDOR_MPI_SCRIPT@';
# Export CONDOR_CONFIG into the environment only when a non-empty value was configured.
if ($condor_config ne '')
{
$ENV{CONDOR_CONFIG} = $condor_config;
}

+ (line 150)

# Jobs of type 'mpi' are placed in the 'parallel' universe unless
# $condor_mpi_script is set to 'no'.
elsif($description->jobtype() eq 'mpi' && $condor_mpi_script ne 'no')
{
$universe = 'parallel';
}

-/+ (line 176)

if($universe eq 'standard' || $condor_check_vanilla_files eq 'yes')

-/+ (line 203)

foreach my $tuple ($description->environment())

-/+ (line 268)

# Build the submit-file argument string: every argument has its single and
# double quotes doubled, is wrapped in single quotes, the results are joined
# with single spaces, and the whole string is wrapped in double quotes.
# Note: map aliases $_ to each element, so @arguments itself is modified in place.
$argument_string = '"' . join(' ',
map
{
# Double embedded quote characters, then single-quote the argument.
$_ =~ s/'/''/g;
$_ =~ s/"/""/g;
$_ = "'$_'";
}
@arguments) . '"';

-/+ (line 355)

# Pass should_transfer_files through to the submit file, but only when the
# job description defines it; the addition is also logged.
my $shouldtransferfiles = $description->shouldtransferfiles();
if (defined($shouldtransferfiles))
{
$self->log("Adding \"should_transfer_files = $shouldtransferfiles\"\n");
print SCRIPT_FILE "should_transfer_files = $shouldtransferfiles\n";
}
# Same treatment for WhenToTransferOutput: written and logged only when defined.
my $WhenToTransferOutput = $description->whentotransferoutput();
if (defined($WhenToTransferOutput))
{
$self->log("Adding \"WhenToTransferOutput = $WhenToTransferOutput\"\n");
print SCRIPT_FILE "WhenToTransferOutput = $WhenToTransferOutput\n";
}
# Emit "transfer_input_files" into the submit file.
# transferinputfiles() is called first in scalar context; a defined result is
# written verbatim. Otherwise it is called again in list context and, when the
# first element is defined, the entries are written as a ", "-separated list.
# Nothing is written when neither call yields a value.
my $transfer_input_files = $description->transferinputfiles();
if (defined($transfer_input_files))
{
    $self->log("Adding explicitly \"transfer_input_files = "."$transfer_input_files\"\n");
    print SCRIPT_FILE "transfer_input_files = $transfer_input_files\n";
}
else
{
    my @transfer_input_files = $description->transferinputfiles();
    if (defined($transfer_input_files[0]))
    {
        # join() builds the list without a trailing separator, replacing the
        # append-then-strip loop whose binding operator (=~) had been lost.
        my $file_list_string = join(', ', @transfer_input_files);
        $self->log("Adding \"transfer_input_files = $file_list_string\"\n");
        print SCRIPT_FILE "transfer_input_files = $file_list_string\n";
    }
}
# Emit "transfer_output_files" into the submit file.
# transferoutputfiles() is called first in scalar context; a defined result is
# written verbatim. Otherwise it is called again in list context and, when the
# first element is defined, the entries are written as a ", "-separated list.
# Nothing is written when neither call yields a value.
my $transfer_output_files = $description->transferoutputfiles();
if (defined($transfer_output_files))
{
    $self->log("Adding explicitly \"transfer_output_files = "."$transfer_output_files\"\n");
    print SCRIPT_FILE "transfer_output_files = $transfer_output_files\n";
}
else
{
    my @transfer_output_files = $description->transferoutputfiles();
    if (defined($transfer_output_files[0]))
    {
        # The previous loop used "." instead of ".=", so the list was never
        # accumulated and an empty value was written; join() builds it directly.
        my $file_list_string = join(', ', @transfer_output_files);
        $self->log("Adding \"transfer_output_files = "."$file_list_string\"\n");
        print SCRIPT_FILE "transfer_output_files = $file_list_string\n";
    }
}
# Write the stdout/stderr destinations and the queue statement(s).
if ($universe eq 'parallel')
{
    # Parallel universe: a single "queue" with machine_count set to the
    # requested count. stderr must go to "Error"; writing it as a second
    # "Output" line would override the stdout destination.
    print SCRIPT_FILE "Output = " . $description->stdout() . "\n";
    print SCRIPT_FILE "Error = " . $description->stderr() . "\n";
    print SCRIPT_FILE "machine_count = " . $description->count() . "\n";
    print SCRIPT_FILE "queue\n";
}
else
{
    # Any other universe: one "queue 1" per requested instance, each preceded
    # by its own Output/Error lines.
    for (1 .. $description->count()) {
        if ($multi_output) {
            # Each instance gets its own files from the stdio merger.
            print SCRIPT_FILE "Output = " .
                $self->{STDIO_MERGER}->add_file('out') . "\n";
            print SCRIPT_FILE "Error = " .
                $self->{STDIO_MERGER}->add_file('err') . "\n";
        } else {
            print SCRIPT_FILE "Output = " . $description->stdout() . "\n";
            print SCRIPT_FILE "Error = " . $description->stderr() . "\n";
        }
        print SCRIPT_FILE "queue 1\n";
    }
}

These are the GRAM5 changes; below are the NFS-Lite changes.

+ (line 28)

my $isNFSLite = 1; # NFS-lite mode switch: any true value (e.g. 1) enables it
my $scratch_isset = 0; # Set to 1 when the job environment contains SCRATCH_DIRECTORY, which indicates a likely GRAM job

-/+ (line 216)

# NFS lite start
# Rewrites the job environment (@environment, a list of [name, value] pairs)
# and chooses the wrapper's initial working directory when NFS-lite is enabled.
if ($isNFSLite) {
    my $osg_grid = '';
    my $use_osg_grid = 1;

    # One pass over the environment: record the values needed below and
    # rewrite the entries that must not reach the worker node unchanged.
    for my $tuple (@environment) {
        my $name = $tuple->[0];
        if ($name eq "OSG_GRID") {
            $osg_grid = $tuple->[1];
        } elsif ($name eq "OSG_DONT_USE_OSG_GRID_FOR_GL") {
            $use_osg_grid = 0;
        } elsif ($name eq "LOGNAME") {
            $logname = $tuple->[1];
        } elsif ($name eq "SCRATCH_DIRECTORY") {
            # Remember the original value and point the job at Condor's scratch dir.
            $scratch_isset = 1;
            $scratch_directory = $tuple->[1];
            $tuple->[1] = '$_CONDOR_SCRATCH_DIR';
        } elsif ($name eq "X509_USER_PROXY") {
            # The variable is renamed, its value is kept.
            $tuple->[0] = "CHANGED_X509";
        }
    }

    # If this is an OSG installation, we set GLOBUS_LOCATION based on OSG_GRID
    # (unless OSG_DONT_USE_OSG_GRID_FOR_GL was present in the environment).
    if ($osg_grid ne '' && $use_osg_grid) {
        for my $tuple (@environment) {
            if ($tuple->[0] eq "GLOBUS_LOCATION") {
                $tuple->[1] = $osg_grid . "/globus";
            }
        }
    }

    if ($scratch_isset) {
        # Remote_InitialDir apparently suppresses the setting of the SCRATCH_DIRECTORY env variable
        $wrapper_arguments .= " -wrapper_iwd " . ' $_CONDOR_SCRATCH_DIR'; # UCSD Mod
        push(@environment,["MY_INITIAL_DIR",'$_CONDOR_SCRATCH_DIR']);
    }
    # \Q...\E keeps metacharacters in the login name from being treated as
    # regex syntax.
    # NOTE(review): the pattern is not anchored, so it matches the logname
    # anywhere after the first character, not only at the end -- confirm intent.
    elsif ( $description->directory() =~ m/.+\Q$logname\E/xms ) {
        # If the directory ends with the logname it might be a globus-job-run job
        # so take control of the initial_dir
        # (".=" is required here; a plain "." discarded the appended option.)
        $wrapper_arguments .= " -wrapper_iwd " . ' $_CONDOR_SCRATCH_DIR'; # UCSD Mod
        push(@environment,["MY_INITIAL_DIR",'$_CONDOR_SCRATCH_DIR'] );
    }
    else {
        # assume that remote_initialdir is set and the submitter knows what they are
        # doing.
        $wrapper_arguments .= " -wrapper_iwd " . $description->directory(); # UCSD Mod
        push(@environment,["MY_INITIAL_DIR",$description->directory()] );
    }
}
# NFS Lite End
# START UCSD Mods
# Setup for groups by matching the logname to the appropriate condor group
#$AccountingGroup = "group_other." . $logname;
#if ($logname =~ /.*cms.*/) { $AccountingGroup = "group_cms." . $logname; }
#if ($logname =~ /.*cmsprod.*/) { $AccountingGroup = "group_cmsprod." . $logname; }
#if ($logname =~ /.*cdf.*/) { $AccountingGroup = "group_cdf." . $logname; }
#if ($logname =~ /.*caf.*/) { $AccountingGroup = "group_cdf." . $logname; }
# STOP UCSD Modification

-/+ (line 334)

# UCSD Mod: only match machines whose OpSys is LINUX and whose Arch is X86_64
# or INTEL. The "==" comparison operators are written out explicitly; without
# them the Requirements expression is not valid.
print SCRIPT_FILE "Requirements = OpSys == \"LINUX\" && (Arch == \"X86_64\" || Arch == \"INTEL\")\n"; # UCSD Mod

+ (line 345)

# START UCSD Modification
# Write the Arguments line: the job's argument string followed by the wrapper options.
# NOTE(review): $argument_string as built in the "(line 268)" hunk is already
# enclosed in double quotes, so $wrapper_arguments ends up after the closing
# quote -- confirm that condor_submit accepts this form.
print SCRIPT_FILE "Arguments = $argument_string $wrapper_arguments\n"; # UCSD added wrapper args
# END UCSD Modification

+ (line 349)

# Presumably writes a copy of the job description to /var/tmp, named after the
# job's uniq_id -- verify what save() does.
$description->save("/var/tmp/description-". $description->uniq_id() . ".desc");
# NOTE(review): the only assignments to $AccountingGroup shown on this page are
# commented out, so this may emit an empty group -- confirm it is set elsewhere.
print SCRIPT_FILE "+AccountingGroup = \"$AccountingGroup\"\n"; # UCSD
# NFS Lite mode
# Forces Condor file transfer on and, for GRAM jobs that set up a scratch
# area, lists the scratch directory's contents as transfer_input_files.
if ($isNFSLite) {
    print SCRIPT_FILE "should_transfer_files = YES\n";
    print SCRIPT_FILE "when_to_transfer_output = ON_EXIT\n";
    print SCRIPT_FILE "transfer_output = true\n";

    # Let's test to make sure the scratch directory exists (diagnostic only).
    if ( -d "$scratch_directory" ) {
        $self->logMe("$scratch_directory found!");
    }
    else {
        $self->logMe("$scratch_directory NOT found!");
    }

    # GRAM Files to transfer to the worker node
    # Only do this if we are dealing with a GRAM job that has set up a scratch area
    # otherwise we assume it is a globus-job-run or the user is using remote_initialdir
    $self->logMe($scratch_directory);
    if ( $scratch_isset && !( $self->isWSGramGlobus() ) ) {
        $self->logMe("Passed the test");
        if ( $self->isWSGramCondorG() ) {
            # For WS-GRAM Condor-G jobs the job directory is used instead.
            $scratch_directory = $description->directory();
            $self->logMe($scratch_directory);
        }
        $self->logMe($scratch_directory);

        # Collect every entry whose name does not start with a dot. A failed
        # opendir is logged and treated as an empty directory. The handle is
        # released with closedir(), since close() does not work on directory
        # handles.
        my @sfiles;
        if ( opendir(my $sdir, $scratch_directory) ) {
            @sfiles = grep { !/^\./ } readdir($sdir);
            closedir($sdir);
        }
        else {
            $self->logMe("Could not open directory $scratch_directory: $!");
        }
        # logMe() records a single string, so the list is joined first.
        $self->logMe(join(",", @sfiles));

        print SCRIPT_FILE "transfer_input_files = ";
        # @flist is not declared in this hunk; presumably it is declared
        # elsewhere in the file -- verify.
        SFILE:
        foreach my $f ( @sfiles ) {
            # NOTE(review): this runs before the directory is prepended, so it
            # cannot collapse a "//" produced by the concatenation below -- confirm intent.
            $f =~ s{\/\/}{\/}g;
            $f = $scratch_directory . "/" . $f;
            # The executable, stdin and stdout files are left out of the list.
            next SFILE if $f eq $description->executable();
            next SFILE if $f eq $description->stdin();
            next SFILE if $f eq $description->stdout();
            push (@flist,"$f");
        }
        print SCRIPT_FILE join(",",@flist);
        print SCRIPT_FILE "\n";
    }
}
# End NFS Lite Mode

+ (line 354)

# UCSD Mode, placed here so that users cannot override the limits below.
# maxRunTime and maxQTime are written as submit-file macros and referenced
# with $(...) in the periodic_remove expression.
# 36 hours
print SCRIPT_FILE "maxRunTime = 129600\n";
# Two Weeks
print SCRIPT_FILE "maxQTime = 1209600\n";
# The attribute names carry no trailing "?" (a wiki rendering artifact). The
# queue-time test is (CurrentTime - QDate): the time since submission. The
# reversed difference is never positive, so that limit could never trigger.
print SCRIPT_FILE "periodic_remove = (RemoteWallClockTime > \$(maxRunTime)) || ((CurrentTime - QDate) > \$(maxQTime))\n";
# UCSD Mode Ends

+ (line 370)

### UCSD MOD ###
# Save the script we make: keep a copy of the generated submit script under
# /var/tmp, named after the job's uniq_id.
my $tmpfname = "/var/tmp/script-" . $description->uniq_id() . ".script";
# List-form system() runs /bin/cp directly without a shell, so spaces or shell
# metacharacters in either path cannot alter the command. The copy is
# best-effort; its exit status is deliberately not checked.
system("/bin/cp", $script_filename, $tmpfname);
### END UCSD MOD ###

+ (line 625) my $condor_version_number = 0;

+ (line 627)

# Compute the Condor version only while $condor_version_number is still false (initially 0).
unless ($condor_version_number) {
# Run condor_version and capture its output; its stderr is discarded.
my $condor_version_string = `condor_version 2>/dev/null`;
# Take the three dot-separated components following "$CondorVersion: " and
# concatenate them as zero-padded 3-digit fields, e.g. 6.9.0 -> "006009000".
# A component that does not start with digits contributes "000"; if the
# pattern does not match at all the result is the empty string.
$condor_version_number =
join("",
map { m&^(\d+)&?sprintf("%03d", $1):"000" }
($condor_version_string =~ m!^\$CondorVersion: ([^\.]+)\.([^\.]+)\.([^\.]+)!s));
# Return 1 from the enclosing sub (not shown in this hunk) when the version
# number is not below 6009000, i.e. Condor 6.9.0 or newer.
return 1 unless ($condor_version_number < 6009000);
}

-/+ (line 659) sub isWSGramCondorG {

    # Report whether the job looks like a WS-GRAM Condor-G submission.
    # Returns 1 when isWSGramGlobus() is false and the job description's
    # jobcredentialendpoint is a non-empty string; returns 0 otherwise.
    # Every step is traced through logMe().
    my ($self) = @_;
    $self->logMe("Inside isWSGramCondorG");

    my $jobcredentialendpoint = $self->{JobDescription}->jobcredentialendpoint();
    $self->logMe("Got Job Credential $jobcredentialendpoint");

    # Guard clause: a globusrun job, or a missing credential endpoint, means "no".
    if ( $self->isWSGramGlobus() || $jobcredentialendpoint eq "" ) {
        $self->logMe("Failed to get job credential");
        return 0;
    }

    $self->logMe("Inside isWSGramCondorG test true");
    return 1;
}

# Append one entry, followed by a newline, to the per-job debug log
# /var/tmp/logme-<uniq_id>.log, where <uniq_id> comes from the job description
# stored in $self->{JobDescription}.
#
# Arguments: $self  - object (hash ref) holding the JobDescription
#            $entry - text to append; only this one argument is used
# Dies when the log file cannot be opened for appending.
sub logMe {

    my $self = shift;
    my $description = $self->{JobDescription};
    my $entry = shift;
    my $unid = $description->uniq_id();

    # Three-argument open with a lexical handle: the mode cannot be influenced
    # by the file name and no global LOG handle is left behind.
    open(my $log_fh, '>>', "/var/tmp/logme-$unid.log")
        or die "Could not open file /var/tmp/logme-$unid.log:$!\n";
    print {$log_fh} $entry;
    print {$log_fh} "\n";
    close($log_fh);
}

# Report whether the job description's extensions mention "globusrun".
# Returns 1 when the extensions string matches /globusrun/, 0 otherwise.
# The extensions value and the outcome are traced through logMe().
sub isWSGramGlobus {

    my ($self) = @_;
    $self->logMe("Inside isWSGramGlobus");

    my $extensions = $self->{JobDescription}->extensions();
    $self->logMe("Extensions $extensions");

    # Guard clause for the negative case; the positive case falls through.
    if ($extensions !~ /globusrun/) {
        $self->logMe("No WSGramGlobus? ");
        return 0;
    }

    $self->logMe("Yes WSGramGlobus? ");
    return 1;
}

-- ChristopherTheissen - 2009/11/25

Edit | Attach | Print version | History: r6 | r4 < r3 < r2 < r1 | Backlinks | Raw View | Raw edit | More topic actions...
Topic revision: r2 - 2009/11/25 - 07:13:15 - ChristopherTheissen
 
This site is powered by the TWiki collaboration platform. Copyright © by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback