- One of the more recent tests (2K x 1 submitter) resulted in 0 held jobs; however, two jobs never even started. A hold-and-release cycle got them started.
- In a follow-up 2K x 2 submitter test, about 2 hours in, 5% of the jobs (204/4000) had gone into various hold states:
HoldReason = "Globus error: GT4_GRAM_JOB_SUBMIT timed out"
...
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
...
HoldReason = "Globus error: GT4_GRAM_JOB_SUBMIT timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_SUBMIT timed out"
HoldReason = "Globus error: GT4_GRAM_JOB_SUBMIT timed out"
LastHoldReason = "Spooling input data files"
HoldReason = "Globus error 155: the job manager could not stage out a file"
HoldReason = "Globus error 155: the job manager could not stage out a file"
HoldReason = "Globus error 155: the job manager could not stage out a file"
HoldReason = "Globus error 155: the job manager could not stage out a file"
...
HoldReason = "Globus error: org.globus.wsrf.impl.security.authorization.exceptions.AuthorizationException: \"/DC=org/DC=doegrids/OU=People/CN=Terrence Martin 525658\" is not authorized to use operation: {http://www.globus.org/namespaces/2004/10/gram/job}createManagedJob on this service"
HoldReason = "Globus error: GT4_GRAM_JOB_DESTROY timed out"
HoldReason = "Globus error: org.globus.wsrf.impl.security.authorization.exceptions.AuthorizationException: \"/DC=org/DC=doegrids/OU=People/CN=Terrence Martin 525658\" is not authorized to use operation: {http://www.globus.org/namespaces/2004/10/gram/job}createManagedJob on this service"
|