Skip to content

Commit f43656f

Browse files
committed
v0.1.6
2 parents d2f88af + 0030daa commit f43656f

File tree

12 files changed

+58
-28
lines changed

12 files changed

+58
-28
lines changed

pandaharvester/commit_timestamp.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
timestamp = "04-10-2019 11:57:38 on release (by fahui)"
1+
timestamp = "14-11-2019 12:03:49 on release (by fahui)"

pandaharvester/harvesterbody/monitor.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -374,10 +374,10 @@ def monitor_agent_core(self, lockedBy, queueName, workSpecsList, from_fifo=False
374374
workSpec.checkTime = datetime.datetime.utcnow()
375375
isCheckedList.append(isChecked)
376376
if monStatus == WorkSpec.ST_failed:
377-
if not workSpec.has_pilot_error():
377+
if not workSpec.has_pilot_error() and workSpec.errorCode is None:
378378
workSpec.set_pilot_error(PilotErrors.ERR_GENERALERROR, diagMessage)
379379
elif monStatus == WorkSpec.ST_cancelled:
380-
if not workSpec.has_pilot_error():
380+
if not workSpec.has_pilot_error() and workSpec.errorCode is None:
381381
workSpec.set_pilot_error(PilotErrors.ERR_PANDAKILL, diagMessage)
382382
if monStatus in [WorkSpec.ST_finished, WorkSpec.ST_failed, WorkSpec.ST_cancelled]:
383383
workSpec.set_work_params({'finalMonStatus': monStatus})

pandaharvester/harvesterbody/preparator.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,7 @@ def run(self):
4343
harvester_config.preparator.checkInterval,
4444
harvester_config.preparator.lockInterval,
4545
lockedBy,
46-
max_files_per_job=maxFilesPerJob,
47-
ng_file_status_list=['ready'])
46+
max_files_per_job=maxFilesPerJob)
4847
mainLog.debug('got {0} jobs to check'.format(len(jobsToCheck)))
4948
# loop over all jobs
5049
for jobSpec in jobsToCheck:

pandaharvester/harvestermessenger/apache_messenger.py

+14 -10
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
import json
22
from pandaharvester.harvestercore import core_utils
3+
from pandaharvester.harvesterconfig import harvester_config
34
from pandaharvester.harvestermessenger import http_server_messenger
45
from pandaharvester.harvestermisc.frontend_utils import HarvesterToken
56

@@ -49,17 +50,20 @@ def application(environ, start_response):
4950
# get params
5051
try:
5152
request_body_size = int(environ.get('CONTENT_LENGTH', 0))
52-
except:
53+
except Exception as e:
54+
_logger.warning('Zero request body due to {0}: {1}'.format(e.__class__.__name__, e))
5355
request_body_size = 0
5456
# check token
55-
try:
56-
auth_str = environ.get('HTTP_AUTHORIZATION', '').split()[-1]
57-
token = HarvesterToken()
58-
payload = token.get_payload(auth_str)
59-
except:
60-
errMsg = 'Auth failed: Invalid token'
61-
start_response('403 Forbidden', [('Content-Type', 'text/plain')])
62-
return [errMsg.encode('ascii')]
57+
if getattr(harvester_config.frontend, 'authEnable', True):
58+
try:
59+
auth_str = environ.get('HTTP_AUTHORIZATION', '').split()[-1]
60+
token = HarvesterToken()
61+
payload = token.get_payload(auth_str)
62+
except Exception as e:
63+
_logger.warning('Invalid token due to {0}: {1}'.format(e.__class__.__name__, e))
64+
errMsg = 'Auth failed: Invalid token'
65+
start_response('403 Forbidden', [('Content-Type', 'text/plain')])
66+
return [errMsg.encode('ascii')]
6367
request_body = environ['wsgi.input'].read(request_body_size)
6468
params = json.loads(request_body)
6569
# make handler
@@ -71,7 +75,7 @@ def application(environ, start_response):
7175
_logger.debug("{0} Phrase".format(handler.responseCode))
7276
start_response("{0} Phrase".format(handler.responseCode), handler.headerList)
7377
return [handler.message]
74-
except:
78+
except Exception:
7579
errMsg = core_utils.dump_error_message(_logger)
7680
start_response('500 Phrase', [('Content-Type', 'text/plain')])
7781
return [errMsg]

pandaharvester/harvestermessenger/http_server_messenger.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -172,7 +172,8 @@ def do_POST(self):
172172
self.send_response(500)
173173
message = core_utils.dump_error_message(_logger)
174174
if harvester_config.frontend.verbose:
175-
self.tmpLog.debug('method={0} json={1} msg={2}'.format(methodName, dataStr, message))
175+
self.tmpLog.debug('ip={3} - method={0} json={1} msg={2}'.format(methodName, dataStr, message,
176+
self.client_address[0]))
176177
# set the response
177178
self.do_postprocessing(message)
178179
return

pandaharvester/harvestermonitor/act_monitor.py

+14 -4
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from pandaharvester.harvestercore import core_utils
55
from pandaharvester.harvestercore.work_spec import WorkSpec
66
from pandaharvester.harvestercore.plugin_base import PluginBase
7+
from pandaharvester.harvestercore.worker_errors import WorkerErrors
78
from pandaharvester.harvesterconfig import harvester_config
89

910
from act.common.aCTConfig import aCTConfigARC
@@ -24,7 +25,11 @@ def __init__(self, **kwarg):
2425

2526
# Set up aCT DB connection
2627
self.log = core_utils.make_logger(baseLogger, 'aCT submitter', method_name='__init__')
27-
self.actDB = aCTDBPanda(self.log)
28+
try:
29+
self.actDB = aCTDBPanda(self.log)
30+
except Exception as e:
31+
self.log.error('Could not connect to aCT database: {0}'.format(str(e)))
32+
self.actDB = None
2833

2934
# get access point
3035
def get_access_point(self, workspec, panda_id):
@@ -68,10 +73,11 @@ def check_workers(self, workspec_list):
6873
method_name='check_workers')
6974
try:
7075
tmpLog.debug('Querying aCT for id {0}'.format(workSpec.batchID))
71-
columns = ['actpandastatus', 'pandastatus', 'computingElement', 'node']
76+
columns = ['actpandastatus', 'pandastatus', 'computingElement', 'node', 'error']
7277
actjobs = self.actDB.getJobs("id={0}".format(workSpec.batchID), columns)
7378
except Exception as e:
74-
tmpLog.error("Failed to query aCT DB: {0}".format(str(e)))
79+
if self.actDB:
80+
tmpLog.error("Failed to query aCT DB: {0}".format(str(e)))
7581
# send back current status
7682
retList.append((workSpec.status, ''))
7783
continue
@@ -85,12 +91,16 @@ def check_workers(self, workspec_list):
8591
actstatus = actjobs[0]['actpandastatus']
8692
workSpec.nativeStatus = actstatus
8793
newStatus = WorkSpec.ST_running
94+
errorMsg = ''
8895
if actstatus in ['waiting', 'sent', 'starting']:
8996
newStatus = WorkSpec.ST_submitted
9097
elif actstatus == 'done':
9198
newStatus = self.check_pilot_status(workSpec, tmpLog)
9299
elif actstatus == 'donefailed':
93100
newStatus = WorkSpec.ST_failed
101+
errorMsg = actjobs[0]['error'] or 'Unknown error'
102+
error_code = WorkerErrors.error_codes.get('GENERAL_ERROR')
103+
workSpec.set_supplemental_error(error_code=error_code, error_diag=errorMsg)
94104
elif actstatus == 'donecancelled':
95105
newStatus = WorkSpec.ST_cancelled
96106

@@ -108,6 +118,6 @@ def check_workers(self, workspec_list):
108118
except:
109119
tmpLog.warning('Could not extract panda ID for worker {0}'.format(workSpec.batchID))
110120

111-
retList.append((newStatus, ''))
121+
retList.append((newStatus, errorMsg))
112122

113123
return True, retList

pandaharvester/harvesterpreparator/go_bulk_preparator.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -250,7 +250,11 @@ def check_stage_in_status(self, jobspec):
250250
if fileSpec.scope is not None :
251251
scope = fileSpec.scope
252252
hash = hashlib.md5()
253-
hash.update('%s:%s' % (scope, fileSpec.lfn))
253+
if sys.version_info.major == 2:
254+
hash.update('%s:%s' % (scope, fileSpec.lfn))
255+
if sys.version_info.major == 3:
256+
hash_string = "{0}:{1}".format(scope, fileSpec.lfn)
257+
hash.update(bytes(hash_string, 'utf-8'))
254258
hash_hex = hash.hexdigest()
255259
correctedscope = "/".join(scope.split('.'))
256260
#srcURL = fileSpec.path
@@ -410,7 +414,7 @@ def trigger_preparation(self, jobspec):
410414
tmpLog.debug('Change self.dummy_transfer_id from {0} to {1}'.format(old_dummy_transfer_id,self.dummy_transfer_id))
411415
# set the dummy transfer ID which will be replaced with a real ID in check_stage_in_status()
412416
inFiles = jobspec.get_input_file_attributes(skip_ready=True)
413-
lfns = inFiles.keys()
417+
lfns = list(inFiles.keys())
414418
#for inLFN in inFiles.keys():
415419
# lfns.append(inLFN)
416420
tmpLog.debug('number of lfns - {0} type(lfns) - {1}'.format(len(lfns),type(lfns)))

pandaharvester/harvesterstager/go_bulk_stager.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ def check_stage_out_status(self, jobspec):
170170
# get the scope of the log files
171171
outfileattrib = jobspec.get_output_file_attributes()
172172
scopeLog = 'xxxx'
173-
for key in outfileattrib.keys():
173+
for key in list(outfileattrib.keys()):
174174
if "log.tgz" in key :
175175
scopeLog = outfileattrib[key]['scope']
176176
# get transfer groups
@@ -274,7 +274,11 @@ def check_stage_out_status(self, jobspec):
274274
msgStr = "printed first 25 files skipping the rest".format(fileSpec.lfn, fileSpec.scope)
275275
tmpLog.debug(msgStr)
276276
hash = hashlib.md5()
277-
hash.update('%s:%s' % (scope, fileSpec.lfn))
277+
if sys.version_info.major == 2:
278+
hash.update('%s:%s' % (scope, fileSpec.lfn))
279+
if sys.version_info.major == 3:
280+
hash_string = "{0}:{1}".format(scope, fileSpec.lfn)
281+
hash.update(bytes(hash_string, 'utf-8'))
278282
hash_hex = hash.hexdigest()
279283
correctedscope = "/".join(scope.split('.'))
280284
srcURL = fileSpec.path

pandaharvester/harvestersubmitter/htcondor_submitter.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ def _get_prodsourcelabel_pilotypeopt_piloturlstr(pilot_type, pilot_version='1'):
198198
if pilot_version == '2':
199199
# pilot 2
200200
pt_psl_map = {
201-
'RC': ('rc_test2', 'RC', '--piloturl http://project-atlas-gmsb.web.cern.ch/project-atlas-gmsb/pilot2-dev.tar.gz'),
201+
'RC': ('rc_test2', 'RC', '--piloturl http://cern.ch/atlas-panda-pilot/pilot2-dev.tar.gz'),
202202
'ALRB': ('rc_alrb', 'ALRB', ''),
203203
'PT': ('ptest', 'PR', ''),
204204
}

pandaharvester/harvestersweeper/act_sweeper.py

+7 -2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,11 @@ def __init__(self, **kwarg):
1818
PluginBase.__init__(self, **kwarg)
1919

2020
self.log = core_utils.make_logger(baseLogger, 'aCT sweeper', method_name='__init__')
21-
self.actDB = aCTDBPanda(self.log)
21+
try:
22+
self.actDB = aCTDBPanda(self.log)
23+
except Exception as e:
24+
self.log.error('Could not connect to aCT database: {0}'.format(str(e)))
25+
self.actDB = None
2226

2327

2428
# kill a worker
@@ -44,7 +48,8 @@ def kill_worker(self, workspec):
4448
self.actDB.updateJobs("id={0} AND actpandastatus IN ('sent', 'starting', 'running')".format(workspec.batchID),
4549
{'actpandastatus': 'tobekilled', 'pandastatus': None})
4650
except Exception as e:
47-
tmpLog.error('Failed to cancel job {0} in aCT: {1}'.format(workspec.batchID, str(e)))
51+
if self.actDB:
52+
tmpLog.error('Failed to cancel job {0} in aCT: {1}'.format(workspec.batchID, str(e)))
4853
return False, str(e)
4954

5055
tmpLog.info('Job {0} cancelled in aCT'.format(workspec.batchID))

pandaharvester/panda_pkg_info.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
release_version = "0.1.5"
1+
release_version = "0.1.6"

templates/panda_harvester.cfg.rpmnew.template

+3
Original file line number | Diff line number | Diff line change
@@ -684,6 +684,9 @@ verbose = False
684684
# type : simple or apache
685685
type = simple
686686

687+
# enable token authentication of apache frontend; default is True
688+
authEnable = True
689+
687690
# file of secret used in token signature
688691
secretFile = /FIXME
689692

0 commit comments

Comments
 (0)