#!/usr/bin/python
# Usage : python dq2bulkreg.py [task id]
# Query the production database for datasets and lfns.
# Query the RLS for guids.
# Register the lfns and guids to datasets 
#
# When modifying the code, be CAREFUL. The splitting 
# of registration in subblocks makes it a bit intricate.

import getpass
import commands
import sys,re
import cx_Oracle
import math
from xml.dom import minidom

def printErrorMsg(msg,qry):
  """Report a failed step on stdout and terminate the script.

  Prints msg, an empty line, qry, another empty line and a closing
  failure notice, then exits with status -1.  The callers in this file
  pass the shell command that failed as msg and its captured output
  as qry.

  Raises:
    SystemExit: always, with exit code -1.
  """
  # Single-argument print(...) emits the same text as the print statement.
  for line in (msg, '', qry, '', '--- Failed. Clean up and try again.'):
    print(line)
  sys.exit(-1)

def _parseLenientXml(text):
  """Parse text with minidom, retrying inside a dummy root on failure."""
  try:
    return minidom.parseString(text)
  except Exception:
    # Some jobs have had faulty xml.  Wrapping the text in a single root
    # element gives the parser a second chance (presumably the faulty
    # records lack a unique top-level element -- TODO confirm).
    return minidom.parseString("<dummyroot>" + text + "</dummyroot>")

def getLFNandDS(dbuser,dbpasswd,dbname,taskfk):
  """Collect [lfn, dataset] pairs for all DONE jobs of one task.

  Queries the ejobdef table of the production database for the given
  task and, for every job whose currentstatus is 'DONE', extracts the
  lfn and dataset of each <joboutput> element (from the joboutputs
  column) and each <joblogs> element (from the joblogs column).

  Args:
    dbuser, dbpasswd, dbname: credentials and database name used to build
      the "user/password@name" connect string.
    taskfk: task id as a string (it is concatenated into the logged
      query text and passed to the database as a bind variable).

  Returns:
    A list of [lfn, dataset] lists, in query order (jobs ordered by
    jobname).
  """
  print('... Getting lfns and dataset names from production database.')
  files = []
  selectClause = ('select jobname, controlstatus, currentstatus, joboutputs,'
                  'joblogs from ejobdef where taskfk=')
  orderClause = ' order by jobname'
  # Log the effective query exactly as before ...
  print('... ' + selectClause + taskfk + orderClause)
  connection = cx_Oracle.connect("%s/%s@%s" % (dbuser, dbpasswd, dbname))
  try:
    cursor = connection.cursor()
    try:
      # ... but hand the task id to Oracle as a bind variable instead of
      # splicing command-line text into the statement.
      cursor.execute(selectClause + ':taskfk' + orderClause,
                     {'taskfk': taskfk})
      for entry in cursor.fetchall():
        # entry = (jobname, controlstatus, currentstatus, joboutputs, joblogs)
        if entry[2] != 'DONE':
          continue
        xmlJobOutput = _parseLenientXml(entry[3])
        xmlJobLogs   = _parseLenientXml(entry[4])
        outputNodes  = xmlJobOutput.getElementsByTagName("joboutput")
        outputNodes += xmlJobLogs.getElementsByTagName("joblogs")
        for node in outputNodes:
          lfn = node.getElementsByTagName("lfn")[0].firstChild.data
          # Use next line to register only one dataset
#          if not re.search(r'log',lfn) : continue
          ds  = node.getElementsByTagName("dataset")[0].firstChild.data
          files.append([lfn,ds])
    finally:
      cursor.close()
  finally:
    # Release the database session even when the query or parsing fails.
    connection.close()
  return files

def getGUIDs(RLSurl,files):
  """Look up the dq_guid attribute of every lfn in files via globus-rls-cli.

  Runs a single 'globus-rls-cli attribute bulk query dq_guid lfn ...'
  command against RLSurl and takes every fourth whitespace-separated
  token of its output (starting with the fourth) as a guid.

  Args:
    RLSurl: URL of the RLS server, appended as last command argument.
    files: list of [lfn, dataset] entries; only the lfn (index 0) is used.

  Returns:
    The list of guids, reversed with respect to the command output
    (be aware that globus returns last guid first).  An empty list when
    files is empty.

  Raises:
    SystemExit: with code -1 if the command fails or if the number of
      guids found differs from the number of lfns queried.
  """
  # Nothing to look up: do not issue a query without any lfn argument.
  if not files:
    return []
  print('... Getting guids from RLS.')
  lfns = [entry[0] for entry in files]
  cmd = ('globus-rls-cli attribute bulk query dq_guid lfn '
         + ' '.join(lfns) + ' ' + RLSurl)
  status, output = commands.getstatusoutput(cmd)
  if status != 0:
    print(cmd)
    print('')
    print('--- globus-rls-cli dq_guid query failed.')
    print('--- No registration done.')
    print(output)
    sys.exit(-1)
  # Each result is expected to contribute four tokens, the guid being the
  # fourth one; reverse because globus returns the last guid first.
  dq_guids = output.split()[3::4]
  dq_guids.reverse()
  # register() pairs files and guids by position, so a count mismatch would
  # either mispair them or fail half-way through a registration.  Stop here.
  if len(dq_guids) != len(files):
    print(cmd)
    print('')
    print('--- globus-rls-cli returned %d guids for %d lfns.'
          % (len(dq_guids), len(files)))
    print('--- Nothing registered for these lfns.')
    print(output)
    sys.exit(-1)
  return dq_guids

def register(files,dq_guids,DsLoc,dataset):
  """Register one block of files, creating datasets as they first appear.

  Walks files in order and builds one 'dq2 registerFilesInDataset'
  command per run of consecutive entries sharing the same dataset.
  Whenever the dataset name changes, the pending command is executed
  (unless no file of this block has been added yet), the new dataset is
  created with 'dq2 registerNewDataset' and its location is set with
  'dq2 registerDatasetLocation -i'.  The command pending at the end of
  the block is executed last.

  Args:
    files: list of [lfn, dataset] entries for this block.
    dq_guids: guids paired with files by position.
    DsLoc: location name passed to registerDatasetLocation.
    dataset: name of the dataset the previous block ended with ('' for
      the first block); files continuing that dataset are added to it
      without registering it again.

  Returns:
    The dataset name of the last file handled, to be passed in as
    dataset for the next block.

  Any dq2 command returning a non-zero status ends the script through
  printErrorMsg.
  """
  def runOrFail(command):
    # Execute a shell command; abort the whole script if it fails.
    status, output = commands.getstatusoutput(command)
    if status != 0:
      printErrorMsg(command, output)

  print('... Registering new datasets and adding files.')
  addFilesCmd = 'dq2 registerFilesInDataset ' + dataset
  for pos, entry in enumerate(files):
    lfn, ds = entry[0], entry[1]
    if ds != dataset:
      # Dataset boundary.  Flush the files collected so far; at pos 0 the
      # pending command holds no files yet, so there is nothing to flush.
      if pos > 0:
        runOrFail(addFilesCmd)
      dataset = ds
      runOrFail('dq2 registerNewDataset ' + dataset)
      runOrFail('dq2 registerDatasetLocation -i ' + dataset + ' ' + DsLoc)
      addFilesCmd = 'dq2 registerFilesInDataset ' + dataset
    addFilesCmd += ' ' + lfn + ' ' + dq_guids[pos]
  # Flush the files of the last (possibly only) dataset in this block.
  runOrFail(addFilesCmd)
  return dataset


# MAIN PROGRAM
# Expects exactly one command line argument: the task id.
if len(sys.argv[1:]) != 1:
  print('Usage : python dq2bulkreg.py [TASKID]')
  sys.exit(-1)

RLSurl = 'rls://atlasrls.nordugrid.org:39281'           # RLS Server
DSLoc  = 'NDGFDISK'                                     # Dataset Location
dbname = 'atlas_prodsys'
dbuser = 'atlas_ps_adhoc'
lblock = 500                                            # Max block size

dbpasswd = getpass.getpass('Database password:')            # Get the database password
files    = getLFNandDS(dbuser,dbpasswd,dbname,sys.argv[1])  # Query prodsys database
# Entries are [lfn, dataset], so this orders by lfn first.  register() only
# starts a new dataset when the name changes between consecutive entries;
# presumably this order keeps each dataset's files together, and the pairing
# of guids with lfns may depend on it too -- confirm before changing it.
files.sort()

# math.ceil returns a float; range() needs an int (a float argument is
# rejected by Python 2.7).
nblocks = int(math.ceil(len(files)/float(lblock)))
dataset=''
for i in range(nblocks):
   # Slicing clamps at the end of the list, so the last block is simply
   # shorter; no separate remainder bookkeeping is needed.
   block = files[i*lblock:(i+1)*lblock]
   dq_guids = getGUIDs(RLSurl,block)                    # Query RLS
   # register() returns the dataset it ended with so that the next block
   # can continue it without registering it again.
   dataset = register(block,dq_guids,DSLoc,dataset)
