-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsdb_getphot.py
More file actions
executable file
·444 lines (380 loc) · 18 KB
/
sdb_getphot.py
File metadata and controls
executable file
·444 lines (380 loc) · 18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
#!/usr/bin/env python3
"""Extract photometry from command line
When run from command line the arguments can either specify a number of
individual sdbids, or a sample which resides in a table in
sed_db_samples.
"""
import argparse
import re
from collections import OrderedDict
from os import mkdir,rename,remove,utime
from os.path import isdir,isfile,getmtime,basename,dirname
import glob
import hashlib
import filelock
import numpy as np
from astropy.table import Table,vstack,unique,Column
import mysql.connector
import sqlite3
import config as cfg
import sdf.db
import sdf.filter
def sdb_write_rawphot(file, tphot, tspec):
    """Write raw photometry to a file.

    Takes filename to write to, and astropy Tables with photometry and
    information on spectra. Each contains metadata that is printed out
    as key=value. As the spectra are printed as instrument=file these
    are stored in reverse in the meta dictionary (i.e. instrument
    non-unique).

    Parameters
    ----------
    file : str
        Path of the file to (over)write.
    tphot : astropy.table.Table
        Photometry table; ``tphot.meta['keywords']['id']['value']`` is
        written in the leading comment line.
    tspec : astropy.table.Table
        Spectra table. Not written directly here; spectra file names
        are presumably carried in ``tphot.meta`` by the caller — TODO
        confirm.

    TODO: Table.write can do the meta, they need to be "comments" and
    "keywords" entries in the meta dict.
    """
    # Use a context manager so the handle is closed even if the
    # table write raises.
    with open(file, 'w') as fh:
        # Leading "\ " marks an IPAC-format comment line; write the
        # backslash with an explicit escape ('\ ' is an invalid escape
        # sequence and a SyntaxWarning on modern Python).
        print('\\ photometry/spectra for '
              + tphot.meta['keywords']['id']['value'] + '\n\\', file=fh)
        tphot.write(fh, format='ascii.ipac',
                    formats={'Phot': '%6.4g', 'Err': '%6.4g', 'Sys': '%6.4g'})
def filehash(file):
    """Return an md5 hex digest for a file, ignoring IPAC comments.

    Lines that are too short to carry content (two characters or
    fewer, i.e. at most a bare "\\" plus newline) and lines starting
    with "\\ " (IPAC comment syntax) are excluded, so changes to
    comments/metadata alone do not change the hash.

    Parameters
    ----------
    file : str
        Path of the file to hash.

    Returns
    -------
    str
        Hex digest of the md5 of the retained lines, concatenated.
    """
    hasher = hashlib.md5()
    with open(file, 'r') as f:
        for text in f:
            # Skip near-empty lines and IPAC comment lines. Use an
            # explicit escaped backslash ('\ ' is an invalid escape
            # sequence on modern Python). Stream each line into the
            # hasher rather than accumulating the whole file in memory;
            # md5 of a concatenation equals incremental updates.
            if len(text) <= 2 or text.startswith('\\ '):
                continue
            hasher.update(text.encode())
    return hasher.hexdigest()
def sdb_getphot_one(id, verb=False):
    """Extract photometry and other info from a database.

    Results are put in text files within sub-directories for the given
    id. Assuming some photometry exists, a 'public' directory is always
    created, and as many "private" ones as needed are created to ensure
    that private data are separated from public data and each other. The
    private directories also get an .htaccess file that requires a user
    to be in the group of the same name to view the contents.

    Parameters
    ----------
    id : str
        Identifier looked up in the xids table; must resolve to exactly
        one sdbid or the process exits.
    verb : bool, optional
        If True, print each generated photometry SQL statement before
        executing it.
    """
    # set up connection; both branches return a (connection, cursor) pair
    if cfg.db['type'] == 'sqlite':
        cnx, cursor = sdf.db.get_cnx(':memory:')
    elif cfg.db['type'] == 'mysql':
        cnx, cursor = sdf.db.get_cnx(cfg.db['db_sdb'])

    if cfg.db['type'] == 'mysql':
        # allow GROUP BY used by some xmatch queries
        cursor.execute("SET SESSION SQL_MODE = REPLACE(@@SQL_MODE,'ONLY_FULL_GROUP_BY','');")
    elif cfg.db['type'] == 'sqlite':
        # NOTE(review): db path interpolated into the statement; assumed
        # to come from trusted local config.
        cursor.execute(f'ATTACH DATABASE "{cfg.db["path"]}photometry.db" AS photometry')

    # set up temporary table with what we'll want in the output;
    # columns mirror the dtype list used for tphot below
    cursor.execute("CREATE TEMPORARY TABLE fluxes ( Band varchar(15) NOT NULL DEFAULT '', "
                   "Phot double DEFAULT NULL, Err double DEFAULT 0.0, Sys double DEFAULT 0.0, "
                   "Lim int(1) NOT NULL DEFAULT '0', Unit varchar(10) NOT NULL DEFAULT '', "
                   "bibcode varchar(30) NOT NULL DEFAULT '', "
                   "Note1 varchar(100) NOT NULL DEFAULT '', Note2 varchar(100) NOT NULL DEFAULT '', "
                   "SourceID varchar(100) DEFAULT NULL, private int(1) NOT NULL DEFAULT '0', "
                   "exclude int(1) NOT NULL DEFAULT '0');")

    # get sdbid and xids
    # NOTE(review): id is interpolated directly into the SQL (injection
    # risk if ever fed untrusted input); also cursor.rowcount for a
    # SELECT is driver-dependent (-1 for sqlite3) — confirm the sdf.db
    # cursors are buffered/mysql-like.
    cursor.execute(f"SELECT DISTINCT sdbid FROM xids WHERE xid='{id}';")
    if cursor.rowcount > 1:
        print("Found multiple sdbids for given ID in xids {}, exiting".format(id))
        exit()
    elif cursor.rowcount == 0:
        print("No sdbid for ID in xids {}, exiting".format(id))
        exit()
    sdbid = cursor.fetchall()[0][0]

    # make cross id table to match on
    cursor.execute('DROP TABLE IF EXISTS TheIDs;')
    cursor.execute(f"CREATE TEMPORARY TABLE TheIDs AS SELECT * from xids WHERE sdbid = '{sdbid}';")
    cursor.execute('CREATE INDEX xidind ON TheIDs (xid);')

    # now add photometry to that table, use extra cursor
    cnx1, cursor1 = sdf.db.get_cnx(cfg.db['db_sdb'])
    cursor1.execute('SELECT incl,`table`,xid,band,col,ecol,syserr,'
                    'lim,bibcode,unit,com1,com2,'
                    'exclude,exclude_join,extra,private FROM xmatch;')
    # each xmatch row describes one source catalogue: which table to
    # join, which columns map to flux/error/etc., and exclusion rules
    for (incl,table,xid,band,col,ecol,sys,lim,bib,unit,c1,c2,
         excl,excl_join,extra,priv) in cursor1:

        if incl != 1:
            continue

        # columns to select; the xmatch fields are SQL fragments, not
        # values, so they are spliced into the statement text
        stmt = f'Insert INTO fluxes SELECT DISTINCT {band}, {col}, {ecol}, {sys}, {lim}, ' \
               f"'{unit}', {bib}, {c1}, {c2}, TheIDs.xid, {priv}"

        # excludes: build IF(0 OR ... ,1,0) so any matching rule sets
        # the exclude column to 1
        stmt += ',IF(0 '

        # table excludes, if band is present include must be null
        # (i.e. we are not actually including instead)
        if excl_join != None:
            stmt += ' OR IF(IF(exclude_band IS NOT NULL,1,0) AND IF(include IS NULL,1,0),1,0)'

        # column based excludes (which can be overridden by joined table)
        if excl != None and excl_join is None:
            stmt += ' OR ('+excl+')'
        if excl != None and excl_join != None:
            stmt += ' OR ( ('+excl+') AND IF(include is NOT NULL,0,1))'

        stmt += ',1,0)'

        # main join
        stmt += ' FROM TheIDs LEFT JOIN '+table+' ON TheIDs.xid = '+xid

        # join exlude table if exists
        if excl_join != None:
            stmt += ' LEFT JOIN phot_exclude ON ('+excl_join+'=phot_exclude.join_id ' \
                    'AND '+band+'=phot_exclude.exclude_band AND '+bib+'=phot_exclude.exclude_ref)'

        # require flux column not null
        stmt += ' WHERE '+col+' IS NOT NULL'

        # any extra conditions for WHERE
        if extra != None:
            stmt += ' '+extra

        # db-specific edits: sqlite spells IF() as IIF(); mysql needs
        # BINARY LIKE for case-sensitive matching
        if cfg.db['type'] == 'sqlite':
            stmt = stmt.replace('IF(', 'IIF(')
        elif cfg.db['type'] == 'mysql':
            stmt = stmt.replace('LIKE', 'BINARY LIKE')

        if verb:
            print(stmt)
        cursor.execute(stmt)

    cursor1.close()
    cnx1.close()

    # now get the fluxes; dtype mirrors the fluxes table definition above
    cursor.execute("SELECT DISTINCT * FROM fluxes;")
    tphot = Table(names=[desc[0] for desc in cursor.description],
                  dtype=('S15','f','f','f','i1','S10','S25','S200','S200','S100','i1','i1'))
    for row in cursor:
        try:
            tphot.add_row(row)
        except:
            # bare except: any un-encodable row aborts the whole export
            print("ERROR: "+sdbid+" not exported, returning.")
            print("Probably wierd (+meaningless) characters in row\n",row)
            return()

    # fix any NULL values that went to nan
    tphot['Err'][np.invert(np.isfinite(tphot['Err']))] = 0.0
    tphot['Sys'][np.invert(np.isfinite(tphot['Sys']))] = 0.0

    # add wavelengths, sort, and remove
    tphot['wave'] = np.zeros(len(tphot))
    for i,filt in enumerate(tphot['Band']):
        if sdf.filter.iscolour(filt):
            f = sdf.filter.Colour.get(filt)
        else:
            f = sdf.filter.Filter.get(filt)
        tphot['wave'][i] = f.mean_wavelength
    tphot.sort('wave')
    tphot.remove_column('wave')

    # ready these to receive meta
    tphot.meta['keywords'] = {}
    tphot.meta['comments'] = []

    # get some addtional stellar data; prefer simbad parallax, fall
    # back to gaia dr2 (with its fixed bibcode)
    cursor.execute("SELECT main_id,raj2000,dej2000,sp_type,sp_bibcode,"
                   "COALESCE(simbad.plx_value,gaia_dr2.plx) AS plx_value,"
                   "COALESCE(simbad.plx_err,gaia_dr2.e_plx) AS plx_err,"
                   "COALESCE(plx_bibcode,CASE WHEN gaia_dr2.plx IS NULL THEN NULL ELSE '2018yCat.1345....0G' END) AS plx_bibcode "
                   "FROM sdb_pm LEFT JOIN simbad USING (sdbid) LEFT JOIN gaia_dr2 USING (sdbid) "
                   f"WHERE sdbid='{sdbid}';")
    vals = cursor.fetchall()
    keys = [desc[0] for desc in cursor.description]
    if len(vals) > 0:
        tphot.meta['keywords'] = OrderedDict( zip(keys,tuple(vals[0])) )
    tphot.meta['keywords']['id'] = sdbid

    # look in xids for an id if nothing from simbad table
    if tphot.meta['keywords']['main_id'] is None:
        cursor.execute(f"SELECT DISTINCT xid FROM xids WHERE sdbid='{sdbid}' "
                       "ORDER BY xid LIMIT 1")
        if cursor.rowcount == 1:
            tphot.meta['keywords']['main_id'] = cursor.fetchall()[0][0]

    # get aors for any spectra and add file names
    cursor.execute('SELECT instrument,aor_key,bibcode,private,IFNULL(exclude,0) as exclude FROM spectra '
                   'LEFT JOIN spectra_exclude USING (aor_key,instrument) '
                   f"WHERE sdbid = '{sdbid}' ORDER BY aor_key DESC;")
    tspec = Table(names=[desc[0] for desc in cursor.description],
                  dtype=('S20','i8','S19','i1','i1'))
    for row in cursor:
        tspec.add_row(row)

    cursor.close()
    cnx.close()

    # mark photometry duplicates to be excluded from fitting
    bs,js,ns = np.unique(tphot['Band'],
                         return_inverse=True,return_counts=True)
    for i,(band,n) in enumerate(zip(bs,ns)):

        if n == 1 or band not in cfg.phot['merge_dupes']:
            continue

        # indices where this band are in the phot table
        bib_i = np.where(js == i)[0]
        bibcodes = tphot['bibcode'][bib_i]
        srt = np.flipud(np.array(np.argsort(bibcodes)))
        # reversed argsort of these indices, this puts the highest dates
        # first, but alphabetical entries come first at this point
        srt = bib_i[srt]

        # now shift any non-[0-9] starting indices to the end
        # (bibcodes start with a 4-digit year when real)
        r = re.compile('^[0-9]')
        vmatch = np.vectorize(lambda x:bool(r.match(x.decode())))
        num = vmatch(tphot['bibcode'][srt])
        if np.sum(num) < len(srt) and np.sum(num) > 0:
            num = np.where(num)[0]
            srt = np.roll(srt,len(srt)-np.min(num))

        # set exclude for everything beyond the first entry, unless any
        # of these were alredy excluded, in which case everything beyond
        # the first included measurement is excluded
        one_ok = False
        for j in srt:
            if one_ok is False and tphot['exclude'][j] == 0:
                one_ok = True
            else:
                tphot['exclude'][j] = 1

    # add empty column for file names
    add = np.chararray(len(tspec),itemsize=200)
    add[:] = ''
    tspec.add_column(Column(add,name='file'))
    for aor in tspec['aor_key']:
        for inst in cfg.spectra.keys():
            # cfg.spectra maps instrument -> (subdir, glob-prefix,
            # glob-suffix) — assumed from usage here, confirm in config
            loc,g1,g2 = cfg.spectra[inst]
            specfile = glob.glob(cfg.file['spectra']+loc+g1+str(aor)+g2)
            if len(specfile) > 1:
                print("WARNING: More than one file for aor:",aor)
            elif len(specfile) == 1:
                file_path = loc+basename(specfile[0])
                tspec['file'][(tspec['aor_key']==aor)] = file_path

    # now add these as table metadata, adding a number to the instrument
    # name so that the keys are unique
    for i in range(len(tspec)):
        if tspec['file'][i] != '':
            if tspec['exclude'][i]:
                # excluded spectra go in comments rather than keywords
                tphot.meta['comments'].append(tspec['instrument'][i]+str(i)+'='+tspec['file'][i])
            else:
                tphot.meta['keywords'][tspec['instrument'][i]+str(i)] = tspec['file'][i]

    # rearrange to ensure keyword structure is correct for IPAC ascii
    # format, which is {'keywords':{'keyword': {'value': value} }
    for key in tphot.meta['keywords'].keys():
        tphot.meta['keywords'][key] = {'value':tphot.meta['keywords'][key]}

    # find any private data by making table from phot and spec,
    # bibcode and private are the common columns
    tpriv = vstack([tphot,tspec],join_type='inner')
    if len(tpriv[tpriv['private'] == 1]) == 0:
        npriv = 0
    else:
        npriv = len(unique(tpriv[tpriv['private'] == 1]))

    # if there wasn't any photometry or spectra, then there's nothing to do
    if len(tpriv) == 0:
        print("No photometry or spectra for {}, skipping".format(sdbid))
        return
    elif len(tpriv) == len(tspec):
        print("No photometry, only spectra for {}, skipping".format(sdbid))
        return

    # write file(s), organising things if there is private data
    sedroot = cfg.file['sedroot']+sdbid+'/'
    if isdir(sedroot) == False:
        mkdir(sedroot)
    filename = sdbid+'-rawphot.txt'

    # make list of new dirs needed, 'public' will contain only public
    # photometry, 'all' will contain everything and is only needed if
    # there are multiple private sets of photometry
    newdirs = np.array(['public'])
    if npriv > 0:
        newdirs = np.append(newdirs,unique(tpriv[tpriv['private'] == 1])['bibcode'])
        if len(newdirs) > 2:
            newdirs = np.append(newdirs,'all')

    # go through samples, updating only if different
    cur = glob.glob(sedroot+'*/')
    for dir in newdirs:

        seddir = sedroot+dir+'/'

        # make dir if needed, if exists get or invent hash on old file
        oldhash = ''
        if isdir(seddir) == False:
            mkdir(seddir)
            mtime = getmtime(seddir)
        else:
            if isfile(seddir+filename):
                oldhash = filehash(seddir+filename)
                mtime = getmtime(seddir+filename)

        # create the .htaccess file if necessary
        if not isfile(seddir+'/.htaccess') and dir != 'public':
            fd = open(seddir+'/.htaccess','w')
            fd.write('AuthName "Must login"\n')
            fd.write('AuthType Basic\n')
            fd.write('AuthUserFile '+cfg.www['root']+'.htpasswd\n')
            fd.write('AuthGroupFile '+cfg.www['root']+'.htgroup\n')
            fd.write('require group admin '+dir+'\n')
            fd.close()

        # only write if we can get a lock, sdf may be fitting the old file
        lock = filelock.FileLock(seddir+'/.sdf_lock-'+filename)
        try:
            with lock.acquire(timeout = 0):

                # figure which rows to keep, and write temporary file;
                # each private dir keeps public rows plus its own bibcode
                okphot = np.logical_or(tphot['private'] == 0,
                                       tphot['bibcode'].astype(str) == dir.astype(str) )
                okspec = np.logical_or(tspec['private'] == 0,
                                       tspec['bibcode'].astype(str) == dir.astype(str) )
                tmpfile = cfg.file['sedtmp']
                if dir != 'all':
                    sdb_write_rawphot(tmpfile,tphot[okphot],tspec[okspec])
                else:
                    sdb_write_rawphot(tmpfile,tphot,tspec)

                # see if update needed, if so move it, otherwise delete;
                # comparing comment-insensitive hashes avoids touching
                # files whose data didn't change
                if filehash(tmpfile) != oldhash:
                    if oldhash != '':
                        print(sdbid,dir,": Different hash, updating file")
                    else:
                        print(sdbid,dir,": Creating file")
                    rename(tmpfile,seddir+filename)
                    # hack to retain mod time of old file
                    # utime(seddir+filename,times=(mtime,mtime))
                else:
                    print(sdbid,dir,": Same hash, leaving old file")
                    remove(tmpfile)

                # remove this dir listing if it's there
                if seddir in cur:
                    cur.remove(seddir)

                # check for orphaned SED directories
                if len(cur) > 0:
                    print("\nWARNING: orphaned directories exist:",cur,"\n")

        except filelock.Timeout:
            print(" WARNING: can't acquire lock for {}".format(dir+'/'+filename))
            print(" skipping")
def sdb_getphot(idlist, verb=False):
    """Call sdb_getphot_one for each id in a list.

    Parameters
    ----------
    idlist : list
        List of identifiers to extract photometry for.
    verb : bool, optional
        Passed through to sdb_getphot_one.

    Raises
    ------
    TypeError
        If idlist is not a list.
    """
    if not isinstance(idlist, list):
        # attach the explanation to the exception itself rather than
        # printing it separately before a message-less raise
        raise TypeError("sdb_getphot expects a list, got "
                        + type(idlist).__name__)
    for id in idlist:
        sdb_getphot_one(id, verb=verb)
# run from the command line
if __name__ == "__main__":

    # inputs: exactly one of --idlist/--sample/--all is required
    parser1 = argparse.ArgumentParser(description='Export photometry from SED DB')
    parser = parser1.add_mutually_exclusive_group(required=True)
    parser.add_argument('--idlist','-i',nargs='+',action='append',
                        dest='idlist',metavar='sdbid',help='Get photometry for one sdbid')
    parser.add_argument('--sample','-s',type=str,metavar='table',help='Get photometry for sample')
    parser.add_argument('--all','-a',action='store_true',help='Get all photometry')
    parser1.add_argument('--ra','-r',type=float,metavar='X',help='Only extract for RA>Xh with -a')
    parser1.add_argument('--dbname',type=str,help='Database containing sample table',
                         default=cfg.db['db_samples'],metavar=cfg.db['db_samples'])
    parser1.add_argument('--verb','-v',action='store_true',help='Be more verbose')
    args = parser1.parse_args()

    if args.idlist is not None:
        print("Running sdb_getphot for list:",args.idlist[0])
        sdb_getphot(args.idlist[0], verb=args.verb)

    elif args.sample is not None:
        print("Running sdb_getphot for targets in sample:",args.sample)
        cnx = mysql.connector.connect(user=cfg.db['user'],password=cfg.db['passwd'],
                                      host=cfg.db['host'],database=args.dbname,
                                      auth_plugin='mysql_native_password')
        cursor = cnx.cursor(buffered=True)
        # NOTE(review): db/table names cannot be parameterized; they come
        # from the command line, so this trusts the operator
        cursor.execute("SELECT sdbid FROM "+args.dbname+"."+args.sample+" "
                       "WHERE sdbid IS NOT NULL;")
        for id in cursor:
            sdb_getphot_one(id[0], verb=args.verb)
        cursor.close()
        cnx.close()

    elif args.all:
        # store_true default is False, not None, so test truthiness
        # ('args.all != None' was always True)
        print("Running sdb_getphot for all targets")
        cnx = mysql.connector.connect(user=cfg.db['user'],password=cfg.db['passwd'],
                                      host=cfg.db['host'],database='sdb',
                                      auth_plugin='mysql_native_password')
        cursor = cnx.cursor(buffered=True)
        if args.ra is None:
            cursor.execute("SELECT sdbid FROM sdb_pm;")
        else:
            # --ra is in hours; raj2000 is in degrees (15 deg/hour)
            cursor.execute("SELECT sdbid FROM sdb_pm WHERE raj2000>{};".format(args.ra*15))
        for id in cursor:
            sdb_getphot_one(id[0], verb=args.verb)
        cursor.close()
        cnx.close()