#!/usr/bin/env python
#######################################################################################
# python 3 compatibility
from __future__ import absolute_import, division, print_function
##################### IMPORT STANDARD MODULES ######################################
import sys,os
import six
import numpy as np #for numerical analysis
import fortranformat as ff #reading/writing fortran formatted text
import pandas as pd
import h5py
import time
import warnings
from .. import constants
if sys.version_info[0] >= 3: unicode = str
#################### I/O ROUTINES ######################################
def get_travel_times1D(table,distance_in_degree,period,output='pvel',overtone=0,phase=None):
    """
    Return the arrival time in seconds of a surface-wave phase or mode type/overtone.
    When phase is given, the mode type and orbit (arc) are inferred from the phase
    name; otherwise a minor-arc spheroidal arrival is assumed.

    Input Parameters:
    ----------------
    table <str>: path to hdf5 format modes table
    distance_in_degree <float>: great circle distance, can be minor or major arc
    period <float>: period in seconds
    output <str>: either "gvel" or "pvel" for group or phase velocity
    overtone <int>: overtone number (defaults to 0, i.e. fundamental mode)
    phase <str>: surface-wave phase such as "R1" (Rayleigh/spheroidal) or
                 "G1" (Love/toroidal); the trailing integer is the orbit number
    """
    # Perform some checks
    if phase is None: # if no phase is given, assume minor-arc spheroidal
        mode_type = 'spheroidal'
        iorbit = 1
    else:
        if isinstance(phase, str):
            # heuristics: 'R1' is spheroidal (Rayleigh), 'G1' or 'L1' is toroidal (Love)
            if phase.startswith('L') or phase.startswith('G'):
                mode_type = 'toroidal'
            elif phase.startswith('R'):
                mode_type = 'spheroidal'
            else:
                raise ValueError('Only phases like R1, G1 can be queried')
            iorbit = int(phase[1:]) # orbit number, e.g. 2 for R2; handles R10 etc.
        else:
            raise ValueError('phase must be a string like R1 or G1')
    # Perform interpolation to get velocity for requested period
    vel_i = get_velocity(table=table,period=period,overtone=overtone,mode_type=mode_type,output=output)
    # Find travel time
    if iorbit == 1:
        distance_in_km = constants.deg2km.magnitude * distance_in_degree
        igc_count = 0
    else:
        if np.mod(iorbit,2) == 0: # even orbit, hence R2, G2, G4, etc.
            igc_count = iorbit//2 # number of times almost circled the earth
            distance_in_km = constants.deg2km.magnitude * (360.*float(igc_count)-distance_in_degree)
        else:
            igc_count = (iorbit-1)//2
            distance_in_km = constants.deg2km.magnitude * (360.*float(igc_count)+distance_in_degree)
    time_in_s = distance_in_km * (1./vel_i)
    return time_in_s
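
# A minimal usage sketch, assuming a precomputed modes table at the hypothetical
# path 'modes.h5': the arrival time is just the orbit distance divided by the
# velocity interpolated at the requested period.
#
# >>> t1 = get_travel_times1D('modes.h5', distance_in_degree=85., period=100., phase='R1')
# >>> t2 = get_travel_times1D('modes.h5', distance_in_degree=85., period=100., phase='R2')
# >>> t2 > t1 # the major-arc orbit R2 travels 360 - 85 = 275 degrees
# True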
def get_velocity(table,period,overtone,mode_type,output='pvel'):
    """
    Return the group or phase velocity in km/s of a mode type/overtone,
    interpolated to the requested period.

    Input Parameters:
    ----------------
    table <str>: path to hdf5 format modes table
    period <float>: period in seconds
    overtone <int>: overtone number (e.g. 0 for the fundamental mode)
    mode_type <str>: "spheroidal", "toroidal", or "radial" (or the shorthands "S"/"T"/"R")
    output <str>: either "gvel" or "pvel" for group or phase velocity
    """
    table = h5py.File(table,'r')
    omega_query = (1./period) * (2.*np.pi) # the requested angular frequency in rad/s
    # allow single-letter shorthands for the mode type
    if mode_type == 'S':
        mode_type = 'spheroidal'
    elif mode_type == 'T':
        mode_type = 'toroidal'
    elif mode_type == 'R':
        mode_type = 'radial'
    omega = table[mode_type][str(overtone)].attrs['omega']
    vel = table[mode_type][str(overtone)].attrs[output]
    table.close()
    if omega_query < np.min(omega) or omega_query > np.max(omega):
        raise ValueError('period {} does not exist in table for requested mode'.format(period))
    # Perform interpolation to get velocity for requested period
    vel_i = np.interp(omega_query,omega,vel)
    return vel_i
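
# A minimal sketch, assuming the same hypothetical 'modes.h5' table: query the
# fundamental-mode toroidal (Love) group velocity at 50 s. Group velocity needs
# output='gvel'; the default output='pvel' returns phase velocity.
#
# >>> gvel = get_velocity('modes.h5', period=50., overtone=0, mode_type='T', output='gvel')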
def get_dispersion_curve(table,mode_type,output='pvel',overtone=0,freq_units='mhz'):
    '''
    Return a dispersion curve for a mode type/overtone branch.

    params:
    table <str>: path to hdf5 format modes table
    mode_type <str>: either "spheroidal", "toroidal", or "radial"
    output <str>: either "gvel" or "pvel" for group or phase velocity
    overtone <int>: overtone number (defaults to 0, i.e. fundamental mode)
    freq_units <str>: units of frequency axis. either "rad/s","hz",or "mhz"

    returns:
    freq,vel: frequency (in freq_units) and (group or phase) velocity in km/s
    '''
    table = h5py.File(table,'r')
    omega = table[mode_type][str(overtone)].attrs['omega']
    vel = table[mode_type][str(overtone)].attrs[output]
    table.close()
    if freq_units.lower() == 'hz':
        freq = omega/(2.*np.pi)
    elif freq_units.lower() == 'mhz':
        freq = (omega/(2.*np.pi)) * 1000.
    elif freq_units.lower() == 'rad/s':
        freq = omega
    else:
        raise ValueError('freq_units must be "rad/s", "hz", or "mhz"')
    return freq,vel
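
# A minimal sketch of plotting a fundamental-mode dispersion curve from the
# hypothetical 'modes.h5' table; matplotlib is assumed to be available.
#
# >>> import matplotlib.pyplot as plt
# >>> freq, vel = get_dispersion_curve('modes.h5', 'spheroidal', output='pvel', freq_units='mhz')
# >>> plt.plot(freq, vel); plt.xlabel('frequency (mHz)'); plt.ylabel('phase velocity (km/s)')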
def readSWascii(file, delim='-', required=None, warning=False):
    """Reads the AVNI format for analysis and plotting.

    Input parameters:
    ----------------
    file : input file in default AVNI format
    delim : delimiter that combines fields into a joint field e.g. network-station,
            separated out during I/O.
    required : fields needed as comments (e.g. #CITE: Author et al., YEAR) in the file
               defaults: 'CITE', 'SHORTCITE', 'REFERENCE MODEL', 'PVEL', 'CRUST',
               'MODEL3D', 'SIGMATYPE', 'WEITYPE','EQTYPE', 'STATTYPE',
               'FORMAT', 'WRITE', 'FIELDS'
    warning : if True, a missing required field only prints a warning instead of raising

    Output:
    ------
    SWdata : dictionary with fields data, metadata and comments
    """
    # defaults
    if required is None: required = ['CITE', 'SHORTCITE', 'REFERENCE MODEL', 'PVEL', 'CRUST', 'MODEL3D', 'SIGMATYPE', 'WEITYPE','EQTYPE', 'STATTYPE', 'FORMAT', 'WRITE', 'FIELDS']
    if not os.path.isfile(file): raise IOError("Filename ("+file+") does not exist")
    # collect comment lines, including CITE and SHORTCITE
    comments = []
    for line in open(file):
        if line.startswith("#"): comments.append(line.rstrip())
    metadata = {}; notes = []
    for line in comments:
        if line.startswith("#") and ':' in line and not line.startswith("#NOTES:"):
            key = line[1:].split(':')[0]
            value = line[1:].split(':')[1].rstrip().lstrip()
            metadata[key] = value
        elif line.startswith("#NOTES:"):
            notes.append(line)
    metadata['COMMENTS'] = notes
    # check if required fields exist
    for key in required:
        try:
            namelist = metadata[key].split()
        except KeyError:
            if warning:
                print(key+" should be defined as a comment in "+file)
            else:
                raise KeyError(key+" should be defined as a comment in "+file)
    data = pd.read_csv(file,header=None,comment='#',sep=r'\s+',names=metadata['FIELDS'].split())
    # split concatenated fields into their constituent columns
    for column in data.columns.tolist():
        if delim in column:
            newcolumns = column.split(delim)
            try:
                data[newcolumns] = data[column].str.split(delim,expand=True)
                data = data.drop(column, axis=1) # get rid of old composite column
                data.replace('', np.nan, inplace=True) # replace blank with nan
            except Exception:
                warnings.warn('could not split the column '+column+' with delimiter '+delim)
    if 'stat' in data.columns.values and 'net' in data.columns.values:
        data['path'] = data['cmtname'] + '_' + data['stat'] + '-' + data['net']
    SWdata = {}; SWdata['data'] = data; SWdata['metadata'] = metadata
    return SWdata
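
# A minimal sketch, assuming a hypothetical measurement file 'GDM52.R1.100s.AVNI'
# in the AVNI format: composite fields such as 'stat-net' are split into separate
# 'stat' and 'net' columns on read.
#
# >>> SWdata = readSWascii('GDM52.R1.100s.AVNI')
# >>> SWdata['metadata']['SHORTCITE'], len(SWdata['data'])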
def writeSWascii(SWdata,filename,iflagthreshold=None,delim='-',writeheader=True,writedata=True,verbose=True):
    """
    Writes the AVNI format for analysis and plotting.

    Input parameters:
    ----------------
    SWdata : dictionary of SW data with metadata and data fields
    filename : output file name
    delim : delimiter that combines fields into a joint field e.g. network-station,
            separated out during I/O.
    iflagthreshold : threshold for iflag which corresponds to the processing level
                     that was cleared
    """
    metadata = SWdata['metadata']
    # write out header
    if writeheader:
        printstr = [unicode('#'+key+':'+metadata[key]+'\n') for key in metadata.keys() if key not in ['FIELDS','WRITE','COMMENTS']]
        printstr.append(unicode('#'*len(metadata['FIELDS'])+'\n'))
        for comment in metadata['COMMENTS']: printstr.append(unicode(comment+'\n'))
        printstr.append(unicode('#'*len(metadata['FIELDS'])+'\n'))
        printstr.append(unicode('#WRITE:'+metadata['WRITE']+'\n'))
        printstr.append(unicode('#FIELDS:'+metadata['FIELDS']+'\n'))
        with open(filename,'w') as f: f.writelines(printstr)
    # append the data to the output file (one row per observation)
    if writedata:
        if iflagthreshold is None:
            data = SWdata['data']
        else:
            data = SWdata['data'][SWdata['data']['iflag'] >= iflagthreshold]
            if len(data) == 0: raise ValueError('No data row clears threshold for iflag >= '+str(iflagthreshold))
        if isinstance(data, pd.Series): data = pd.DataFrame(data.to_dict(), index=[0])
        namelist = metadata['FIELDS'].split()
        # recombine divided fields into concatenated fields where needed
        for column in namelist:
            if delim in column and column not in data.columns.values:
                oldcolumns = column.split(delim)
                data[column] = data[oldcolumns].fillna('').apply(lambda x: delim.join(x.astype(str).values), axis=1)
                data = data.drop(oldcolumns, axis=1) # get rid of old separate columns
        # reorder according to FIELDS
        data = data.reindex(namelist, axis=1)
        # initialize fortranformat writer and apply it row by row
        line = ff.FortranRecordWriter('('+metadata['WRITE']+')')
        for field in ['cmtname','stat-net-chan-loc']:
            if field in data.columns: data[field] = data[field].str.ljust(15)
        Formated_Series = data.apply(lambda x: line.write(x.values), axis=1)
        Formated_Series.to_csv(filename,index=False,header=False,mode='a')
        if verbose: print("....written "+str(len(data))+" observations to "+filename)
    return
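
# A minimal round-trip sketch with the hypothetical file from the readSWascii
# example: keep only observations whose processing flag cleared level 1.
#
# >>> SWdata = readSWascii('GDM52.R1.100s.AVNI')
# >>> writeSWascii(SWdata, 'GDM52.R1.100s.iflag1.AVNI', iflagthreshold=1)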
def SWasciitohdf5(files,hdffile='Summary.SW.data.h5',datatype='summary',delim='-'):
    """
    Read a list of ascii files into an hdf5 container format.

    Input parameters:
    ----------------
    files: a pandas DataFrame whose first column lists the ascii files to append
    hdffile: output hdf5 file to append fields to
    datatype: type of data to group the data with
    delim : delimiter that combines fields into a joint field e.g. network-station,
            separated out during I/O.
    """
    hf = h5py.File(hdffile, 'a')
    for _ , row in files.iterrows():
        file = row[0]
        print("... creating group folders based on "+file+" in "+hdffile+" with the "+datatype+" group")
        SWdata = readSWascii(file,delim=delim)
        metadata = SWdata['metadata']; data = SWdata['data']
        group = metadata['SHORTCITE']
        uniquefolders = ['overtone', 'period']
        for ii in np.arange(len(uniquefolders)):
            key = uniquefolders[ii]
            if len(np.unique(data[key])) != 1: raise ValueError('Number of unique values of '+key+' should be 1 in '+file)
            if ii == 0:
                folder = str(np.unique(data[key])[0])
            else:
                folder = folder+'/'+str(np.unique(data[key])[0])
            g1 = hf.require_group(folder)
            g1.attrs['name'] = key
            if key == 'period': g1.attrs['units'] = 's'
        # Loop over every type of orbit and store values e.g. R1,R2,R3
        for typeiorb in np.unique(data['typeiorb']):
            folderorb = folder+'/'+typeiorb
            g1 = hf.require_group(folderorb)
            g1.attrs['name'] = 'typeiorb'
            g2 = g1.require_group(group)
            g2.attrs['FILE'] = file
            for field in metadata.keys():
                if field == 'COMMENTS':
                    asciiList = [n.encode("ascii", "ignore") for n in metadata['COMMENTS']]
                    g2.attrs.create('COMMENTS', asciiList, (len(asciiList),1),'S200')
                else:
                    g2.attrs[field] = metadata[field]
            g3 = g2.require_group('data/'+datatype)
            #### store path ####
            g4 = g3.require_group('arrival/paths')
            g4.attrs['desc'] = 'ipath is the unique path in CMT_STATION-NETWORK format'
            #### store source ####
            g4 = g3.require_group('source/'+metadata['EQTYPE'])
            #### store station ####
            g4 = g3.require_group('station/'+metadata['STATTYPE'])
            #### store data and predictions ####
            g4 = g3.require_group('arrival/observed')
            g4.attrs['units'] = 's'
            g4.attrs['desc'] = 'observed relative arrivals w.r.t. various 1D reference models'
            g5 = g4.require_group(metadata['REFERENCE MODEL'])
            g5.attrs['pvel'] = float(metadata['PVEL'].split()[0])
            g5.attrs['pvel_units'] = metadata['PVEL'].split()[1]
            g4 = g3.require_group('arrival/predicted')
            g4.attrs['units'] = 's'
            g4.attrs['desc'] = 'predicted relative and absolute arrivals based on various 1D and 3D mantle and crustal models'
            # store weights/uncertainties
            g4 = g3.require_group('arrival/weight')
            g4.attrs['desc'] = 'weight of the measurement to account for uneven sampling etc.'
            g4 = g3.require_group('arrival/sigma')
            g4.attrs['desc'] = 'uncertainty of the measurement'
            #### store flag ####
            g4 = g3.require_group('arrival/others')
            g4.attrs['desc'] = 'other fields relevant to the arrival e.g. iflag describes the processing scheme : 0 is raw'
    hf.close()
    ####################################################
    # Now store the relevant pandas DataFrame
    for _ , row in files.iterrows():
        file = row[0]
        print("... adding "+file+" to "+hdffile+" in the "+datatype+" group")
        SWdata = readSWascii(file,delim=delim)
        metadata = SWdata['metadata']; data = SWdata['data']
        group = metadata['SHORTCITE']
        uniquefolders = ['overtone', 'period']
        for ii in np.arange(len(uniquefolders)):
            key = uniquefolders[ii]
            if len(np.unique(data[key])) != 1: raise ValueError('Number of unique values of '+key+' should be 1 in '+file)
            if ii == 0:
                folder = str(np.unique(data[key])[0])
            else:
                folder = folder+'/'+str(np.unique(data[key])[0])
        # Loop over every type of orbit and store values e.g. R1,R2,R3
        for typeiorb in np.unique(data['typeiorb']):
            folderorb = folder+'/'+typeiorb
            subfolder = folderorb+'/'+group+'/data/'+datatype
            #### store path ####
            columns = ['distkm', 'path','delellphase']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/arrival/paths'),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store source ####
            columns = ['cmtname', 'eplat', 'eplon', 'epdep']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/source/'+metadata['EQTYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store station ####
            columns = ['stlat', 'stlon','stat','net', 'chan', 'loc']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/station/'+metadata['STATTYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store data and predictions ####
            columns = ['refphase', 'delobsphase']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/arrival/observed/'+metadata['REFERENCE MODEL']),format='table', mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            if metadata['MODEL3D'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delpredphase'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/predicted/'+metadata['REFERENCE MODEL']+'/'+metadata['MODEL3D']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            if metadata['CRUST'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delcruphase'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/predicted/'+metadata['REFERENCE MODEL']+'/'+metadata['CRUST']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            # store weights/uncertainties
            if metadata['WEITYPE'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delweight'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/weight/'+metadata['WEITYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            if metadata['SIGMATYPE'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delsigma'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/sigma/'+metadata['SIGMATYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store flag ####
            try:
                data.loc[data['typeiorb'] == typeiorb]['iflag'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/others'),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            except KeyError:
                if datatype == 'raw':
                    iflag = pd.DataFrame(0, index=np.array(data.loc[data['typeiorb'] == typeiorb].index), columns=['iflag'])
                    iflag.to_hdf(hdffile, key=unicode(subfolder+'/arrival/others'),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
                else:
                    raise ValueError('iflag not defined for datatype ('+datatype+') unless specified for each measurement row in '+file)
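
# A minimal sketch of building the hdf5 container: files is a pandas DataFrame
# whose first column lists the ascii files to append (hypothetical names below).
#
# >>> files = pd.DataFrame(['GDM52.R1.100s.AVNI', 'GDM52.R2.100s.AVNI'])
# >>> SWasciitohdf5(files, hdffile='Summary.SW.data.h5', datatype='summary')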
def SWhdf5toascii(query='0/25.0/L1/REM3D',hdffile='Summary.SW.data.h5',iflag=0,datatype='summary',delim='-',outfile=None,model3d=None,refmodel=None,crust=None,weitype=None,sigmatype=None,stattype=None,eqtype=None):
    """
    Write an hdf5 group to an ASCII file. None is the default i.e. the values in metadata.

    Input parameters:
    ----------------
    query: key query to the hdf5 file till the group level e.g. 0/25.0/L1/GDM52
    hdffile: input hdf5 file to query fields from
    datatype: type of data to group the data with e.g. raw, processed, summary
    delim : delimiter that combines fields into a joint field e.g. network-station,
            separated out during I/O.
    iflag : select the flag to sub-select data
    outfile : output ascii file. If None, decided based on query
    model3d, refmodel, crust, weitype,
    sigmatype, stattype, eqtype : fields used to query various groups and attributes.
                                  If None, use the values in the hdffile metadata

    Output:
    ------
    ASCII file with the values selected
    """
    if not os.path.isfile(hdffile): raise IOError("Filename ("+hdffile+") does not exist")
    # read the dataframes from hdf5
    SWdata = readSWhdf5(query,hdffile,iflag,datatype,delim,model3d,refmodel,crust,weitype,sigmatype,stattype,eqtype)
    # write the pandas to file
    fields = query.split('/')
    if outfile is None:
        outfile = fields[3]+'_'+fields[0]+'_'+fields[1]+'_'+fields[2]+'.AVNI'
    writeSWascii(SWdata,outfile,delim=delim)
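
# A minimal sketch, assuming 'Summary.SW.data.h5' was built by SWasciitohdf5:
# extract the fundamental-mode 25 s L1 measurements of the hypothetical GDM52
# group; by default the output file is named GDM52_0_25.0_L1.AVNI.
#
# >>> SWhdf5toascii(query='0/25.0/L1/GDM52', hdffile='Summary.SW.data.h5', iflag=0)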
def readSWhdf5(query='0/25.0/L1/REM3D',hdffile='Summary.SW.data.h5',iflag=0,datatype='summary',delim='-',model3d=None,refmodel=None,crust=None,weitype=None,sigmatype=None,stattype=None,eqtype=None):
    """
    Read a surface-wave dataset from an hdf5 container.

    Input parameters:
    ----------------
    query: key query to the hdf5 file till the group level e.g. 0/25.0/L1/GDM52
    hdffile: input hdf5 file to query fields from
    datatype: type of data to group the data with e.g. raw, processed, summary
    delim : delimiter that combines fields into a joint field e.g. network-station,
            separated out during I/O.
    iflag : select the flag to sub-select data
    model3d, refmodel, crust, weitype,
    sigmatype, stattype, eqtype : fields used to query various groups and attributes.
                                  If None, use the values in the hdffile metadata

    Output:
    ------
    SWdata : dictionary of SW data with metadata and data fields consistent with
             readSWascii and writeSWascii
    """
    SWdata = {}
    if not os.path.isfile(hdffile): raise IOError("Filename ("+hdffile+") does not exist")
    hf = h5py.File(hdffile, 'r')
    SWdata['metadata'] = {}
    for name,value in hf[query].attrs.items():
        if name == 'COMMENTS':
            SWdata['metadata'][name] = [txt[0].decode('utf-8') for txt in hf[query].attrs['COMMENTS'][:].tolist()]
        else:
            SWdata['metadata'][name] = value
    try:
        #### get flag ####
        df0 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/others',where=['iflag=='+str(iflag)])
        #### get path ####
        df1 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/paths', where=df0.index)
        #### get source ####
        if eqtype is None: eqtype = SWdata['metadata']['EQTYPE']
        df2 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/source/'+eqtype, where=df0.index)
        #### get station ####
        if stattype is None: stattype = SWdata['metadata']['STATTYPE']
        df3 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/station/'+stattype, where=df0.index)
        #### get data and predictions ####
        if refmodel is None: refmodel = SWdata['metadata']['REFERENCE MODEL']
        df4 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/observed/'+refmodel, where=df0.index)
        if model3d is None: model3d = SWdata['metadata']['MODEL3D']
        if model3d == 'None':
            df5 = pd.DataFrame(0, index=np.array(df3.index), columns=['delpredphase'])
        else:
            df5 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/predicted/'+refmodel+'/'+model3d, where=df0.index)
        if crust is None: crust = SWdata['metadata']['CRUST']
        if crust == 'None':
            df6 = pd.DataFrame(0, index=np.array(df3.index), columns=['delcruphase'])
        else:
            df6 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/predicted/'+refmodel+'/'+crust, where=df0.index)
        # get weights/uncertainties
        if weitype is None: weitype = SWdata['metadata']['WEITYPE']
        if weitype == 'None':
            df7 = pd.DataFrame(0, index=np.array(df3.index), columns=['delweight'])
        else:
            df7 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/weight/'+weitype, where=df0.index)
        if sigmatype is None: sigmatype = SWdata['metadata']['SIGMATYPE']
        if sigmatype == 'None':
            df8 = pd.DataFrame(0, index=np.array(df3.index), columns=['delsigma'])
        else:
            df8 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/sigma/'+sigmatype, where=df0.index)
        SWdata['data'] = pd.concat([df0,df1,df2,df3,df4,df5,df6,df7,df8],axis=1)
    except Exception:
        raise ValueError('fields missing in location '+query+'/data/'+datatype)
    # fill the columns implied by the query
    fields = query.split('/')
    SWdata['data']['overtone'] = int(fields[0])
    SWdata['data']['period'] = float(fields[1])
    SWdata['data']['typeiorb'] = fields[2]
    # update the metadata attributes based on the input query
    findindx = query+'/data/'+datatype+'/arrival/observed/'+refmodel
    SWdata['metadata']['PVEL'] = str(hf[findindx].attrs['pvel'])+' '+hf[findindx].attrs['pvel_units']
    SWdata['metadata']['REFERENCE MODEL'] = refmodel
    SWdata['metadata']['CRUST'] = crust
    SWdata['metadata']['MODEL3D'] = model3d
    SWdata['metadata']['SIGMATYPE'] = sigmatype
    SWdata['metadata']['WEITYPE'] = weitype
    SWdata['metadata']['EQTYPE'] = eqtype
    SWdata['metadata']['STATTYPE'] = stattype
    hf.close()
    return SWdata
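
# A minimal sketch, assuming the same container: readSWhdf5 returns the same
# dictionary structure as readSWascii, so the result can be passed straight to
# writeSWascii or analyzed as a pandas DataFrame.
#
# >>> SWdata = readSWhdf5(query='0/25.0/L1/GDM52', hdffile='Summary.SW.data.h5')
# >>> SWdata['data']['delobsphase'].describe()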