Source code for avni.data.SW

#!/usr/bin/env python

#######################################################################################
# python 3 compatibility
from __future__ import absolute_import, division, print_function

#####################  IMPORT STANDARD MODULES   ######################################

import sys,os
import six
import numpy as np #for numerical analysis
import fortranformat as ff #reading/writing fortran formatted text
import pandas as pd
import h5py
import time
import warnings
from .. import constants

if sys.version_info[0] >= 3: unicode = str

####################       I/O ROUTINES     ######################################

def get_travel_times1D(table,distance_in_degree,period,output='pvel',overtone=0,phase=None):
    """
    Return the arrival time in seconds of a surface-wave phase or mode type/overtone.
    When phase is queried, the mode type and orbit number are inferred from it.

    Input Parameters:
    ----------------
    table <str>: path to hdf5 format modes table
    distance_in_degree <float>: great circle distance, can be minor or major arc
    period <float>: period in seconds
    output <str>: either "gvel" or "pvel" for group or phase velocity
    overtone <int>: overtone number (defaults to 0, ie. fundamental mode)
    phase <str>: surface-wave phase such as "R1" or "G2". If None, assume a
                 minor-arc spheroidal mode.
    """
    # Perform some checks
    if phase is None: # if no phase is given, assume minor arc
        mode_type = 'spheroidal'
        iorbit = 1
    elif isinstance(phase, str):
        # heuristics: 'R1' means spheroidal, 'L1'/'G1' mean toroidal
        if phase.startswith('L') or phase.startswith('G'):
            mode_type = 'toroidal'
        elif phase.startswith('R'):
            mode_type = 'spheroidal'
        else:
            raise ValueError('Only phases like R1, G1 can be called')
        iorbit = int(phase[1:])
    else:
        raise ValueError('Only phases like R1, G1 can be called')

    # Perform interpolation to get velocity for requested period
    vel_i = get_velocity(table=table,period=period,overtone=overtone,mode_type=mode_type,output=output)

    # Find travel time
    if iorbit == 1:
        distance_in_km = constants.deg2km.magnitude * distance_in_degree
    else:
        if np.mod(iorbit,2) == 0: # even orbit (e.g. G2, G4): major arc
            igc_count = iorbit//2 # number of times almost circled the earth
            distance_in_km = constants.deg2km.magnitude * (360.*float(igc_count)-distance_in_degree)
        else: # odd orbit (e.g. G3, G5): minor arc plus full circles
            igc_count = (iorbit-1)//2
            distance_in_km = constants.deg2km.magnitude * (360.*float(igc_count)+distance_in_degree)
    time_in_s = distance_in_km / vel_i
    return time_in_s
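# A minimal usage sketch (the table path 'modes.h5' and the values below are
# hypothetical; any hdf5 modes table with the layout expected above should work):
#
#   t_R1 = get_travel_times1D('modes.h5', distance_in_degree=85.0, period=100.0, phase='R1')
#   t_R2 = get_travel_times1D('modes.h5', distance_in_degree=85.0, period=100.0, phase='R2')
#   print('R1 arrives {:.0f} s before R2'.format(t_R2 - t_R1))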
def get_velocity(table,period,overtone,mode_type,output='pvel'):
    """
    Return the group or phase velocity in km/s of a mode type/overtone at the
    requested period, interpolated from the modes table.

    Input Parameters:
    ----------------
    table <str>: path to hdf5 format modes table
    period <float>: period in seconds
    overtone <int>: overtone number (e.g. 0 for the fundamental mode)
    mode_type <str>: either "spheroidal", "toroidal", or "radial"
                     (or the shorthands "S", "T", "R")
    output <str>: either "gvel" or "pvel" for group or phase velocity
    """
    omega_query = (1./period) * (2.*np.pi) # the requested frequency in rad/s
    # expand the shorthand mode types
    if mode_type == 'S':
        mode_type = 'spheroidal'
    elif mode_type == 'T':
        mode_type = 'toroidal'
    elif mode_type == 'R':
        mode_type = 'radial'
    with h5py.File(table,'r') as modes:
        omega = modes[mode_type][str(overtone)].attrs['omega']
        vel = modes[mode_type][str(overtone)].attrs[output]
    if omega_query < np.min(omega) or omega_query > np.max(omega):
        raise ValueError('period {} does not exist in table for requested mode'.format(period))
    # Perform interpolation to get velocity for requested period
    # (np.interp assumes omega is monotonically increasing)
    vel_i = np.interp(omega_query,omega,vel)
    return vel_i
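# A minimal usage sketch (hypothetical table path; the shorthand 'S' expands to
# 'spheroidal' as handled above):
#
#   c = get_velocity('modes.h5', period=50.0, overtone=0, mode_type='S', output='pvel')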
def get_dispersion_curve(table,mode_type,output='pvel',overtone=0,freq_units='mhz'):
    '''
    Return a dispersion curve for a mode type/overtone.

    Input Parameters:
    ----------------
    table <str>: path to hdf5 format modes table
    mode_type <str>: either "spheroidal", "toroidal", or "radial"
    output <str>: either "gvel" or "pvel" for group or phase velocity
    overtone <int>: overtone number (defaults to 0, ie. fundamental mode)
    freq_units <str>: units of frequency axis, either "rad/s", "hz", or "mhz"

    Output:
    ------
    freq,vel: frequency (in freq_units) and (group or phase) velocity in km/s
    '''
    with h5py.File(table,'r') as modes:
        omega = modes[mode_type][str(overtone)].attrs['omega']
        vel = modes[mode_type][str(overtone)].attrs[output]
    if freq_units.lower() == 'hz':
        freq = omega/(2.*np.pi)
    elif freq_units.lower() == 'mhz':
        freq = (omega/(2.*np.pi)) * 1000.
    elif freq_units.lower() == 'rad/s':
        freq = omega
    else:
        raise ValueError('freq_units must be "rad/s", "hz", or "mhz", not '+freq_units)
    return freq,vel
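# A minimal plotting sketch (assumes matplotlib is available; 'modes.h5' is a
# hypothetical table path):
#
#   import matplotlib.pyplot as plt
#   freq, vel = get_dispersion_curve('modes.h5', mode_type='spheroidal',
#                                    output='gvel', overtone=0, freq_units='mhz')
#   plt.plot(freq, vel)
#   plt.xlabel('frequency (mHz)'); plt.ylabel('group velocity (km/s)')
#   plt.show()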
def readSWascii(file, delim = '-',required = None,warning=False):
    """Reads the AVNI format for analysis and plotting.

    Input parameters:
    ----------------
    file : input file in default AVNI format
    delim : delimiter that combines fields into a joint field e.g. network-station
            separated out during I/O.
    required : fields needed as comments (e.g. #CITE: Author et al., YEAR) in the file.
               defaults: 'CITE', 'SHORTCITE', 'REFERENCE MODEL', 'PVEL', 'CRUST',
               'MODEL3D', 'SIGMATYPE', 'WEITYPE', 'EQTYPE', 'STATTYPE', 'FORMAT',
               'WRITE', 'FIELDS'
    warning : if True, print a warning instead of raising when a required field is missing

    Output:
    ------
    SWdata : dictionary with fields data, metadata and comments
    """
    # defaults
    if required is None:
        required = ['CITE', 'SHORTCITE', 'REFERENCE MODEL', 'PVEL', 'CRUST',
                    'MODEL3D', 'SIGMATYPE', 'WEITYPE','EQTYPE', 'STATTYPE',
                    'FORMAT', 'WRITE', 'FIELDS']
    if not os.path.isfile(file): raise IOError("Filename ("+file+") does not exist")

    # collect the comment lines that hold the metadata
    comments = []
    for line in open(file):
        if line.startswith("#"): comments.append(line.rstrip())
    metadata = {}; notes = []
    for line in comments:
        if line.startswith("#") and ':' in line and not line.startswith("#NOTES:"):
            key = line[1:].split(':')[0]
            value = line[1:].split(':',1)[1].rstrip().lstrip()
            metadata[key] = value
        elif line.startswith("#NOTES:"):
            notes.append(line)
    metadata['COMMENTS'] = notes

    # check if required fields exist
    for key in required:
        if key not in metadata:
            if warning:
                print(key+" should be defined as a comment in "+file)
            else:
                raise KeyError(key+" should be defined as a comment in "+file)

    data = pd.read_table(file,header=None,comment='#',sep=r'\s+',names=metadata['FIELDS'].split())
    # replace the fields by divided fields if concatenated
    for column in data.columns.tolist():
        if delim in column:
            newcolumns = column.split(delim)
            try:
                data[newcolumns] = data[column].str.split(delim,expand=True)
                data = data.drop(columns=column) # get rid of old composite column
                data.replace('', np.nan, inplace=True) # replace blank with nan
            except Exception:
                warnings.warn('could not split the column '+column+' with delimiter '+delim)
    if 'stat' in data.columns.values and 'net' in data.columns.values:
        data['path'] = data['cmtname'] + '_'+ data['stat'] + '-' + data['net']
    SWdata = {}; SWdata['data'] = data; SWdata['metadata'] = metadata
    return SWdata
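# A minimal usage sketch (the file name is hypothetical; any file in the AVNI
# surface-wave format should work):
#
#   SWdata = readSWascii('GDM52_0_25.0_R1.AVNI')
#   print(SWdata['metadata']['SHORTCITE'])
#   print(SWdata['data'].head())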
def writeSWascii(SWdata,filename,iflagthreshold=None,delim='-',writeheader=True,writedata=True,verbose=True):
    """
    Writes the AVNI format for analysis and plotting.

    Input parameters:
    ----------------
    SWdata : dictionary of SW data with metadata and data fields
    filename : output file name
    iflagthreshold : threshold for iflag which corresponds to the processing level
                     that was cleared
    delim : delimiter that combines fields into a joint field e.g. network-station
            separated out during I/O.
    writeheader/writedata : toggle writing of the header and data sections
    verbose : print a summary line when done
    """
    metadata = SWdata['metadata']

    # write out header
    if writeheader:
        printstr = [unicode('#'+key+':'+metadata[key]+'\n') for key in metadata.keys()
                    if key not in ['FIELDS','WRITE','COMMENTS']]
        printstr.append(unicode('#'*len(metadata['FIELDS'])+'\n'))
        for comment in metadata['COMMENTS']:
            printstr.append(unicode(comment+'\n'))
        printstr.append(unicode('#'*len(metadata['FIELDS'])+'\n'))
        printstr.append(unicode('#WRITE:'+metadata['WRITE']+'\n'))
        printstr.append(unicode('#FIELDS:'+metadata['FIELDS']+'\n'))
        with open(filename,'w') as f: f.writelines(printstr)

    # append the series to output file (one column per row now)
    if writedata:
        if iflagthreshold is None:
            data = SWdata['data']
        else:
            data = SWdata['data'][SWdata['data']['iflag'] >= iflagthreshold]
        if len(data) == 0: raise ValueError('No data row clears threshold for '+str(iflagthreshold))
        if isinstance(data, pd.Series): data = pd.DataFrame(data.to_dict(), index=[0])
        namelist = metadata['FIELDS'].split()
        # replace the fields by divided fields if concatenated
        for column in namelist:
            if delim in column and column not in data.columns.values:
                oldcolumns = column.split(delim)
                data[column] = data[oldcolumns].fillna('').apply(lambda x: delim.join(x.astype(str).values), axis=1)
                data = data.drop(columns=oldcolumns) # get rid of old separate columns
        # reorder according to FIELDS (works across pandas versions)
        data = data.reindex(namelist,axis=1)
        # initialize fortranformat writer and apply it by row
        line = ff.FortranRecordWriter('('+metadata['WRITE']+')')
        for field in ['cmtname','stat-net-chan-loc']:
            if field in data.columns.values: data[field] = data[field].str.ljust(15)
        formatted_series = data.apply(lambda x: line.write(x.values),axis=1)
        formatted_series.to_csv(filename,index=False,header=False,mode='a')
        if verbose: print("....written "+str(len(data))+" observations to "+filename)
    return
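# A minimal round-trip sketch (hypothetical file names), pairing readSWascii
# with writeSWascii to rewrite only the rows that cleared processing level 1:
#
#   SWdata = readSWascii('GDM52_0_25.0_R1.AVNI')
#   writeSWascii(SWdata, 'GDM52_0_25.0_R1.cleaned.AVNI', iflagthreshold=1)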
def SWasciitohdf5(files,hdffile = 'Summary.SW.data.h5',datatype='summary',delim='-'):
    """
    Read a list of ascii files into an hdf5 container format.

    Input parameters:
    ----------------
    files : a pandas DataFrame whose first column lists the ascii files to append
    hdffile : output hdf5 file to append fields to
    datatype : type of data to group the data with
    delim : delimiter that combines fields into a joint field e.g. network-station
            separated out during I/O.
    """
    # First pass: create the group hierarchy and attributes
    hf = h5py.File(hdffile, 'a')
    for _ , row in files.iterrows():
        file = row[0]
        print("... creating group folders based on "+file+" in "+hdffile+" with the "+datatype+" group")
        SWdata = readSWascii(file,delim=delim)
        metadata = SWdata['metadata']; data = SWdata['data']
        group = metadata['SHORTCITE']
        uniquefolders = ['overtone', 'period']
        for ii in np.arange(len(uniquefolders)):
            key = uniquefolders[ii]
            if len(np.unique(data[key])) != 1:
                raise ValueError('Number of unique values of '+key+' should be 1 in '+file)
            if ii == 0:
                folder = str(np.unique(data[key])[0])
            else:
                folder = folder+'/'+str(np.unique(data[key])[0])
            g1 = hf.require_group(folder)
            g1.attrs['name'] = key
            if key == 'period': g1.attrs['units'] = 's'
        # Loop over every type of orbit and store values e.g. R1,R2,R3
        for typeiorb in np.unique(data['typeiorb']):
            folderorb = folder+'/'+typeiorb
            g1 = hf.require_group(folderorb)
            g1.attrs['name'] = 'typeiorb'
            g2 = g1.require_group(group)
            g2.attrs['FILE'] = file
            for field in metadata.keys():
                if field == 'COMMENTS':
                    asciiList = [n.encode("ascii", "ignore") for n in metadata['COMMENTS']]
                    g2.attrs.create('COMMENTS', asciiList, (len(asciiList),1),'S200')
                else:
                    g2.attrs[field] = metadata[field]
            g3 = g2.require_group('data/'+datatype)
            #### store path ####
            g4 = g3.require_group('arrival/paths')
            g4.attrs['desc'] = 'ipath is the unique path in CMT_STATION-NETWORK format'
            #### store source ####
            g4 = g3.require_group('source/'+metadata['EQTYPE'])
            #### store station ####
            g4 = g3.require_group('station/'+metadata['STATTYPE'])
            #### store data and predictions ####
            g4 = g3.require_group('arrival/observed')
            g4.attrs['units'] = 's'
            g4.attrs['desc'] = 'observed relative arrivals w.r.t. various 1D reference models'
            g5 = g4.require_group(metadata['REFERENCE MODEL'])
            g5.attrs['pvel'] = float(metadata['PVEL'].split()[0])
            g5.attrs['pvel_units'] = metadata['PVEL'].split()[1]
            g4 = g3.require_group('arrival/predicted')
            g4.attrs['units'] = 's'
            g4.attrs['desc'] = 'predicted relative and absolute arrivals based on various 1D and 3D mantle and crustal models'
            # store weights/uncertainties
            g4 = g3.require_group('arrival/weight')
            g4.attrs['desc'] = 'weight of the measurement to account for uneven sampling etc.'
            g4 = g3.require_group('arrival/sigma')
            g4.attrs['desc'] = 'uncertainty of the measurement'
            #### store flag ####
            g4 = g3.require_group('arrival/others')
            g4.attrs['desc'] = 'other fields relevant to the arrival e.g. iflag describes the processing scheme : 0 is raw'
    hf.close()

    ####################################################
    # Second pass: store the relevant pandas DataFrames
    for _ , row in files.iterrows():
        file = row[0]
        print("... adding "+file+" to "+hdffile+" in the "+datatype+" group")
        SWdata = readSWascii(file,delim=delim)
        metadata = SWdata['metadata']; data = SWdata['data']
        group = metadata['SHORTCITE']
        uniquefolders = ['overtone', 'period']
        for ii in np.arange(len(uniquefolders)):
            key = uniquefolders[ii]
            if len(np.unique(data[key])) != 1:
                raise ValueError('Number of unique values of '+key+' should be 1 in '+file)
            if ii == 0:
                folder = str(np.unique(data[key])[0])
            else:
                folder = folder+'/'+str(np.unique(data[key])[0])
        # Loop over every type of orbit and store values e.g. R1,R2,R3
        for typeiorb in np.unique(data['typeiorb']):
            folderorb = folder+'/'+typeiorb
            subfolder = folderorb+'/'+group+'/data/'+datatype
            #### store path ####
            columns = ['distkm', 'path','delellphase']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/arrival/paths'),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store source ####
            columns = ['cmtname', 'eplat', 'eplon', 'epdep']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/source/'+metadata['EQTYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store station ####
            columns = ['stlat', 'stlon','stat','net', 'chan', 'loc']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/station/'+metadata['STATTYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store data and predictions ####
            columns = ['refphase', 'delobsphase']
            data.loc[data['typeiorb'] == typeiorb][columns].to_hdf(hdffile, key=unicode(subfolder+'/arrival/observed/'+metadata['REFERENCE MODEL']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            if metadata['MODEL3D'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delpredphase'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/predicted/'+metadata['REFERENCE MODEL']+'/'+metadata['MODEL3D']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            if metadata['CRUST'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delcruphase'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/predicted/'+metadata['REFERENCE MODEL']+'/'+metadata['CRUST']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            # store weights/uncertainties
            if metadata['WEITYPE'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delweight'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/weight/'+metadata['WEITYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            if metadata['SIGMATYPE'] != 'None':
                data.loc[data['typeiorb'] == typeiorb]['delsigma'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/sigma/'+metadata['SIGMATYPE']),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            #### store flag ####
            try:
                data.loc[data['typeiorb'] == typeiorb]['iflag'].to_hdf(hdffile, key=unicode(subfolder+'/arrival/others'),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
            except KeyError:
                if datatype == 'raw':
                    iflag = pd.DataFrame(0, index=np.array(data.loc[data['typeiorb'] == typeiorb].index), columns=['iflag'])
                    iflag.to_hdf(hdffile, key=unicode(subfolder+'/arrival/others'),format='table',mode='a',complib='bzip2',complevel=9,dropna=True,data_columns=True,append=True)
                else:
                    raise ValueError('iflag not defined for datatype ('+datatype+') unless specified for each measurement row in '+file)
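# A minimal usage sketch: `files` is expected to be a pandas DataFrame whose
# first column holds the ascii file names (the names below are hypothetical):
#
#   files = pd.DataFrame(['GDM52_0_25.0_R1.AVNI', 'GDM52_0_25.0_R2.AVNI'])
#   SWasciitohdf5(files, hdffile='Summary.SW.data.h5', datatype='summary')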
def SWhdf5toascii(query = '0/25.0/L1/REM3D',hdffile = 'Summary.SW.data.h5',iflag=0,datatype='summary',delim='-', outfile=None,model3d = None, refmodel = None,crust=None,weitype=None, sigmatype= None,stattype =None,eqtype=None):
    """
    Write an hdf5 query to an ASCII file. None is the default i.e. use the values
    in the hdf5 metadata.

    Input parameters:
    ----------------
    query : key query to the hdf5 file till the group level e.g. 0/25.0/L1/GDM52
    hdffile : input hdf5 file to query fields from
    iflag : select the flag to sub-select data
    datatype : type of data to group the data with e.g. raw, processed, summary
    delim : delimiter that combines fields into a joint field e.g. network-station
            separated out during I/O.
    outfile : output ascii file. If None, decided based on query
    model3d, refmodel, crust, weitype,
    sigmatype, stattype, eqtype : fields used to query various groups and attributes.
                                  If None, use the values in the hdffile metadata

    Output:
    ------
    ASCII file with the values selected
    """
    if not os.path.isfile(hdffile): raise IOError("Filename ("+hdffile+") does not exist")

    # read the dataframes from hdf5
    SWdata = readSWhdf5(query,hdffile,iflag,datatype,delim,model3d,refmodel,crust,
                        weitype, sigmatype,stattype,eqtype)

    # write the pandas DataFrame to file
    if outfile is None:
        fields = query.split('/')
        filename = fields[3]+'_'+fields[0]+'_'+fields[1]+'_'+fields[2]+'.AVNI'
    else:
        filename = outfile
    writeSWascii(SWdata,filename,delim=delim)
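# A minimal usage sketch (assumes the query group exists in the hdf5 file);
# with outfile=None the output name is derived from the query, here
# 'REM3D_0_25.0_L1.AVNI':
#
#   SWhdf5toascii(query='0/25.0/L1/REM3D', hdffile='Summary.SW.data.h5', iflag=0)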
def readSWhdf5(query = '0/25.0/L1/REM3D',hdffile = 'Summary.SW.data.h5',iflag=0,datatype='summary',delim='-',model3d=None, refmodel=None,crust=None,weitype=None, sigmatype= None,stattype =None,eqtype=None):
    """
    Read a query from an hdf5 container into an SWdata dictionary.

    Input parameters:
    ----------------
    query : key query to the hdf5 file till the group level e.g. 0/25.0/L1/GDM52
    hdffile : input hdf5 file to query fields from
    iflag : select the flag to sub-select data
    datatype : type of data to group the data with e.g. raw, processed, summary
    delim : delimiter that combines fields into a joint field e.g. network-station
            separated out during I/O.
    model3d, refmodel, crust, weitype,
    sigmatype, stattype, eqtype : fields used to query various groups and attributes.
                                  If None, use the values in the hdffile metadata

    Output:
    ------
    SWdata : dictionary of SW data with metadata and data fields consistent with
             readSWascii and writeSWascii
    """
    SWdata = {}
    if not os.path.isfile(hdffile): raise IOError("Filename ("+hdffile+") does not exist")
    hf = h5py.File(hdffile, 'r')
    SWdata['metadata'] = {}
    for name,value in hf[query].attrs.items():
        if name == 'COMMENTS':
            SWdata['metadata'][name] = [txt[0].decode('utf-8') for txt in hf[query].attrs['COMMENTS'][:].tolist()]
        else:
            SWdata['metadata'][name] = value
    try:
        #### get flag ####
        df0 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/others',where=['iflag=='+str(iflag)])
        #### get path ####
        df1 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/paths', where=df0.index)
        #### get source ####
        if eqtype is None: eqtype = SWdata['metadata']['EQTYPE']
        df2 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/source/'+eqtype, where=df0.index)
        #### get station ####
        if stattype is None: stattype = SWdata['metadata']['STATTYPE']
        df3 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/station/'+stattype, where=df0.index)
        #### get data and predictions ####
        if refmodel is None: refmodel = SWdata['metadata']['REFERENCE MODEL']
        df4 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/observed/'+refmodel, where=df0.index)
        if model3d is None: model3d = SWdata['metadata']['MODEL3D']
        if model3d == 'None':
            df5 = pd.DataFrame(0, index=np.array(df3.index), columns=['delpredphase'])
        else:
            df5 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/predicted/'+refmodel+'/'+model3d, where=df0.index)
        if crust is None: crust = SWdata['metadata']['CRUST']
        if crust == 'None':
            df6 = pd.DataFrame(0, index=np.array(df3.index), columns=['delcruphase'])
        else:
            df6 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/predicted/'+refmodel+'/'+crust, where=df0.index)
        #### get weights/uncertainties ####
        if weitype is None: weitype = SWdata['metadata']['WEITYPE']
        if weitype == 'None':
            df7 = pd.DataFrame(0, index=np.array(df3.index), columns=['delweight'])
        else:
            df7 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/weight/'+weitype, where=df0.index)
        if sigmatype is None: sigmatype = SWdata['metadata']['SIGMATYPE']
        if sigmatype == 'None':
            df8 = pd.DataFrame(0, index=np.array(df3.index), columns=['delsigma'])
        else:
            df8 = pd.read_hdf(hdffile, query+'/data/'+datatype+'/arrival/sigma/'+sigmatype, where=df0.index)
        SWdata['data'] = pd.concat([df0,df1,df2,df3,df4,df5,df6,df7,df8],axis=1)
    except Exception:
        hf.close()
        raise ValueError('fields missing in location '+query+'/data/'+datatype)

    # add the fields implied by the query
    fields = query.split('/')
    SWdata['data']['overtone'] = int(fields[0])
    SWdata['data']['period'] = float(fields[1])
    SWdata['data']['typeiorb'] = fields[2]

    # update the metadata attributes based on the input query
    findindx = query+'/data/'+datatype+'/arrival/observed/'+refmodel
    SWdata['metadata']['PVEL'] = str(hf[findindx].attrs['pvel'])+' '+hf[findindx].attrs['pvel_units']
    SWdata['metadata']['REFERENCE MODEL'] = refmodel
    SWdata['metadata']['CRUST'] = crust
    SWdata['metadata']['MODEL3D'] = model3d
    SWdata['metadata']['SIGMATYPE'] = sigmatype
    SWdata['metadata']['WEITYPE'] = weitype
    SWdata['metadata']['EQTYPE'] = eqtype
    SWdata['metadata']['STATTYPE'] = stattype
    hf.close()
    return SWdata
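# A minimal usage sketch (assumes the query group exists in the hdf5 file):
#
#   SWdata = readSWhdf5(query='0/25.0/L1/REM3D', hdffile='Summary.SW.data.h5')
#   print(SWdata['data'][['period','typeiorb','delobsphase']].head())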