#!/usr/bin/env python
"""avni.data.TT: travel-time (TT) data I/O routines."""

#######################################################################################
# python 3 compatibility
from __future__ import absolute_import, division, print_function

#####################  IMPORT STANDARD MODULES   ######################################

import os
import sys
import h5py
import glob
import numpy as np
import six
import pandas as pd
from itertools import islice
import fortranformat as ff #reading/writing fortran formatted text

if sys.version_info[0] >= 3: unicode = str

####################       I/O ROUTINES     ######################################
def readTTascii(file, delim='-', required=None, warning=False):
    """Reads the AVNI format for analysis and plotting.

    Input parameters:
    ----------------
    file : input file in default AVNI format

    delim : delimiter that combines fields into a joint field,
            e.g. network-station, separated out during I/O.

    required : fields needed as comments (e.g. #CITE: Author et al., YEAR)
               in the file. defaults: 'CITE', 'SHORTCITE', 'REFERENCE MODEL',
               'PVEL', 'CRUST', 'MODEL3D', 'SIGMATYPE', 'WEITYPE', 'EQTYPE',
               'STATTYPE', 'FORMAT', 'WRITE', 'FIELDS'

    Output:
    ------
    TTdata : dictionary with fields data, metadata and comments
    """
    # defaults
    if required is None:
        required = ['CITE', 'SHORTCITE', 'REFERENCE MODEL', 'CRUST',
                    'MODEL3D', 'SIGMATYPE', 'WEITYPE', 'EQTYPE',
                    'STATTYPE', 'FORMAT', 'WRITE', 'FIELDS']
    if not os.path.isfile(file):
        raise IOError("Filename (" + file + ") does not exist")

    # collect all comment lines (e.g. #CITE: ...) from the file
    comments = []
    for line in open(file):
        if line.startswith("#"):
            comments.append(line.rstrip())

    # parse key:value comments into metadata; keep #NOTES: lines separately
    metadata = {}
    notes = []
    for line in comments:
        if line.startswith("#") and ':' in line and not line.startswith("#NOTES:"):
            key = line[1:].split(':')[0]
            value = line[1:].split(':')[1].strip()
            metadata[key] = value
        elif line.startswith("#NOTES:"):
            notes.append(line)
    metadata['COMMENTS'] = notes

    # check that the required fields exist
    for key in required:
        if key not in metadata:
            if warning:
                print(key + " should be defined as a comment in " + file)
            else:
                raise KeyError(key + " should be defined as a comment in " + file)

    data = pd.read_table(file, header=None, comment='#', sep=r'\s+',
                         names=metadata['FIELDS'].split())

    # split concatenated fields (e.g. stat-net) back into separate columns
    for column in data.columns.tolist():
        if delim in column:
            newcolumns = column.split(delim)
            data[newcolumns] = data[column].str.split(delim, expand=True).iloc[:, :len(newcolumns)]
            data = data.drop(columns=column)  # get rid of old composite column
    data.replace('', np.nan, inplace=True)  # replace blank with nan
    data['path'] = data['cmtname'] + '_' + data['stat'] + '-' + data['net']

    TTdata = {'data': data, 'metadata': metadata}
    return TTdata
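# A minimal usage sketch for readTTascii (illustration only, not part of
# the AVNI API). 'TT_observations.avni' is a hypothetical file name; any
# file in the AVNI ASCII format with the required header comments works.
def _example_readTTascii():
    TTdata = readTTascii('TT_observations.avni')   # hypothetical input file
    print(TTdata['metadata']['SHORTCITE'])         # a parsed header comment
    print(TTdata['data'][['stat', 'net']].head())  # columns split on delim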
def writeTTascii(TTdata, filename, iflagthreshold=None, delim='-'):
    """Writes the AVNI format for analysis and plotting.

    Input parameters:
    ----------------
    TTdata : dictionary of TT data with metadata and data fields

    filename : output file name

    delim : delimiter that combines fields into a joint field,
            e.g. network-station, separated out during I/O.

    iflagthreshold : threshold for iflag which corresponds to the
                     processing level that was cleared
    """
    if iflagthreshold is None:
        data = TTdata['data']
    else:
        data = TTdata['data'][TTdata['data']['iflag'] >= iflagthreshold]

    metadata = TTdata['metadata']
    header_line = ff.FortranRecordWriter('(' + metadata['WRITE'] + ')')

    # header: all metadata except FIELDS/WRITE/COMMENTS, then the comments,
    # then FIELDS and WRITE so the column description closes the header
    printstr = [unicode('#' + key + ':' + metadata[key] + '\n')
                for key in metadata.keys()
                if key not in ['FIELDS', 'WRITE', 'COMMENTS']]
    printstr.append(unicode('#' * len(metadata['FIELDS']) + '\n'))
    for comment in metadata['COMMENTS']:
        printstr.append(unicode(comment + '\n'))
    printstr.append(unicode('#' * len(metadata['FIELDS']) + '\n'))
    for key in metadata.keys():
        if key in ['FIELDS', 'WRITE']:
            printstr.append(unicode('#' + key + ':' + metadata[key] + '\n'))

    namelist = metadata['FIELDS'].split()
    # re-join fields that were split on the delimiter during reading
    for column in namelist:
        if delim in column:
            oldcolumns = column.split(delim)
            data[column] = data[oldcolumns].fillna('').apply(
                lambda x: delim.join(x.astype(str).values), axis=1)
            data = data.drop(columns=oldcolumns)  # get rid of old separate columns

    # reorder according to FIELDS
    data = data.reindex(namelist, axis=1)

    f = open(filename, 'w')
    f.writelines(printstr)
    for ii in range(len(data)):
        line = []
        for val in data.values[ii]:
            if isinstance(val, six.string_types):
                line.append(val.ljust(15))
            else:
                line.append(val)
        try:
            arow = header_line.write(line)
        except Exception:
            raise IOError('No line to print')
        f.write(unicode(arow + '\n'))
    f.close()
    print("....written " + str(len(data)) + " observations to " + filename)
    return
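# A minimal round-trip sketch (illustration only): read a file and write it
# back out unchanged. Assumes the input carries the WRITE/FIELDS metadata
# that writeTTascii needs; both file names are hypothetical.
def _example_writeTTascii():
    TTdata = readTTascii('TT_observations.avni')       # hypothetical input
    writeTTascii(TTdata, 'TT_observations_copy.avni')  # re-serialize as-is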
def writetablehdf5(cagc_rays_outdir, tt_table_name, source_depth, component, verbose=True):
    '''Writes an hdf5 file based on the output of CAGCRAYS.

    params:
    cagc_rays_outdir <str>: path to output directory of CAGCRAYS
    tt_table_name <str>: name of hdf5 file to create or add to
    source_depth <float>: source depth in km
    component <str>: 'PSV' or 'SH'
    verbose <bool>: print extra output, helpful for debugging
    '''
    bad_paths = open('bad_paths.txt', 'w')
    source_depth = str(source_depth)

    # open hdf5 file (append if it already exists)
    if os.path.exists(tt_table_name):
        ds = h5py.File(tt_table_name, 'r+')
    else:
        ds = h5py.File(tt_table_name, 'w')

    # create h5py groups for travel times and paths (no-op if they exist)
    ds.require_group('travel_times')
    ds['travel_times'].require_group('1D')
    ds.require_group('paths')
    try:
        ds['travel_times']['1D'].create_group(component)
    except ValueError:
        print('group already exists... appending new data to it')

    # get list of directories containing travel time and path info
    phase_list = glob.glob(cagc_rays_outdir + '/*')
    print(phase_list)

    for phase_ in phase_list:
        phase = phase_.split('/')[-1]
        print(phase)

        # get travel times
        try:
            tts = pd.read_csv(phase_ + '/ttandtstar.txt',
                              delim_whitespace=True,
                              error_bad_lines=False,
                              skiprows=1,
                              names=['delta', 'p', 'T', 'dDdp', 'tstar', 'branch'])
            tts = tts.iloc[::-1]
            tts = tts.dropna()  # get rid of NaN values
        except OSError:
            raise ValueError('The directory ' + phase + ' does not contain ttandtstar.txt')

        # create a new group for each phase and source depth
        try:
            ds['travel_times']['1D'][component].create_group(phase)
        except ValueError:
            print('group already exists... appending new data to it')
        try:
            ds['travel_times']['1D'][component][phase].create_group(source_depth)
        except ValueError:
            print('group already exists... appending new data to it')

        # create datasets
        grp = ds['travel_times']['1D'][component][phase][source_depth]
        grp.create_dataset('distance_in_degree', data=tts['delta'].astype(float))
        grp.create_dataset('ray_param_sec_degree', data=tts['p'].astype(float))
        grp.create_dataset('time', data=tts['T'].astype(float))
        grp.create_dataset('dDdp', data=tts['dDdp'].astype(float))
        grp.create_dataset('tstar', data=tts['tstar'].astype(float))
        grp.create_dataset('branch', data=tts['branch'].values.astype('|S2'))

        # get raypaths
        path_list = glob.glob(cagc_rays_outdir + '/' + phase + '/path_SYNTH*')
        path_list.sort()
        try:
            ds['paths'].create_group(phase)
        except ValueError:
            print('group already exists... appending new data to it')

        for path_ in path_list:
            # read header; line 7 holds the epicentral distance
            with open(path_) as file_:
                header = list(islice(file_, 8))
                path_info = header[6]
                gcarc = path_info.strip().split()[1]

            # read data
            print("#########################################################")
            print(path_)
            path = np.loadtxt(path_)

            # get branch info
            with open(path_) as file_:
                branches = []
                for line_ in file_:
                    if line_.startswith("#BRANCH"):
                        branches.append(line_.strip().split()[1])

            # skip if there are two of the same branch at a single distance
            if len(branches) != len(set(branches)):
                bad_paths.write('source depth {}, phase {}, distance {}\n'.format(
                    source_depth, phase, gcarc))
                continue
            if verbose:
                print('THE BRANCHES ARE', branches, 'for', phase, 'at', gcarc)

            # a negative jump in distance marks the start of a new branch
            nbranches = len(branches)
            gc_diff = np.diff(path[:, 0])
            branch_breaks = np.where(gc_diff < 0)

            # create paths if they don't exist
            try:
                grp = ds['paths'][phase][source_depth]
            except KeyError:
                ds['paths'][phase].create_group(source_depth)
            try:
                grp = ds['paths'][phase][source_depth][gcarc]
            except KeyError:
                ds['paths'][phase][source_depth].create_group(gcarc)

            # add paths for each branch
            if nbranches > 1:
                for i in range(0, nbranches):
                    branch = branches[i]
                    # slice from the previous break (or the start) up to
                    # the next break (or the end of the path array)
                    bb_0 = 0 if i == 0 else branch_breaks[0][i-1] + 1
                    try:
                        bb_1 = branch_breaks[0][i] + 1
                    except IndexError:
                        bb_1 = None
                    if verbose:
                        print(phase, branch, gcarc, bb_0, bb_1)
                    ds['paths'][phase][source_depth][gcarc].create_group(branch)
                    brgrp = ds['paths'][phase][source_depth][gcarc][branch]
                    brgrp.create_dataset('radius', data=path[:, 3][bb_0:bb_1])
                    brgrp.create_dataset('distance', data=path[:, 4][bb_0:bb_1])
                    brgrp.create_dataset('time', data=path[:, 7][bb_0:bb_1])
            elif nbranches == 1:
                ds['paths'][phase][source_depth][gcarc].create_group(branches[0])
                brgrp = ds['paths'][phase][source_depth][gcarc][branches[0]]
                brgrp.create_dataset('radius', data=path[:, 3])
                brgrp.create_dataset('distance', data=path[:, 4])
                brgrp.create_dataset('time', data=path[:, 7])
            else:
                raise ValueError('something went wrong finding branches')

    # close the log and the table so everything is flushed to disk
    bad_paths.close()
    ds.close()
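# A minimal sketch of building a table (illustration only). The directory
# and file names are hypothetical, and the per-depth layout of the CAGCRAYS
# output (one subdirectory per phase) is an assumption.
def _example_writetablehdf5():
    for depth in [0, 1, 2]:  # hypothetical source depths in km
        writetablehdf5('cagcrays_out/depth_' + str(depth), 'tt_table.h5',
                       depth, 'PSV', verbose=False)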
def get_travel_times1D(table, distance_in_degree, source_depth_in_km, phase,
                       component='PSV', branch=None):
    '''Get 1D travel times from a lookup table.

    params:
    table <str>: path to hdf5 travel time table
    distance_in_degree <float>: source/receiver distance in degrees
    source_depth_in_km <float>: source depth in km
    phase <str>: seismic phase
    component <str>: 'PSV' or 'SH'. defaults to 'PSV' (for now)
    branch <str>: branch (doesn't do anything yet)

    returns
    time_in_s <float>: travel time in seconds
    '''
    if branch is None and phase != 'PKP':
        branch = '1'   # defaults to branch 1 if not given
    elif branch is None and phase == 'PKP':
        branch = 'ab'  # defaults to the ab branch if not given
    branch = branch.encode('utf-8')

    ttt = h5py.File(table, 'r')

    # perform checks ---------------------------------------------------------
    if phase not in ttt['travel_times']['1D'][component].keys():
        raise ValueError('phase {} not present in the {} table'.format(phase, table))
    # TODO: add attributes to the table such as min/max depth and distance
    evdp_max = 670.0  # this is just a stand-in value
    if source_depth_in_km > evdp_max:
        raise ValueError('source depth {}-km exceeds limit of {}-km'.format(
            source_depth_in_km, evdp_max))

    # get the two table depths bracketing the source depth (1-km spacing)
    dz_table = 1
    z1 = int(source_depth_in_km)
    z2 = z1 + dz_table

    # get datasets at each source depth and the rows matching the branch
    dset1 = ttt['travel_times']['1D'][component][phase][str(z1)]
    dset2 = ttt['travel_times']['1D'][component][phase][str(z2)]
    b_inds1 = list(np.where(dset1['branch'][:] == branch)[0])
    b_inds2 = list(np.where(dset2['branch'][:] == branch)[0])
    if len(b_inds1) == 0 or len(b_inds2) == 0:
        raise ValueError('distance {} for phase {} and branch {} not found'.format(
            distance_in_degree, phase, branch))

    # interpolate in distance at each bracketing depth
    ttz1 = np.interp(distance_in_degree,
                     dset1['distance_in_degree'][b_inds1],
                     dset1['time'][b_inds1])
    ttz2 = np.interp(distance_in_degree,
                     dset2['distance_in_degree'][b_inds2],
                     dset2['time'][b_inds2])

    # linearly interpolate between the travel times at the two source depths
    w1 = (z2 - source_depth_in_km) / dz_table
    w2 = (source_depth_in_km - z1) / dz_table
    time_in_s = ttz1 * w1 + ttz2 * w2
    return time_in_s
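# A minimal lookup sketch (illustration only, against the hypothetical
# 'tt_table.h5' built above; assumes it holds 'P' and 'PKP' entries).
# Source depths between the 1-km table levels are interpolated linearly.
def _example_get_travel_times1D():
    t_P = get_travel_times1D('tt_table.h5', 60.0, 15.5, 'P')
    t_PKP = get_travel_times1D('tt_table.h5', 150.0, 15.5, 'PKP', branch='ab')
    print('P: {:.1f} s, PKPab: {:.1f} s'.format(t_P, t_PKP))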