Source code for avni.models.common

#!/usr/bin/env python
"""This script/module contains routines that are used to Earth models"""

#####################  IMPORT STANDARD MODULES   ######################################
# python 3 compatibility
from __future__ import absolute_import, division, print_function
import sys
if (sys.version_info[:2] < (3, 0)):
    from builtins import float,int

import os
import timeit
import glob # pattern matching for finding files
import numpy as np #for numerical analysis
import fortranformat as ff #reading/writing fortran formatted text
from configobj import ConfigObj
import xarray as xr
from io import StringIO
from copy import deepcopy
import ntpath
import warnings
import pandas as pd
import struct
import traceback
import typing as tp

####################### IMPORT AVNI LIBRARIES  #######################################
from .. import tools
from .. import constants
from .reference1d import Reference1D
#######################################################################################

def readepixfile(filename: str) -> tp.Tuple[np.ndarray, dict, list]:
    """Read a local file in extended pixel (.epix) format.

    Parameters
    ----------
    filename : str
        Name of the file containing four columns:
        (`latitude`, `longitude`, `pixel_size`, `value`)

    Returns
    -------
    tp.Tuple[np.ndarray, dict, list]
        First element is an array containing
        (`latitude`, `longitude`, `pixel_size`, `value`).
        Second element is a dictionary of metadata parsed from commented header fields.
        Third element is a list of all other comments except lines containing metadata.

    Raises
    ------
    IOError
        File not found in local directory

    Examples
    --------
    >>> epixarr,metadata,comments = readepixfile('file.epix')

    Notes
    -----
    This function assumes cell-centered values within a pixel of size
    `pixel_size`. In order to interpolate at arbitrary locations, this
    assumption needs to be used.

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    # Read the file if available
    try:
        epixarr = np.genfromtxt(filename, dtype=None, comments="#",
                                names=['latitude','longitude','pixel_size','value'])
    except IOError:
        raise IOError("File ("+filename+") cannot be read.")

    # strip header and store basic metadata
    metadata = {}
    comments = []
    with open(filename) as f:
        for line in f:
            if line.startswith('#'):
                if ':' in line:
                    field = line.lstrip('#').split(':')[0].strip()
                    metadata[field] = line.lstrip('#').split(':')[1].strip()
                else:
                    comments.append(line.strip())
    return epixarr, metadata, comments
def writeepixfile(filename: str, epixarr: np.ndarray,
                  metadata: dict = {'BASIS':'PIX','FORMAT':'50'},
                  comments: list = None) -> None:
    """Write a named numpy array to an extended pixel format (.epix) file.

    Parameters
    ----------
    filename : str
        Name of the file containing four columns:
        (`latitude`, `longitude`, `pixel_size`, `value`)
    epixarr : np.ndarray
        Array containing (`latitude`, `longitude`, `pixel_size`, `value`)
    metadata : dict, optional
        Metadata from input fields if specified, by default {'BASIS':'PIX','FORMAT':'50'}
    comments : list, optional
        All other comments except lines containing metadata, by default None

    Raises
    ------
    IOError
        File cannot be written in local directory

    Examples
    --------
    >>> writeepixfile('file.epix',epixarr,metadata,comments)

    Notes
    -----
    This function assumes cell-centered values within a pixel of size
    `pixel_size`. In order to interpolate at arbitrary locations, this
    assumption needs to be used.

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    # combine headers
    header = ''
    for key in sorted(metadata.keys()):
        header = header+'#'+key+':'+metadata[key]+'\n'
    if comments is not None:
        for comment in comments:
            header = header+comment+'\n'
    header = header[:-1]  # get rid of the last \n in header

    # try writing the file
    try:
        np.savetxt(filename, epixarr, fmt='%8.3f %8.3f %8.3f %+12.7e',
                   header=header, comments='')
    except Exception:
        raise IOError("File ("+filename+") cannot be written.")
    return
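
# A minimal round-trip sketch for the two epix helpers above; the file name
# 'demo.epix' and the sample values below are hypothetical, for illustration
# only:
#
#   >>> import numpy as np
#   >>> epixarr = np.array([[89.5, 0.5, 1.0, 0.0123],
#   ...                     [89.5, 1.5, 1.0, 0.0456]])
#   >>> writeepixfile('demo.epix', epixarr,
#   ...               metadata={'BASIS': 'PIX', 'FORMAT': '50'})
#   >>> epixarr2, metadata, comments = readepixfile('demo.epix')
#   >>> metadata['FORMAT']
#   '50'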
def read3dmodelfile(modelfile: str) -> dict:
    """Reads a standard 3D model file into a `model3d` dictionary containing
    `data` and `metadata`. The `data` field contains the basis coefficients of
    the parameterization employed in the 3D model file. The `metadata` fields
    contain various parameters relevant for evaluating the 3D model at various
    locations.

    Parameters
    ----------
    modelfile : str
        Text file describing the 3D model

    Returns
    -------
    dict
        A `model3d` dictionary containing `data` and `metadata`

    Raises
    ------
    IOError
        File not found in local directory
    ValueError
        Parameterization type has not been implemented yet. Current ones
        include spherical harmonics, spherical splines and pixels.

    Examples
    --------
    >>> model3d = read3dmodelfile('S362ANI+M')

    Notes
    -----
    This function fills several fields in the `data` and `metadata` entries.
    The details depend on what kind of parameterization is employed. In
    general, `maxkern` is the maximum number of radial kernels and `maxcoeff`
    is the maximum number of corresponding lateral basis functions, while
    `resolution` and `realization` are the indices for the resolution level
    and the realization from a model ensemble (usually 0 if it is a single
    file).

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    # check if file exists
    if not os.path.isfile(modelfile):
        raise IOError("Filename ("+modelfile+") does not exist")

    # defaults
    desckern = []; ihorpar = []
    refmodel = None; kerstr = None; crust = None; null_model = None
    interpolant = None; cite = None; shortcite = None

    # read the fields
    with open(modelfile) as f:
        lines = f.readlines()
    ii = 0
    model3d = {}
    foundsplines = False; foundpixels = False  # foundharmonics = False
    while ii < len(lines):
        line = lines[ii]; ii = ii+1
        if line.startswith("REFERENCE MODEL:"):
            refmodel = line[16:].rstrip('\n').strip(' ')
        if line.startswith("NAME:"):
            model_name = line[5:].rstrip('\n').strip(' ')
        if line.startswith("KERNEL SET:"):
            kerstr = line[12:].rstrip('\n').strip(' ')
        if line.startswith("NULL MODEL:"):
            null_model = line[12:].rstrip('\n').strip(' ')
            if 'None' in null_model or 'none' in null_model:
                null_model = None
        if line.startswith("CITE:"):
            cite = line[6:].rstrip('\n').strip(' ')
        if line.startswith("SHORTCITE:"):
            shortcite = line[11:].rstrip('\n').strip(' ')
        if line.startswith("INTERPOLANT:"):
            interpolant = line[13:].rstrip('\n').strip(' ')
        if line.startswith("CRUST:"):
            crust = line[7:].rstrip('\n').strip(' ')
        if line.startswith("RADIAL STRUCTURE KERNELS:"):
            nmodkern = int(line[26:].rstrip('\n'))
        # search for parameterization
        if line.startswith("DESC"):
            idummy = int(line[4:line.index(':')])
            substr = line[line.index(':')+1:len(line.rstrip('\n'))]
            ifst, ilst = tools.firstnonspaceindex(substr)
            desckern.append(substr[ifst:ilst])
        if line.startswith("HORIZONTAL PARAMETERIZATIONS:"):
            nhorpar = int(line[29:].rstrip('\n'))
            typehpar = np.zeros(nhorpar, dtype='U40')
            ityphpar = np.zeros(nhorpar, dtype=int)
            ncoefhor = np.zeros(nhorpar, dtype=int)
            hsplfile = np.zeros(nhorpar, dtype='U40')
            coef = [[] for i in range(nmodkern)]
        if line.startswith("HPAR"):
            idummy = int(line[4:line.index(':')])
            substr = line[line.index(':')+1:len(line.rstrip('\n'))]
            ifst, ilst = tools.firstnonspaceindex(substr)
            if substr[ifst:ifst+20] == 'SPHERICAL HARMONICS,':
                # initialize
                lmaxhor = np.zeros(nhorpar, dtype=int)
                ityphpar[idummy-1] = 1
                typehpar[idummy-1] = 'SPHERICAL HARMONICS'
                lmax = int(substr[21:].rstrip('\n'))
                lmaxhor[idummy-1] = lmax
                ncoefhor[idummy-1] = (lmax+1)**2
                # foundharmonics = True
            elif substr[ifst:ifst+18] == 'SPHERICAL SPLINES,':
                ifst1 = ifst+18
                ifst = len(substr)
                ilst = len(substr)
                while substr[ifst-1:ifst] != ',':
                    ifst = ifst-1
                ncoef = int(substr[ifst+1:ilst].rstrip('\n'))
                substr = substr[ifst1:ifst-1]
                ifst1, ilst = tools.firstnonspaceindex(substr)
                hsplfile[idummy-1] = substr[ifst1:ilst]
                ityphpar[idummy-1] = 2
                typehpar[idummy-1] = 'SPHERICAL SPLINES'
                ncoefhor[idummy-1] = ncoef
                # initialize if found first time
                if not foundsplines:
                    ixlspl = [[] for i in range(nhorpar)]
                    xlaspl = [[] for i in range(nhorpar)]
                    xlospl = [[] for i in range(nhorpar)]
                    xraspl = [[] for i in range(nhorpar)]
                # specific variables
                for jj in range(ncoef):
                    arr = lines[ii].rstrip('\n').split(); ii = ii+1
                    ixlspl[idummy-1].append(int(arr[0]))
                    xlaspl[idummy-1].append(float(arr[1]))
                    xlospl[idummy-1].append(float(arr[2]))
                    xraspl[idummy-1].append(float(arr[3]))
                foundsplines = True
            elif substr[ifst:ifst+7] == 'PIXELS,':
                ifst1 = ifst+7
                ifst = len(substr)
                ilst = len(substr)
                while substr[ifst-1:ifst] != ',':
                    ifst = ifst-1
                ncoef = int(substr[ifst+1:ilst].rstrip('\n'))
                substr = substr[ifst1:ifst-1]
                ifst1, ilst = tools.firstnonspaceindex(substr)
                hsplfile[idummy-1] = substr[ifst1:ilst]
                ityphpar[idummy-1] = 3
                typehpar[idummy-1] = 'PIXELS'
                ncoefhor[idummy-1] = ncoef
                # initialize if found first time
                if not foundpixels:
                    xlapix = [[] for i in range(nhorpar)]
                    xlopix = [[] for i in range(nhorpar)]
                    xsipix = [[] for i in range(nhorpar)]
                # specific variables
                for jj in range(ncoef):
                    arr = lines[ii].rstrip('\n').split(); ii = ii+1
                    xlopix[idummy-1].append(float(arr[0]))
                    xlapix[idummy-1].append(float(arr[1]))
                    xsipix[idummy-1].append(float(arr[2]))
                foundpixels = True
            else:
                raise ValueError('Undefined parameterization type - '+substr[ifst:ilst])
        if line.startswith("STRU"):
            idummy = int(line[4:line.index(':')])
            ihor = int(line[line.index(':')+1:].rstrip('\n'))
            ihorpar.append(ihor)
            ncoef = ncoefhor[ihor-1]
            for jj in range(int(ncoef/6)):
                arr = lines[ii].rstrip('\n').split(); ii = ii+1
                for i in arr:
                    coef[idummy-1].append(float(i))
            remain = ncoef % 6
            if remain > 0:
                arr = lines[ii].rstrip('\n').split(); ii = ii+1
                for val in arr:
                    coef[idummy-1].append(float(val))

    # Store the variables
    numvar = 0
    varstr = np.zeros(nmodkern, dtype='U200')
    ivarkern = np.zeros(nmodkern, dtype=int)
    for ii in np.arange(nmodkern):
        string = desckern[ii]
        if numvar == 0:
            varstr[0] = string[:string.index(',')]
            ivarkern[ii] = 1
            numvar = numvar+1
        else:
            for kk in np.arange(numvar):
                if varstr[kk] == string[:string.index(',')]:
                    ivarkern[ii] = kk+1
            if ivarkern[ii] == 0:
                numvar = numvar+1
                varstr[numvar-1] = string[:string.index(',')]
                ivarkern[ii] = numvar

    # Save the relevant portions
    desckern = np.array(desckern, dtype='U200')
    ihorpar = np.array(ihorpar)
    varstr = varstr[:numvar]
    ncoefcum = np.cumsum([ncoefhor[ihor-1] for ihor in ihorpar])

    # Store in a dictionary
    metadata = {}
    metadata['refmodel'] = refmodel; metadata['kerstr'] = kerstr; metadata['nmodkern'] = nmodkern
    metadata['desckern'] = desckern; metadata['nhorpar'] = nhorpar; metadata['hsplfile'] = hsplfile
    metadata['ityphpar'] = ityphpar; metadata['typehpar'] = typehpar
    metadata['ncoefhor'] = ncoefhor; metadata['ihorpar'] = ihorpar
    metadata['ivarkern'] = ivarkern; metadata['numvar'] = numvar
    metadata['varstr'] = varstr; metadata['ncoefcum'] = ncoefcum
    metadata['crust'] = crust; metadata['null_model'] = null_model
    metadata['interpolant'] = interpolant; metadata['cite'] = cite
    metadata['shortcite'] = shortcite

    # Get number of coefficients - lmax, number of splines or pixels
    if 'SPHERICAL HARMONICS' in typehpar:
        metadata['lmaxhor'] = lmaxhor
    if 'PIXELS' in typehpar:
        metadata['xsipix'] = np.array(xsipix); metadata['xlapix'] = np.array(xlapix)
        metadata['xlopix'] = np.array(xlopix)
    if 'SPHERICAL SPLINES' in typehpar:
        metadata['ixlspl'] = np.array(ixlspl); metadata['xlaspl'] = np.array(xlaspl)
        metadata['xlospl'] = np.array(xlospl); metadata['xraspl'] = np.array(xraspl)

    # Fill the basis coefficients as a pandas DataFrame
    model3d['data'] = {}
    model3d['data']['coef'] = pd.DataFrame(coef)
    try:
        model3d['data']['name'] = model_name
    except NameError:  # no NAME line was found in the file
        model3d['data']['name'] = ntpath.basename(modelfile)
    model3d['metadata'] = metadata
    return model3d
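
# A sketch of inspecting the dictionary returned by read3dmodelfile; the model
# file name below is hypothetical and the fields shown are the ones populated
# above:
#
#   >>> model3d = read3dmodelfile('MODEL.avni.ascii')
#   >>> meta = model3d['metadata']
#   >>> meta['nmodkern'], meta['numvar']   # radial kernels and variables
#   >>> meta['typehpar']                   # e.g. ['PIXELS'] or ['SPHERICAL SPLINES']
#   >>> model3d['data']['coef']            # DataFrame of coefficients, one row per radial kernel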
def epix2xarray(model_dir: str = '.', setup_file: str = 'setup.cfg',
                output_dir: str = '.', n_hpar: int = 1,
                buffer: bool = True) -> xr.Dataset:
    """Convert a set of extended pixel format (.epix) files in a directory to
    a netCDF4 file in AVNI format using xarray.

    Parameters
    ----------
    model_dir : str, optional
        path to directory containing epix layer files, by default '.'
    setup_file : str, optional
        setup file containing metadata for the model, by default 'setup.cfg'
    output_dir : str, optional
        path to output directory, by default '.'
    n_hpar : int, optional
        number of unique horizontal parameterizations, by default 1
    buffer : bool, optional
        write to buffer instead of file for the intermediate step of the
        ascii file, by default True

    Returns
    -------
    xr.Dataset
        An xarray Dataset that stores the values from a 3D model.

    Raises
    ------
    IOError
        File not found in local directory

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    start_time = timeit.default_timer()

    cfg_file = model_dir+'/'+setup_file
    if not os.path.isfile(cfg_file):
        raise IOError('No configuration file found. '
                      'Model directory must contain '+setup_file)
    else:
        parser = ConfigObj(cfg_file)
    model_name = parser['metadata']['name']
    try:
        kernel_set = '{}'.format(parser['metadata']['kerstr']).strip()
    except KeyError:
        kernel_set = 'NATIVE'

    if buffer:
        print('... writing ASCII buffer')
    else:
        print('... writing ASCII file')
    asciibuffer = epix2ascii(model_dir=model_dir, setup_file=setup_file,
                             output_dir=output_dir, n_hpar=n_hpar, buffer=buffer)
    elapsed = timeit.default_timer() - start_time
    if buffer:
        print('... read to ASCII buffer. evaluations took '+str(elapsed)+' s')
    else:
        print('... written ASCII file '+asciibuffer+'. evaluations took '+str(elapsed)+' s')

    ncfile = output_dir+'/{}.{}.avni.nc4'.format(model_name, kernel_set)
    print('... writing netcdf file '+ncfile)
    ds = ascii2xarray(asciibuffer, model_dir=model_dir, outfile=ncfile,
                      setup_file=setup_file)
    return ds
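
# An end-to-end usage sketch, assuming 'mymodel' contains a valid setup.cfg and
# the epix layer files it references (names hypothetical). Selecting a depth
# slice afterwards works for any 3-D variable in the resulting Dataset:
#
#   >>> ds = epix2xarray(model_dir='mymodel', output_dir='.')
#   >>> ds['vs'].sel(depth=100.0, method='nearest')   # map nearest to 100 km depth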
def checksetup(parser):
    """Check the arguments in the model setup file and populate the default
    options.

    Parameters
    ----------
    parser : ConfigObj
        A ConfigObj instance containing the model setup file

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    # Required arguments
    for key in ['name','cite']:
        if key not in parser['metadata'].keys():
            raise KeyError(key+' needs to be specified')
    for field in ['min','units','max','resolution']:
        for type in ['lat','lon']:
            key = 'geospatial_'+type+'_'+field
            if key in parser['metadata'].keys():
                parser['metadata'][key] = parser['metadata'][key] if field == 'units' else float(parser['metadata'][key])
                if field == 'units':
                    try:
                        constants.ureg(parser['metadata'][key])
                    except:
                        raise ValueError('need to define '+key+' in acceptable units (% not allowed, use percent)')
            else:
                raise KeyError(key+' needs to be specified in setup file')
    for field in ['min','units','max']:
        key = 'geospatial_vertical_'+field
        if key in parser['metadata'].keys():
            parser['metadata'][key] = parser['metadata'][key] if field == 'units' else float(parser['metadata'][key])
        else:
            raise KeyError(key+' needs to be specified')

    # Optional arguments
    try:
        value = parser['metadata']['geospatial_vertical_positive'].strip()
    except KeyError:
        parser['metadata']['geospatial_vertical_positive'] = 'down'
    if parser['metadata']['geospatial_vertical_positive'].lower() not in ['up','down']:
        raise KeyError('geospatial_vertical_positive type can only be up or down')
    try:
        value = parser['metadata']['interpolant'].strip()
        parser['metadata']['interpolant'] = None if value.lower() == 'none' else value
    except KeyError:
        parser['metadata']['interpolant'] = 'nearest'
    if parser['metadata']['interpolant'] is not None:
        if parser['metadata']['interpolant'].lower() not in ['nearest','smooth']:
            raise KeyError('interpolant type can only be nearest or smooth for compatibility with KDtree queries')
    try:
        value = parser['metadata']['refmodel'].strip()
        parser['metadata']['refmodel'] = None if value.lower() == 'none' else value
    except KeyError:
        parser['metadata']['refmodel'] = None
    try:
        value = parser['metadata']['crust'].strip()
        parser['metadata']['crust'] = None if value.lower() == 'none' else value
    except KeyError:
        parser['metadata']['crust'] = None
    try:
        value = parser['metadata']['null_model'].strip()
        parser['metadata']['null_model'] = None if value.lower() == 'none' else value
    except KeyError:
        parser['metadata']['null_model'] = None
    try:
        value = '{}'.format(parser['metadata']['kerstr']).strip()
        parser['metadata']['kerstr'] = 'NATIVE' if value.lower() == 'none' else value
    except KeyError:
        parser['metadata']['kerstr'] = 'NATIVE'
    try:
        value = parser['metadata']['forward_modeling'].strip()
        parser['metadata']['forward_modeling'] = parser['parameters'].keys() if value.lower() == 'none' else value.split(',')
    except KeyError:
        parser['metadata']['forward_modeling'] = parser['parameters'].keys()

    # check units
    for var in parser['parameters'].keys():
        for key in ['unit','absolute_unit']:
            if key in parser['parameters'][var].keys():
                value = parser['parameters'][var][key]
                try:
                    constants.ureg(value)
                except:
                    raise ValueError('need to define '+key+' for variable '+var+' in acceptable units, not '+value+' (% NOT allowed, use percent).')
            else:
                if key == 'unit':
                    raise ValueError('need to define '+key+' for variable '+var+' in acceptable units (% NOT allowed, use percent).')
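
# A minimal 'setup.cfg' sketch that would pass checksetup; all values are
# illustrative, and only keys referenced in this module are shown:
#
#   [metadata]
#   name = demo_model
#   cite = Author et al. (2023)
#   folder = epix
#   geospatial_lat_min = -90.0
#   geospatial_lat_max = 90.0
#   geospatial_lat_units = degree
#   geospatial_lat_resolution = 1.0
#   geospatial_lon_min = 0.0
#   geospatial_lon_max = 360.0
#   geospatial_lon_units = degree
#   geospatial_lon_resolution = 1.0
#   geospatial_vertical_min = 25.0
#   geospatial_vertical_max = 2890.0
#   geospatial_vertical_units = km
#
#   [parameters]
#     [[vs]]
#     type = heterogeneity
#     folder = vs
#     unit = percent
#     description = Shear velocity perturbation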
def epix2ascii(model_dir: str = '.', setup_file: str = 'setup.cfg',
               output_dir: str = '.', n_hpar: int = 1, checks: bool = True,
               buffer: bool = True, onlyheaders: bool = False):
    """Write an AVNI formatted ASCII file from a directory containing extended
    pixel format (.epix) files.

    Parameters
    ----------
    model_dir : str, optional
        path to directory containing epix layer files, by default '.'
    setup_file : str, optional
        setup file containing metadata for the model, by default 'setup.cfg'
    output_dir : str, optional
        path to output directory, by default '.'
    n_hpar : int, optional
        number of horizontal parameterizations (currently only handles models
        with 1 horizontal parameterization, and with a constant pixel width),
        by default 1
    checks : bool, optional
        checks if the metadata in `setup_file` is consistent with epix files,
        by default True
    buffer : bool, optional
        write to buffer instead of file for the intermediate step of the
        ascii file, by default True
    onlyheaders : bool, optional
        write only headers, not the coefficients, by default False

    Returns
    -------
    buffer or file name
        The memory buffer containing the file output (buffer=True) or the
        output file name on disk.

    Raises
    ------
    IOError
        Some epix files have not been found in the directory
    AssertionError
        Checks for compatibility across epix files are not satisfied
    ValueError
        Invalid values are found for some metadata fields

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    # Check the configuration file
    cfg_file = model_dir+'/'+setup_file
    ref_dict = {}  # dictionary containing reference values
    if not os.path.isfile(cfg_file):
        raise IOError('No configuration file found. '
                      'Model directory must contain '+setup_file)
    else:
        parser = ConfigObj(cfg_file)

    # Required arguments
    checksetup(parser)
    model_name = parser['metadata']['name']
    cite = parser['metadata']['cite']
    epix_folder = parser['metadata']['folder']
    kernel_set = parser['metadata']['kerstr']

    # get the optional arguments
    interpolant = parser['metadata']['interpolant']
    ref_model = parser['metadata']['refmodel']
    crust = parser['metadata']['crust']
    null_model = parser['metadata']['null_model']
    forward_modeling = parser['metadata']['forward_modeling']

    # write header
    outfile = output_dir+'/{}.{}.avni.ascii'.format(model_name,kernel_set)
    if buffer:
        # Writing to a buffer
        f_out = StringIO()
    else:
        f_out = open(outfile,'w')
    f_out.write(u'NAME: {}\n'.format(model_name))
    f_out.write(u'REFERENCE MODEL: {} \n'.format(ref_model))
    f_out.write(u'KERNEL SET: {}\n'.format(kernel_set))
    f_out.write(u'INTERPOLANT: {}\n'.format(interpolant))
    f_out.write(u'CITE: {}\n'.format(cite))
    # checksetup converts the string 'None' to None, so test against None here
    if crust is not None:
        f_out.write(u'CRUST: {}\n'.format(crust))
    if forward_modeling is not None:
        f_out.write(u'FORWARD MODELING: {}\n'.format(forward_modeling))
    if null_model is not None:
        f_out.write(u'NULL_MODEL: {}\n'.format(null_model))

    # find the number of radial kernels
    epix_lengths = []; string = []
    icount = 0
    for parameter in parser['parameters']:
        mod_type = parser['parameters'][parameter]['type']
        par_folder = parser['parameters'][parameter]['folder']
        icount += 1
        try:
            description = parser['parameters'][parameter]['description']
            string.append(str(icount)+'. '+description+' ('+parameter+')')
        except KeyError:
            string.append(str(icount)+'. '+parameter)
        if mod_type == 'heterogeneity':
            epix_files = glob.glob(model_dir+'/'+epix_folder+'/'+par_folder+'/*.epix')
        elif mod_type == 'topography':
            epix_files = glob.glob(model_dir+'/'+epix_folder+'/'+par_folder+'/*'+parameter+'.epix')
        if len(epix_files) > 0:
            epix_lengths.append(len(epix_files))
        else:
            raise IOError('no files found for parameter '+parameter+' of type '+mod_type)
    print('... read '+str(np.sum(epix_lengths))+' radial structure kernels of '+str(len(string))+' variables: \n'+'\n'.join(string))
    f_out.write(u'RADIAL STRUCTURE KERNELS: {}\n'.format(np.sum(epix_lengths)))

    # Read through various parameters
    stru_indx = []
    lats = []
    lons = []
    pxs = []
    k = 1
    for i, parameter in enumerate(parser['parameters']):
        ref_dict[parameter] = {}
        ref_dict[parameter]['ifremav'] = []
        ref_dict[parameter]['refvalue'] = []
        ref_dict[parameter]['average'] = []
        mod_type = parser['parameters'][parameter]['type']
        par_folder = parser['parameters'][parameter]['folder']
        if mod_type == 'heterogeneity':
            epix_files = glob.glob(model_dir+'/'+epix_folder+'/'+par_folder+'/*.epix')
            epix_files.sort(key=tools.alphanum_key)
            ref_dict[parameter]['depth_in_km'] = []
        elif mod_type == 'topography':
            epix_files = glob.glob(model_dir+'/'+epix_folder+'/'+par_folder+'/*'+parameter+'.epix')
            depth = float(parser['parameters'][parameter]['depth'])*constants.ureg(parser['parameters'][parameter]['unit'])
            depth.ito('km')
            ref_dict[parameter]['depth_in_km'] = float(depth.magnitude)
        else:
            raise ValueError('model type not recognized... should be either "heterogeneity" or "topography"')

        # write descriptions of radial kernels
        for j, epix_file in enumerate(epix_files):
            # read header and store basic metadata
            metadata = {}
            head = []
            with open(epix_file) as f:
                for line in f:
                    if line.startswith('#'):
                        head.append(line)
                        for field in ['DEPTH_IN_KM','AVERAGE','IFREMAV','REFVALUE','REFMODEL','UNIT','WHAT','FORMAT','BASIS']:
                            if field in line:
                                metadata[field] = line.split(':')[1].split('\n')[0].strip()

            # conduct checks
            if checks:
                if not parser['parameters'][parameter]['unit'].lower() == metadata['UNIT'].lower():
                    raise AssertionError("UNIT incompatible in file "+epix_file)
                if parameter.lower() != metadata['WHAT'].lower():
                    warnings.warn("parameter != metadata['WHAT'] in file "+epix_file)
                value = metadata['REFMODEL'].strip()
                refmodel_epix = None if value.lower() == 'none' else value
                if not ref_model == refmodel_epix:
                    warnings.warn("parser['parameters']['refmodel'] != metadata['REFMODEL'] in file "+epix_file)
                if not metadata['FORMAT'] == '50':
                    raise AssertionError("FORMAT incompatible in file "+epix_file)
                if not metadata['BASIS'].lower() == 'PIX'.lower():
                    raise AssertionError("BASIS incompatible in file "+epix_file)

            # defaults if field not available in the epix file
            try:
                ref_dict[parameter]['ifremav'].append(float(metadata['IFREMAV']))
            except (KeyError, ValueError):
                ref_dict[parameter]['ifremav'].append(0.)
            try:
                ref_dict[parameter]['refvalue'].append(float(metadata['REFVALUE']))
            except (KeyError, ValueError):
                ref_dict[parameter]['refvalue'].append(-999.0)
            try:
                ref_dict[parameter]['average'].append(float(metadata['AVERAGE']))
            except (KeyError, ValueError):
                ref_dict[parameter]['average'].append(0.)
            try:
                ref_dict[parameter]['refmodel'] = metadata['REFMODEL']
            except KeyError:
                ref_dict[parameter]['refmodel'] = 'None'

            if mod_type == 'heterogeneity':
                for line in head:
                    if 'DEPTH_RANGE' in line:
                        depth_range = line.split(':')[1].split('\n')[0]
                f_out.write(u'DESC {:3.0f}: {}, boxcar, {} km\n'.format(k,parameter,depth_range))
                ref_dict[parameter]['depth_in_km'].append(float(metadata['DEPTH_IN_KM']))
            elif mod_type == 'topography':
                if checks:
                    if not float(parser['parameters'][parameter]['depth']) == float(metadata['REFVALUE']):
                        raise AssertionError("REFVALUE incompatible in file "+epix_file)
                depth_ref = parser['parameters'][parameter]['depth']
                f_out.write(u'DESC {:3.0f}: {}, delta, {} km\n'.format(k,parameter,depth_ref))

            # now read the data
            f = np.loadtxt(epix_file)
            if j == 0 and k == 1:
                par1 = f[:,0:2]
                lats.append(f[:,0])
                lons.append(f[:,1])
                pxs.append(f[:,2])
                stru_indx.append(n_hpar)
            else:
                par_new = f[:,0:2]
                lats_new = f[:,0]
                lons_new = f[:,1]
                pxs_new = f[:,2]
                # loop through all previous parameterizations
                for ii in range(len(lats)):
                    if not np.array_equal(par1,par_new):
                        n_hpar += 1
                        lats.append(lats_new)
                        lons.append(lons_new)
                        pxs.append(pxs_new)
                        stru_indx.append(n_hpar)
            k += 1

            # enforce longitudes from 0 to 360 to be consistent with xarray
            if not min(f[:,1]) >= 0.:
                raise AssertionError("longitudes need to be [0,360] "+epix_file)

    # write horizontal parameterization
    f_out.write(u'HORIZONTAL PARAMETERIZATIONS: {}\n'.format(len(lats)))
    for i,_ in enumerate(lats):
        # check pixel widths
        if np.min(pxs[i]) != np.max(pxs[i]):
            raise ValueError('no inhomogeneous model parameterizations allowed yet')
        else:
            px_w = pxs[i][0]
        f_out.write(u'HPAR {}: PIXELS, {:5.3f} X {:5.3f}, {}\n'.format(stru_indx[0],px_w,px_w,len(lats[i])))
        if not np.all(sorted(np.unique(lons)) == np.unique(lons)):
            raise AssertionError()
        if not np.all(sorted(np.unique(lats)) == np.unique(lats)):
            raise AssertionError()
        for j in range(len(lats[i])):
            lon_here = lons[i][j]
            lat_here = lats[i][j]
            px_here = pxs[i][j]
            f_out.write(u'{:7.3f} {:7.3f} {:7.3f}\n'.format(lon_here,lat_here,px_here))

    if not onlyheaders:
        # read the 1D model if any of the reference values are not defined
        ifread1D = {}  # stores whether the reference values have been read from a file
        fileread = False
        for _, parameter in enumerate(parser['parameters']):
            mod_type = parser['parameters'][parameter]['type']
            ifread1D[parameter] = np.any(np.array(ref_dict[parameter]['refvalue']) < 0.)
            if ifread1D[parameter]:
                # check if the refmodel file exists
                if not os.path.isfile(ref_dict[parameter]['refmodel']):
                    ifread1D[parameter] = False
                    print('WARNING: Could not fill reference values for '+parameter+' as the 1D reference model file could not be found : '+ref_dict[parameter]['refmodel'])
            if ifread1D[parameter]:
                if not fileread:
                    try:
                        # try reading the 1D file in card format
                        ref1d = Reference1D(ref_dict[parameter]['refmodel'])
                        fileread = True
                    except Exception:
                        print('WARNING: Could not fill reference values for '+parameter+' as the 1D reference model file could not be read as Reference1D instance : '+ref_dict[parameter]['refmodel'])
                        ifread1D[parameter] = False
            if mod_type == 'heterogeneity' and ifread1D[parameter]:
                ref1d.get_custom_parameter(parameter)

        # write coefficients
        k = 1
        for i, parameter in enumerate(parser['parameters']):
            mod_type = parser['parameters'][parameter]['type']
            par_folder = parser['parameters'][parameter]['folder']
            if mod_type == 'heterogeneity':
                epix_files = glob.glob(model_dir+'/'+epix_folder+'/'+par_folder+'/*.epix')
                epix_files.sort(key=tools.alphanum_key)
            elif mod_type == 'topography':
                epix_files = glob.glob(model_dir+'/'+epix_folder+'/'+par_folder+'/*'+parameter+'.epix')
            else:
                raise ValueError('model type not recognized... should be either "heterogeneity" or "topography"')

            # write model coefficients
            print('writing coefficients for '+parameter+' # '+str(k)+' - '+str(k+len(epix_files)-1)+' out of '+str(np.sum(epix_lengths))+' radial kernels/layers.')
            line = ff.FortranRecordWriter('(6E12.4)')
            for j in range(len(epix_files)):
                f = np.loadtxt(epix_files[j])
                coefs = f[:,3]
                # check if the reference value is negative. if so, use the
                # instance of the 1D model class to read from
                if ifread1D[parameter] and ref_dict[parameter]['refvalue'][j] < 0:
                    depth_in_km = ref_dict[parameter]['depth_in_km'][j]
                    ref_dict[parameter]['refvalue'][j] = ref1d.evaluate_at_depth(depth_in_km,parameter)
                # check ifremav. if it's 1, add the average back in
                if ref_dict[parameter]['ifremav'][j] == 1:
                    coefs += ref_dict[parameter]['average'][j]
                    print('... adding average back to parameter '+parameter+' # '+str(j))
                f_out.write(u'STRU {:3.0f}: {:1.0f}\n'.format(k,px_w))
                f_out.write(line.write(coefs)+u'\n')
                k += 1

    # Write to buffer or file name
    if buffer:
        f_out.seek(0)
        return f_out
    else:
        f_out.close()  # flush contents to disk before returning the file name
        return outfile
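
# A usage sketch for epix2ascii with an in-memory buffer; 'mymodel' is a
# hypothetical directory laid out as described in the docstring above:
#
#   >>> buf = epix2ascii(model_dir='mymodel', buffer=True)
#   >>> buf.readline().strip()    # first header line, e.g. 'NAME: demo_model'
#   >>> buf.seek(0)               # rewind before handing the buffer to ascii2xarray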
def ascii2xarray(asciioutput, outfile=None, model_dir: str = '.',
                 setup_file: str = 'setup.cfg', complevel: int = 9,
                 engine: str = 'netcdf4', writenc4: bool = False):
    """Create an xarray Dataset from an AVNI formatted ASCII file.

    Parameters
    ----------
    asciioutput : buffer or str
        A filename string or buffer that will be read
    outfile : str, optional
        Output file in netCDF4 format, by default None
    model_dir : str, optional
        path to directory containing the setup file, by default '.'
    setup_file : str, optional
        setup file containing metadata for the model, by default 'setup.cfg'
    complevel : int, optional
        option for compression in netcdf file, by default 9
    engine : str, optional
        option for engine in netcdf file, by default 'netcdf4'
    writenc4 : bool, optional
        write a netcdf4 file, by default False

    Returns
    -------
    ds: xarray Dataset
        Contains the model description in a new xarray Dataset

    Raises
    ------
    IOError
        Configuration file has not been found in the directory
    AssertionError
        Checks for compatibility across files are not satisfied
    ValueError
        Only pixels are allowed as parameterization for netCDF files
    warnings.warn
        Checks for number of pixels not satisfied

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    model_dict = {}
    cfg_file = model_dir+'/'+setup_file

    # check for configuration file
    if not os.path.isfile(cfg_file):
        raise IOError('No configuration file found. '
                      'Model directory must contain '+setup_file)
    else:
        parser = ConfigObj(cfg_file)

    # Required arguments
    epix_folder = parser['metadata']['folder']
    checksetup(parser)

    try:  # attempt to rewind a buffer
        asciioutput.seek(0)
    except AttributeError:  # a file name was passed instead of a buffer
        if outfile is None and writenc4:
            try:
                outfile = asciioutput.split('.ascii')[0]+'.nc4'
            except:
                outfile = asciioutput+'.nc4'
        asciioutput = open(asciioutput,'r')

    # read header
    line = asciioutput.readline()
    while line:
        #if 'NAME' in line:
        #    model_name = line.split('NAME:')[1].strip()
        if 'REFERENCE MODEL' in line:
            value = line.split('REFERENCE MODEL:')[1].strip()
            ref_model = None if value.lower() == 'none' else value
        if 'KERNEL SET' in line:
            value = line.split('KERNEL SET:')[1].strip()
            krnl_set = None if value.lower() == 'none' else value
        if 'RADIAL STRUCTURE KERNELS' in line:
            nrad_krnl = line.split('RADIAL STRUCTURE KERNELS:')[1].strip()
            nrad_krnl = int(nrad_krnl)
            break
        line = asciioutput.readline()

    # check that the reference model and kernel set match the parser
    if not ref_model == parser['metadata']['refmodel']:
        raise AssertionError(ref_model+' the reference model in the ascii input is not the same as refmodel in '+setup_file)
    if not krnl_set == parser['metadata']['kerstr']:
        raise AssertionError(krnl_set+' the kernel string in the ascii input is not the same as kerstr in '+setup_file)

    # read variables and parameterizations
    variables = []
    rpar_list = []
    hpar_list = []
    rpar_starts = {}
    rpar_ends = {}
    rpar = {}
    variable_idxs = []
    hpar_idx = 0
    rpar_idx = 0
    var_idx = 0
    i = 0

    line = asciioutput.readline()
    while line:
        if i < nrad_krnl:
            variable = line.strip().split()[2].split(',')[0]
            if variable not in variables:
                variables.append(variable)
                rpar_starts[variable] = []
                rpar_ends[variable] = []
                rpar[variable] = []
                model_dict[variable] = {}
                model_dict[variable]['hpar_idx'] = None
                variable_idxs.append(var_idx)
                var_idx += 1
            # first try to find start and end of radial parameterization
            try:
                rpar_start = float(line.strip().split(',')[-1].split('-')[0].strip('km'))
                rpar_end = float(line.strip().split(',')[-1].split('-')[1].strip('km'))
                rpar_starts[variable].append(rpar_start)
                rpar_ends[variable].append(rpar_end)
                rpar[variable].append((rpar_start + rpar_end)/2.)
            except IndexError:
                model_dict[variable]['rpar_idx'] = None
            line = asciioutput.readline()
            i += 1
        if i == nrad_krnl:
            # read number of horizontal parameterizations
            nhpar = int(line.strip().split()[-1])
            break

    # Now get the radial parameterization index for each variable
    for variable in variables:
        if len(rpar[variable]) != 0:  # not an empty list as for a discontinuity
            if len(rpar[variable]) > 1:
                if sorted(rpar[variable]) != rpar[variable]:
                    raise AssertionError('depths not sorted',rpar[variable])
            if rpar[variable] not in rpar_list:
                rpar_list.append(rpar[variable])
                model_dict[variable]['rpar_idx'] = rpar_idx
                rpar_idx += 1
            else:
                model_dict[variable]['rpar_idx'] = rpar_list.index(rpar[variable])

    # check that information on variables in the ascii file exists in setup.cfg
    for var in variables:
        if not var in parser['parameters'].keys():
            raise AssertionError(var+' not found as shortname in '+setup_file)
        for indx in ['rpar_idx','hpar_idx']:
            if not indx in model_dict[var].keys():
                raise AssertionError(var+' not read properly with index '+indx)

    for i in range(nhpar):
        lons = []
        lats = []
        pxwd = []
        line = asciioutput.readline()
        print(line.strip())
        #hpar_type = line.strip().split()[2].split(',')[0]
        hpar_name = line.split(':')[1].strip()
        if hpar_name.lower().startswith('pixel'):
            pxw_lon = float(line.strip().split()[3].strip(','))
            pxw_lat = float(line.strip().split()[5].strip(','))
            nlines_input = int(line.strip().split()[6].strip(','))
            nlines = int(360.0/pxw_lon) * int(180/pxw_lat)
            if not nlines == nlines_input:
                warnings.warn('number of pixels expected for '+str(pxw_lat)+'X'+str(pxw_lon)+' is '+str(nlines)+', not '+str(nlines_input)+' as provided.')
        else:
            raise ValueError('only PIXEL parameterizations enabled')
        for j in range(nlines_input):
            line = asciioutput.readline()
            lons.append(float(line.strip().split()[0]))
            lats.append(float(line.strip().split()[1]))
            pxwd.append(float(line.strip().split()[2]))
        hpar_list.append([lons,lats,pxwd])

    # read coefficients
    num_coefficients = None
    for variable in variables:
        stru_idx = model_dict[variable]['rpar_idx']
        model_dict[variable]['layers'] = {}
        if stru_idx is not None:
            n_stru = len(rpar_list[stru_idx])
        else:
            n_stru = 1
        for i in range(0,n_stru):
            layer_coefs = []
            # get structure info
            line = asciioutput.readline()
            #nstru = int(line.strip().split()[1].split(':')[0])
            nhparam = int(line.strip().split()[2])
            npts = len(hpar_list[nhparam-1][0][:])
            nlines = int(npts/6)
            if model_dict[variable]['hpar_idx'] is None:
                model_dict[variable]['hpar_idx'] = nhparam-1
            for j in range(nlines):
                line = asciioutput.readline()
                for coef in line.strip().split():
                    layer_coefs.append(float(coef))
            # if npts is not a multiple of 6, read the remainder line
            remain = npts % 6
            if remain > 0:
                line = asciioutput.readline()
                for coef in line.strip().split():
                    layer_coefs.append(float(coef))
            model_dict[variable]['layers'][i] = layer_coefs

            # Check that the same number of coefficients is used
            if num_coefficients is None:
                num_coefficients = len(model_dict[variable]['layers'][i])
            else:
                if num_coefficients != len(model_dict[variable]['layers'][i]):
                    raise AssertionError('number of coefficients for variable '+variable+' and layer '+str(i)+' is not equal to that of another one '+str(num_coefficients)+'. All variables should use the same grid.')

    # check if we can read the 1D model
    ifread1D = False if parser['metadata']['refmodel'] is None else True
    if ifread1D:
        if os.path.isfile(parser['metadata']['refmodel']):
            try:
                # try reading the 1D file in card format
                ref1d = Reference1D(parser['metadata']['refmodel'])
                # get derived parameters
                ref1d.get_mineralogical()
                ifread1D = True
            except Exception:
                ifread1D = False
                print('WARNING: Could not fill some reference values as the 1D reference model file could not be read as Reference1D instance : '+parser['metadata']['refmodel'])
        else:
            ifread1D = False
            print('WARNING: Could not fill some reference values as the 1D reference model file could not be found : '+parser['metadata']['refmodel'])

    # check that indices have been read properly
    for var in variables:
        if model_dict[var]['hpar_idx'] is None:
            raise AssertionError(var+' not read properly with hpar_idx ')

    # Get all depths
    alldepths = []; allstartdepths = []; allenddepths = []
    for rpar_temp in rpar_list:
        alldepths.extend(rpar_temp)
    alldepths = np.sort(np.unique(np.asarray(alldepths)))
    for variable in rpar_starts.keys():
        allstartdepths.extend(rpar_starts[variable])
        allenddepths.extend(rpar_ends[variable])
    allstartdepths = np.sort(np.unique(np.asarray(allstartdepths)))
    allenddepths = np.sort(np.unique(np.asarray(allenddepths)))

    # open xarray dataset
    ds = xr.Dataset()

    # make DataArrays for each variable, and add to the dataset
    area = None  # calculate area the first time around
    for variable in variables:
        hpar_idx = model_dict[variable]['hpar_idx']
        # sort by increasing lon if needed since reshape requires that
        sortbylon = np.all(hpar_list[hpar_idx][0] == sorted(hpar_list[hpar_idx][0]))

        # unique values for reshaping
        arr = pd.DataFrame(np.asarray(hpar_list[hpar_idx]).T, columns=['lon','lat','pxw'])
        if sortbylon:
            lon = np.unique(hpar_list[hpar_idx][0])
            lat = np.unique(hpar_list[hpar_idx][1])
            pxw = np.unique(hpar_list[hpar_idx][2])
        else:
            arr = arr.sort_values(by=['lon','lat'])
            lon = pd.unique(arr['lon'])
            lat = pd.unique(arr['lat'])
            pxw = pd.unique(arr['pxw'])
        if not len(pxw) == 1:
            warnings.warn('more than 1 pixel size in variable '+variable)
        print(variable,': PXW', pxw)

        # create dims arrays
        stru_idx = model_dict[variable]['rpar_idx']

        # get the grid sizes stored
        pixel_array = xr.DataArray(np.zeros((len(lat),len(lon))),
                                   dims=['latitude','longitude'],
                                   coords=[lat,lon])
        pixel_array[:,:] = np.reshape(arr['pxw'].values,(len(lat),len(lon)),order='F')
        ds['pixel_width'] = pixel_array

        if stru_idx is not None:
            dep = rpar_list[stru_idx]
            # find depth indices
            dep_indx = np.searchsorted(alldepths,dep)
            data_array = xr.DataArray(np.zeros((len(alldepths),len(lat),len(lon))),
                                      dims=['depth','latitude','longitude'],
                                      coords=[alldepths,lat,lon])
            for i,layer in enumerate(model_dict[variable]['layers']):
                # sort if needed before reshaping
                values = model_dict[variable]['layers'][layer]
                if not sortbylon:
                    arr = pd.DataFrame(np.vstack([hpar_list[hpar_idx],values]).T,
                                       columns=['lon','lat','pxw','values'])
                    arr = arr.sort_values(by=['lon','lat'])
                    values = arr['values'].values
                data_array[dep_indx[i],:,:] = np.reshape(values,(len(lat),len(lon)),order='F')
        else:
            data_array = xr.DataArray(np.zeros((len(lat),len(lon))),
                                      dims=['latitude','longitude'],
                                      coords=[lat,lon])
            # sort if needed before reshaping
            values = model_dict[variable]['layers'][0]
            if not sortbylon:
                arr = pd.DataFrame(np.vstack([hpar_list[hpar_idx],values]).T,
                                   columns=['lon','lat','pxw','values'])
                arr = arr.sort_values(by=['lon','lat'])
                values = arr['values'].values
            data_array[:,:] = np.reshape(values,(len(lat),len(lon)),order='F')

        #-------------------------------------------------------------------
        # add reference values at each depth as metadata to the DataArray
        #-------------------------------------------------------------------
        av_attrs = {}
        for keys in parser['parameters'][variable].keys():
            if sys.version_info[:2] > (3, 0):
                av_attrs[keys] = parser['parameters'][variable][keys]
            else:
                av_attrs[keys] = parser['parameters'][variable][keys].decode('utf-8')

        # read the 1D model if any of the reference values are not defined
        av_attrs['refmodel'] = parser['metadata']['refmodel']
        if len(data_array.shape) == 3:  # if 3-D variable
            # get the variable values
            av_depth = deepcopy(data_array.depth.values)
            avgvalue = []; ifaverage = True

            # calculate reference value if 1D model is read and absolute_unit is specified
            if ifread1D and 'absolute_unit' in av_attrs.keys():
                # get custom parameters that share a name with existing variables
                # e.g. vs_even6 for even degree variations up to degree 6
                ref1d.get_custom_parameter(variable)
                target_unit = av_attrs['absolute_unit']
                refvalue = ref1d.evaluate_at_depth(av_depth,parameter=variable).to(target_unit).magnitude
                av_attrs['refvalue'] = refvalue

            # loop over depths and get average values
            for _,depth in enumerate(av_depth):
                # select the appropriate map
                mapval = data_array.sel(depth=depth)
                # get the average, use an earlier evaluation of area if possible
                if ifaverage:
                    try:
                        globalav,area,_ = tools.meanxarray(mapval,area=area,pix_width=ds['pixel_width'])
                        avgvalue.append(globalav)
                    except Exception:
                        print(traceback.format_exc())
                        warnings.warn('Could not read mean values for parameter '+variable)
                        ifaverage = False
            if ifaverage:
                av_attrs['average'] = np.array(avgvalue)
            av_attrs['start_depths'] = allstartdepths
            av_attrs['end_depths'] = allenddepths
        else:
            # get the average, use an earlier evaluation of area if possible
            try:
                globalav,area,_ = tools.meanxarray(data_array,area=area,pix_width=ds['pixel_width'])
                av_attrs['average'] = globalav
            except Exception:
                warnings.warn('Could not read mean values for parameter '+variable)
            av_attrs['depth'] = float(av_attrs['depth'])

        # add DataArray object to the Dataset
        data_array.attrs = av_attrs
        ds[variable] = data_array

    # Add overall attributes
    attrs = {}
    for key in parser['metadata'].keys():
        if sys.version_info[:2] > (3, 0):
            attrs[key] = parser['metadata'][key]
        else:
            attrs[key] = parser['metadata'][key].decode('utf-8')
    # add data variables as an attribute since pixel_width or other data may also be added
    attrs['parameters'] = variables
    ds.attrs = attrs

    # write to netcdf
    comp = {'zlib': True, 'complevel': complevel}
    encoding = {var: comp for var in ds.data_vars}
    if outfile is not None:
        # change None to string since it cannot be stored in netCDF attributes
        for key in ds.attrs.keys():
            if ds.attrs[key] is None:
                ds.attrs[key] = 'None'
        for var in ds.data_vars:
            for key in ds[var].attrs.keys():
                if ds[var].attrs[key] is None:
                    ds[var].attrs[key] = 'None'
        ds.to_netcdf(outfile,engine=engine,encoding=encoding)
    return ds
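
# A sketch of converting an existing AVNI ascii file directly; the file name is
# hypothetical. Passing writenc4=True derives the .nc4 name from the input when
# no outfile is given:
#
#   >>> ds = ascii2xarray('demo_model.NATIVE.avni.ascii',
#   ...                   model_dir='mymodel', writenc4=True)
#   >>> ds.attrs['parameters']    # list of variables read from the file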
def getLU2symmetric(insparse):
    """Get the full symmetric matrix from a matrix stored in LU (triangular) form.

    Parameters
    ----------
    insparse : scipy.sparse matrix
        Sparse matrix containing the diagonal and one triangle

    Returns
    -------
    outsparse : scipy.sparse matrix
        Full symmetric sparse matrix

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    print(".... Converting from LU matrix to symmetric matrix")
    outsparse = insparse.tolil(copy=True)
    outsparse.setdiag(0.)
    outsparse = outsparse.tocsr()
    outsparse = outsparse+insparse.T
    return outsparse
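
# A minimal sketch with a 3x3 scipy.sparse matrix stored in upper-triangular
# (LU) form; the diagonal is kept once while off-diagonal entries are mirrored:
#
#   >>> from scipy import sparse
#   >>> import numpy as np
#   >>> lu = sparse.csr_matrix(np.array([[2., 1., 0.],
#   ...                                  [0., 3., 4.],
#   ...                                  [0., 0., 5.]]))
#   >>> getLU2symmetric(lu).toarray()
#   .... Converting from LU matrix to symmetric matrix
#   array([[2., 1., 0.],
#          [1., 3., 4.],
#          [0., 4., 5.]])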
def readResCov(infile: str, onlymetadata: bool = False):
    """Reads a Resolution or Covariance matrix created by invwdata_pm64 with
    option -r. R=inv(ATA+DTD)ATA and the name of the file is typically
    outmodel.Resolution.bin

    Parameters
    ----------
    infile : str
        Binary file containing the Resolution or Covariance matrix
    onlymetadata : bool, optional
        Only read the metadata and ignore the elements of the matrix, by default False

    Returns
    -------
    refmdl : str
        Reference 1D model
    kerstr : str
        Kernel signifying the basis sets comprising the radial and horizontal
        parameterization
    ntot : int
        Number of parameters or basis coefficients in the model
    indexrad1,indexrad2,indexhor1,indexhor2 : np.ndarray
        Arrays describing the radial and horizontal basis sets that each ATA
        element in `out` corresponds to
    out : np.ndarray
        Elements of the sparse Resolution or Covariance matrix

    Raises
    ------
    IOError
        Input file has not been found in the directory
    ValueError
        Number of bytes in binary file does not match expected

    :Authors:
        Raj Moulik (moulik@caa.columbia.edu)
    :Last Modified:
        2023.02.16 5.00
    """
    # read all the bytes to indata
    if not os.path.isfile(infile):
        raise IOError("Filename ("+infile+") does not exist")
    nbytes = os.path.getsize(infile)

    ii = 0  # initialize byte counter
    ifswp = ''  # assume that the byte order need not be swapped unless iflag is not 1

    start_time = timeit.default_timer()
    with open(infile, "rb") as f:
        # preliminary metadata
        indata = f.read(4)  # try to read iflag
        iflag = struct.unpack(ifswp+'i',indata)[0]  # Read flag
        if iflag != 1:
            ifswp = '!'  # swap endianness from now on
            iflag = struct.unpack(ifswp+'i',indata)[0]
            if iflag != 1:
                raise ValueError("Error: iflag != 1")
        refmdl = struct.unpack('80s',f.read(80))[0].strip().decode('utf-8')
        kerstr = struct.unpack('80s',f.read(80))[0].strip().decode('utf-8')
        ntot = struct.unpack(ifswp+'i',f.read(4))[0]
        ndtd = int(((ntot+1)*ntot)/2)

        # pre-allocate matrices
        indexrad1 = None if onlymetadata else np.zeros(ndtd,dtype=int)
        indexrad2 = None if onlymetadata else np.zeros(ndtd,dtype=int)
        indexhor1 = None if onlymetadata else np.zeros(ndtd,dtype=int)
        indexhor2 = None if onlymetadata else np.zeros(ndtd,dtype=int)
        out = None if onlymetadata else np.zeros(ndtd)

        if not onlymetadata:
            # Now start reading data
            for jj in range(ndtd):
                indexrad1[jj] = struct.unpack(ifswp+'i',f.read(4))[0]
                indexrad2[jj] = struct.unpack(ifswp+'i',f.read(4))[0]
                indexhor1[jj] = struct.unpack(ifswp+'i',f.read(4))[0]
                indexhor2[jj] = struct.unpack(ifswp+'i',f.read(4))[0]
                out[jj] = struct.unpack(ifswp+'d',f.read(8))[0]
            ii = 168+ndtd*24
            if ii != nbytes:
                raise ValueError("Error: number of bytes read "+str(ii)+" do not match expected ones "+str(nbytes))
    elapsed = timeit.default_timer() - start_time
    if not onlymetadata:
        print(".... read "+str(ndtd)+" rows for the Res or Cov matrix in "+str(round(elapsed/60*10)/10)+" min.")
    return refmdl, kerstr, ntot, indexrad1, indexrad2, indexhor1, indexhor2, out
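
# A metadata-only read skips the (potentially large) matrix elements; the
# binary file name below is hypothetical:
#
#   >>> refmdl, kerstr, ntot, *rest = readResCov('outmodel.Resolution.bin',
#   ...                                          onlymetadata=True)
#   >>> rest                      # index arrays and matrix are returned as None
#   [None, None, None, None, None]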