import numpy as np
import pandas as pd
import xarray as xr
def get_from_nc_files(files_in, concat_dim='time'):
    """Read (several) NetCDF input files to a :class:`xarray.Dataset` and fix time encoding for correct nc output.

    The files are combined along :obj:`concat_dim`, entries with duplicated values in :obj:`concat_dim` are dropped
    and the encoding of the variable ``time`` is replaced by the one found in the first input file.

    Args:
        files_in: indexable sequence of NetCDF files to read. The first element is additionally opened on its own to
            get the reference time encoding.
        concat_dim (optional): name of the dimension along which the files are concatenated. Defaults to 'time'.

    Returns:
        :class:`xarray.Dataset` with the combined data of all input files
    """
    data = xr.open_mfdataset(files_in, concat_dim=concat_dim, combine='nested')
    data = drop_duplicates(data, dim=concat_dim)

    # correct time encoding (especially units) which is broken by open_mfdataset by explicitly loading first file.
    # The file is only needed for its encoding, so close it right away instead of leaking the open handle and
    # keep a copy of the encoding dictionary which stays valid after closing.
    with xr.open_dataset(files_in[0]) as data_first:
        time_encoding = dict(data_first.time.encoding)

    return set_encoding(data, ['time'], time_encoding)
def drop_duplicates(ds, dim):
    """Remove all entries of ds which belong to a repeated value of the dimension vector.

    For each distinct value in the dimension vector only the first occurrence is kept. The selection follows the
    output of :func:`numpy.unique`, i.e. the returned data is ordered by ascending values of :obj:`dim`.

    Args:
        ds: :class:`xarray.Dataset` or :class:`xarray.DataArray` containing the data
        dim: string indicating the dimension name to check for duplicates

    Returns:
        ds with unique dimension vector
    """
    # only the indices of the first occurrences are of interest (duplicate entries are assumed identical anyway)
    first_occurrence = np.unique(ds[dim], return_index=True)[1]
    selection = {dim: first_occurrence}
    return ds.isel(selection)
def set_encoding(ds, vars, enc):
    """Assign one common encoding to a selection of variables of a dataset.

    Args:
        ds: :class:`xarray.Dataset` containing the data
        vars: list of names of the variables whose encoding shall be replaced
        enc: encoding dictionary (containing e.g. units) to assign to each of the selected variables

    Returns:
        ds with updated encoding for each variable in :obj:`vars`
    """
    for name in vars:
        selected = ds[name]
        selected.encoding = enc  # the very same enc object is handed to every variable
    return ds
def get_nearest(data, find_vals):
    """For each value in find_vals return the value occurring in data which is closest to it.

    Args:
        data: array-like with the candidate values. NaN entries in floating point data are ignored.
        find_vals: iterable of values for which the nearest value in :obj:`data` shall be found

    Returns:
        list with one element of :obj:`data` per entry in :obj:`find_vals`. If two candidates are equally close the
        smaller one is returned.

    Raises:
        ValueError: if a value shall be looked up but :obj:`data` contains no valid candidate
    """
    candidates = np.unique(data)  # sorted in ascending order

    # NaN survives np.unique and would be picked by argmin for every lookup, hence exclude it from the candidates
    if np.issubdtype(candidates.dtype, np.floating):
        candidates = candidates[~np.isnan(candidates)]

    return [candidates[np.abs(candidates - fv).argmin()] for fv in find_vals]
def has_data(ds, var):
    """Tell whether a variable is present in a :class:`xarray.Dataset` and holds at least one non-NaN value.

    Args:
        ds: :class:`xarray.Dataset` to look into
        var: name of the variable to check

    Returns:
        True if :obj:`var` is in :obj:`ds` and not all of its values are null, False otherwise
    """
    if var not in ds:
        return False
    return not ds[var].isnull().all()
def datetime64_to_str(x, date_format):
    """Format a :class:`numpy.datetime64` as a date string according to 'date_format'.

    Args:
        x: datetime as :class:`numpy.datetime64` object
        date_format: date format understood by :class:`datetime.datetime`

    Returns:
        the datetime formatted according to :obj:`date_format`
    """
    # go through pandas as it provides strftime for datetime64 input
    return pd.to_datetime(x).strftime(date_format)
def datetime64_to_hour(x):
    """Convert a :class:`numpy.datetime64` to the time of day expressed as a float in hours.

    Args:
        x: datetime as :class:`numpy.datetime64` object

    Returns:
        time of day in hours, i.e. hours + minutes/60 + seconds/3600 where seconds include the fractional part
    """
    time_of_day = datetime64_to_str(x, '%H:%M:%S.%f')  # formatted as hours:minutes:seconds.fraction
    components = np.array([float(part) for part in time_of_day.split(':')])
    per_hour = np.array([1, 60, 3600])  # number of hours, minutes and seconds in one hour
    return np.sum(components / per_hour)
def scalars_to_time(ds, variables, time_dim='time'):
    """Repeat scalar variables along the time dimension so that each becomes an array of len(time).

    Each variable is replaced by its values multiplied with an array of ones of the shape of the time vector. The
    replacement is defined on :obj:`time_dim` and consists of the dimension name and the data only.

    Args:
        ds: :class:`xarray.Dataset` containing all requested scalar variables and the time dimension to transform to
        variables: list of variables to expand onto the time dimension. These will be replaced in-place
        time_dim (optional): name of the time dimension. Defaults to 'time'.

    Returns:
        ds with the requested variables expanded onto the time dimension
    """
    for name in variables:
        scalar = ds[name].values
        repeated = scalar * np.ones(ds[time_dim].shape)
        ds.update({name: (time_dim, repeated)})
    return ds
def vectors_to_time(ds, variables, time_dim='time'):
    """Expand constant vector variables onto the time dimension so that identical values are present at each time.

    TODO: merge with scalars_to_time

    Args:
        ds: :class:`xarray.Dataset` containing all requested vector variables and the time dimension to transform to
        variables: list of variables to expand onto the time dimension. These will be replaced in-place
        time_dim (optional): name of the time dimension. Defaults to 'time'.

    Returns:
        ds with the requested variables expanded onto the time dimension
    """
    for var in variables:
        # use the dict form of expand_dims so that the new dimension is named after time_dim instead of always
        # being called 'time' regardless of the name requested by the caller
        ds[var] = ds[var].expand_dims({time_dim: ds[time_dim]})
    return ds
def lists_to_np(indict):
    """Replace every value of type list in a dict by a :class:`numpy.ndarray`.

    Args:
        indict: dictionary to transform. It is modified in-place, values which are not lists are left untouched.

    Returns:
        the input dictionary with all list values converted to :class:`numpy.ndarray`
    """
    list_keys = [key for key, val in indict.items() if isinstance(val, list)]
    for key in list_keys:
        indict[key] = np.array(indict[key])
    return indict