Source code for mwr_l12l2.utils.data_utils

import numpy as np
import pandas as pd
import xarray as xr


def get_from_nc_files(files_in, concat_dim='time'):
    """read (several) NetCDF input files to a :class:`xarray.Dataset` and fix time encoding for correct nc output

    Args:
        files_in: sequence of NetCDF files to read. The first entry is opened a second time on its own to recover
            the time encoding.
        concat_dim (optional): name of the dimension along which the files are concatenated and along which
            duplicates are dropped. Defaults to 'time'.
    Returns:
        :class:`xarray.Dataset` with the data of all files, duplicates along :obj:`concat_dim` removed and the
        encoding of the variable 'time' taken from the first file
    """
    data = xr.open_mfdataset(files_in, concat_dim=concat_dim, combine='nested')
    data = drop_duplicates(data, dim=concat_dim)

    # correct time encoding (especially units) which is broken by open_mfdataset by explicitly loading first file.
    # The context manager closes the file again; the encoding is copied so that it does not depend on that dataset.
    with xr.open_dataset(files_in[0]) as data_first:
        time_encoding = dict(data_first.time.encoding)
    data = set_encoding(data, ['time'], time_encoding)

    return data
def drop_duplicates(ds, dim):
    """remove entries with repeated values in a dimension vector from all data in ds

    Args:
        ds: :class:`xarray.Dataset` or :class:`xarray.DataArray` containing the data
        dim: string indicating the dimension name to check for duplicates
    Returns:
        ds with unique dimension vector
    """
    # np.unique returns (unique values, index of first occurrence); only the indices are needed.
    # For repeated values the first entry is kept, assuming duplicates carry identical data anyway.
    first_occurrence = np.unique(ds[dim], return_index=True)[1]
    selection = {dim: first_occurrence}
    return ds.isel(selection)
def set_encoding(ds, vars, enc):
    """assign an encoding to a selection of variables of a dataset

    Args:
        ds: :class:`xarray.Dataset` containing the data
        vars: list of variables for which encoding is to be adapted
        enc: encoding dictionary (containing e.g. units) that the encoding of the respective variables shall be
            set to.
    Returns:
        ds with updated encoding for each variable in :obj:`vars`
    """
    for name in vars:
        target = ds[name]
        target.encoding = enc
    return ds
def get_nearest(data, find_vals):
    """for each value in find_vals return the value occurring in data that is closest to it

    Args:
        data: values to search in. Only its unique values are considered.
        find_vals: iterable of values for which the nearest value in :obj:`data` is looked up
    Returns:
        list with one element of :obj:`data` per element of :obj:`find_vals`
    """
    candidates = np.unique(data)
    # argmin returns the first minimum, i.e. on a tie the smaller candidate wins as candidates are sorted
    return [candidates[np.abs(candidates - target).argmin()] for target in find_vals]
def has_data(ds, var):
    """tell whether a variable is present in a :class:`xarray.Dataset` and holds at least one non-NaN value

    Args:
        ds: :class:`xarray.Dataset` to look into
        var: name of the variable to check
    Returns:
        True if :obj:`var` is in :obj:`ds` and not all of its values are null, False otherwise
    """
    if var not in ds:
        return False
    return not ds[var].isnull().all()
def datetime64_to_str(x, date_format):
    """format a :class:`numpy.datetime64` as a date string according to 'date_format'

    Args:
        x: datetime as :class:`numpy.datetime64` object
        date_format: date format understood by :class:`datetime.datetime`
    Returns:
        the formatted date string
    """
    # go through pandas as numpy.datetime64 itself offers no strftime
    return pd.to_datetime(x).strftime(date_format)
def datetime64_to_hour(x):
    """convert a :class:`numpy.datetime64` to the time of day expressed as a float in hours

    Args:
        x: datetime as :class:`numpy.datetime64` object
    Returns:
        time of day in hours (hours + minutes/60 + seconds/3600)
    """
    # time of day as 'HH:MM:SS.ffffff'; the fractional seconds stay attached to the seconds field
    time_of_day = datetime64_to_str(x, '%H:%M:%S.%f')
    parts = np.array([float(field) for field in time_of_day.split(':')])
    # number of hour-, minute- and second-units per hour
    divisors = np.array([1, 60, 3600])
    return np.sum(parts / divisors)
def scalars_to_time(ds, variables, time_dim='time'):
    """spread scalar variables along the time dimension, giving arrays of len(time) filled with the scalar value

    Args:
        ds: :class:`xarray.Dataset` containing all requested scalar variables and the time dimension to transform to
        variables: list of variables to expand onto the time dimension. These will be replaced in-place
        time_dim (optional): name of the time dimension. Defaults to 'time'.
    Returns:
        ds with each variable in :obj:`variables` replaced by its expanded version
    """
    for name in variables:
        scalar = ds[name].values
        # multiplying with an array of ones repeats the value for every time step
        repeated = scalar * np.ones(ds[time_dim].shape)
        ds.update({name: (time_dim, repeated)})
    return ds
def vectors_to_time(ds, variables, time_dim='time'):
    """expand constant vector variables onto time dimension to form an array of len(time) containing identical values

    TODO: merge with scalars_to_time

    Args:
        ds: :class:`xarray.Dataset` containing all requested vector variables and the time dimension to transform to
        variables: list of variables to expand onto the time dimension. These will be replaced in-place
        time_dim (optional): name of the time dimension. Defaults to 'time'.
    Returns:
        ds with each variable in :obj:`variables` replaced by its expanded version
    """
    for var in variables:
        # pass the new dimension as a mapping so that its name follows time_dim instead of being fixed to 'time'
        ds[var] = ds[var].expand_dims({time_dim: ds[time_dim]})
    return ds
def lists_to_np(indict):
    """convert every value of a dict that is a list to a :class:`numpy.ndarray`

    Args:
        indict: dictionary to process. It is modified in-place.
    Returns:
        the same dictionary object with all list values replaced by arrays; other values are left untouched
    """
    list_keys = [key for key, val in indict.items() if isinstance(val, list)]
    for key in list_keys:
        indict[key] = np.array(indict[key])
    return indict