# Source code for climate_toolbox.utils.utils

"""
Handy functions for standardizing the format of climate data
"""

import numpy as np
import pandas as pd
import xarray as xr


def convert_kelvin_to_celsius(df, temp_name):
    """
    Shift the variable ``temp_name`` from Kelvin to Celsius

    The container is modified in place: ``df[temp_name]`` is replaced by its
    values minus 273.15, the attributes it had before are copied back onto
    it, and ``units``, ``valid_min`` and ``valid_max`` are overwritten.

    Parameters
    ----------
    df: mapping-like container (e.g. xr.Dataset)
        must support ``df[temp_name]`` for reading and assignment
    temp_name: str
        name of the temperature variable inside ``df``

    Returns
    -------
    the same ``df`` object that was passed in
    """
    # keep a handle on the metadata before the variable is replaced
    original_attrs = df[temp_name].attrs

    celsius = df[temp_name] - 273.15
    df[temp_name] = celsius

    # restore the previous metadata first, then override the unit-related
    # entries so they describe the converted values
    target_attrs = df[temp_name].attrs
    target_attrs.update(original_attrs)
    target_attrs['units'] = 'C'
    target_attrs['valid_min'] = -108.78788
    target_attrs['valid_max'] = 62.02828

    return df
def convert_lons_mono(ds, lon_name='longitude'):
    """
    Convert longitude from -180-180 to 0-360

    Negative longitudes are shifted up by 360, all other values are left
    untouched, and the result is sorted by ascending longitude. The input
    object is not modified; a new object is returned.

    Parameters
    ----------
    ds: xr.Dataset or xr.DataArray
        object carrying a longitude coordinate called ``lon_name``
    lon_name: str
        name of the longitude coordinate (default ``'longitude'``)

    Returns
    -------
    object of the same kind as ``ds`` with the converted, sorted longitudes
    """
    lon = ds[lon_name]
    lon_values = np.asarray(lon.values)
    wrapped = np.where(lon_values < 0, 360 + lon_values, lon_values)

    # Assigning to ``.values`` of a dimension coordinate is rejected by
    # xarray (the backing index is immutable), so the coordinate is replaced
    # through assign_coords instead. The attrs are passed along explicitly
    # so the coordinate metadata is kept.
    ds = ds.assign_coords(
        **{lon_name: (lon.dims, wrapped, dict(lon.attrs))})

    # sort the dataset by the new lon values; unlike a label based ``sel``
    # this also works when two longitudes end up with the same value
    return ds.sortby(lon_name)
def convert_lons_split(ds, lon_name='longitude'):
    """
    Convert longitude from 0-360 to -180-180

    Longitudes strictly greater than 180 are shifted down by 360, all other
    values (including exactly 180) are left untouched, and the result is
    sorted by ascending longitude. The input object is not modified; a new
    object is returned.

    Parameters
    ----------
    ds: xr.Dataset or xr.DataArray
        object carrying a longitude coordinate called ``lon_name``
    lon_name: str
        name of the longitude coordinate (default ``'longitude'``)

    Returns
    -------
    object of the same kind as ``ds`` with the converted, sorted longitudes
    """
    lon = ds[lon_name]
    lon_values = np.asarray(lon.values)
    wrapped = np.where(lon_values > 180, lon_values - 360, lon_values)

    # Assigning to ``.values`` of a dimension coordinate is rejected by
    # xarray (the backing index is immutable), so the coordinate is replaced
    # through assign_coords instead. The attrs are passed along explicitly
    # so the coordinate metadata is kept.
    ds = ds.assign_coords(
        **{lon_name: (lon.dims, wrapped, dict(lon.attrs))})

    # sort the dataset by the new lon values; unlike a label based ``sel``
    # this also works when two longitudes end up with the same value
    return ds.sortby(lon_name)
def rename_coords_to_lon_and_lat(ds):
    """
    Standardize the spatial coord names of ``ds`` to the short form

    ``latitude`` becomes ``lat``; ``longitude`` (or, only when that is
    absent, ``long``) becomes ``lon``. A ``z`` coord, if present, is dropped
    and the result is squeezed.

    Parameters
    ----------
    ds: xr.Dataset or xr.DataArray

    Returns
    -------
    the renamed object
    """
    if 'latitude' in ds.coords:
        ds = ds.rename({'latitude': 'lat'})

    # 'longitude' takes precedence, 'long' is only a fallback spelling
    for long_form in ('longitude', 'long'):
        if long_form in ds.coords:
            ds = ds.rename({long_form: 'lon'})
            break

    if 'z' not in ds.coords:
        return ds

    return ds.drop('z').squeeze()
def rename_coords_to_longitude_and_latitude(ds):
    """
    Standardize the spatial coord names of ``ds`` to the long form

    ``lat`` becomes ``latitude``; ``lon`` (or, only when that is absent,
    ``long``) becomes ``longitude``. A ``z`` coord, if present, is dropped
    and the result is squeezed.

    Parameters
    ----------
    ds: xr.Dataset or xr.DataArray

    Returns
    -------
    the renamed object
    """
    if 'lat' in ds.coords:
        ds = ds.rename({'lat': 'latitude'})

    # 'lon' takes precedence, 'long' is only a fallback spelling
    for short_form in ('lon', 'long'):
        if short_form in ds.coords:
            ds = ds.rename({short_form: 'longitude'})
            break

    if 'z' not in ds.coords:
        return ds

    return ds.drop('z').squeeze()
def remove_leap_days(ds):
    """
    Drop every entry along ``time`` that falls on February 29th

    Parameters
    ----------
    ds: xr.Dataset or xr.DataArray
        object with a datetime-like ``time`` coordinate

    Returns
    -------
    the selection of ``ds`` without the leap days
    """
    in_february = ds['time.month'] == 2
    on_day_29 = ds['time.day'] == 29

    # boolean indexer along time: True for everything that is kept
    keep = ~(in_february & on_day_29)

    return ds.loc[{'time': keep}]
def season_boundaries(growing_days):
    """
    Returns the sorted start and end date of growing season

    NOTE: the longitude coordinate of ``growing_days`` is shifted by -180
    *in place*, so the caller's object is modified as well.
    ``get_daily_growing_season_mask`` keeps using the same dataset after
    calling this function and therefore sees the shifted longitudes.

    Parameters
    ----------
    growing_days: xr.Dataset
        must expose a data variable named ``variable`` with the dimensions
        ``latitude``, ``longitude`` and ``z``; ``z`` needs exactly two
        entries because the sorted axis is labelled ``['min', 'max']``

    Returns
    -------
    tuple of xr.DataArray
        ``(min_day, max_day)``, each on the latitude x longitude grid
    """
    # the longitude values of the data are off, we need to shift them.
    # Writing to ``.values`` of a dimension coordinate is rejected by xarray,
    # so the coordinate is re-assigned through ``.coords`` instead, which
    # still updates the caller's object. Arithmetic drops the attrs, hence
    # they are copied back afterwards.
    lon_attrs = dict(growing_days['longitude'].attrs)
    growing_days.coords['longitude'] = growing_days['longitude'] - 180
    growing_days['longitude'].attrs.update(lon_attrs)

    # we then sort by longitude
    growing_days = growing_days.sortby('longitude')

    # xarray has no method to sort the values along an axis, so we use
    # np.sort on the raw array; the transpose pins the axis order so that
    # axis=2 is guaranteed to be 'z'
    sorted_values = np.sort(
        growing_days.variable.transpose(
            'latitude', 'longitude', 'z').values,
        axis=2)

    # construct the ds
    gdd_sorted = xr.DataArray(
        sorted_values,
        dims=('latitude', 'longitude', 'sort'),
        coords={
            'latitude': growing_days.latitude,
            'longitude': growing_days.longitude,
            'sort': pd.Index(['min', 'max'])
        }
    )

    # after sorting, the first entry along 'sort' is the smaller day and the
    # second one the larger day
    min_day = gdd_sorted.sel(sort='min')
    max_day = gdd_sorted.sel(sort='max')

    return min_day, max_day
def get_daily_growing_season_mask(lat, lon, time, growing_days_path):
    """
    Constructs a mask for the days that fall within the calendar growing
    season

    Parameters
    ----------
    lat: xr.DataArray coords object
    lon: xr.DataArray coords object
    time: xr.DataArray coords object
    growing_days_path: str
        path of the dataset that is opened with ``xr.open_dataset``; it must
        provide a data variable named ``variable`` with a ``z`` dimension
        holding the labels 1 and 2

    Returns
    -------
    DataArray
        xr.DataArray of masked lat x lon x time

    Notes
    -----
    NOTE(review): only the ``time`` coordinate of the helper array built from
    ``lat``/``lon``/``time`` is used below, so the spatial grid of the result
    appears to come from the growing-days file rather than from ``lat`` and
    ``lon`` - confirm this is intended.
    """
    # open the file in a context manager so the handle is released once the
    # mask has been computed
    with xr.open_dataset(growing_days_path) as growing_days:

        # find the min and max for the growing season
        # (this also shifts the longitudes of ``growing_days`` in place)
        min_day, max_day = season_boundaries(growing_days)

        data = np.ones((lat.shape[0], lon.shape[0], time.shape[0]))

        # create an array of ones in the shape of the data
        ones = xr.DataArray(
            data, coords=[lat, lon, time], dims=['lat', 'lon', 'time'])

        # mask the array around the within calendar year start and end times
        # of growing season
        day_of_year = ones['time.dayofyear']
        mask = (day_of_year >= min_day) & (day_of_year <= max_day)

        # Keep the mask where the z=2 value is not smaller than the z=1
        # value; elsewhere use the complement (1 - mask). Cells where the
        # z=1 value is missing end up as NaN.
        finalmask = (
            mask.where(
                growing_days.variable.sel(z=2) >=
                growing_days.variable.sel(z=1)).fillna(1-mask).where(
                    ~growing_days.variable.sel(z=1, drop=True).isnull()
            ).rename({'latitude': 'lat', 'longitude': 'lon'})
        )

        # make sure nothing lazily references the file after it is closed
        finalmask = finalmask.load()

    return finalmask