Notebook to explore the effect of some of these hyperparameters on the “flickering” effects.
Jimmy Butler, January 2025
%matplotlib inline
import numpy as np
import xarray as xr
from matplotlib import animation
from IPython.display import Video
from matplotlib.cm import prism
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import matplotlib.path as mpath
import matplotlib.colors
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point
import matplotlib
matplotlib.use("Agg")
import h5py
import requests
import boto3
import s3fs
import earthaccess
from ipywidgets import IntProgress
from IPython.display import display
# Hop up to the repository root, then into scripts/ so that utils.py is importable.
repo_dir = str(Path(os.getcwd()).parents[0])
os.chdir(repo_dir + '/scripts/')
from utils import display_catalog

Loading up the catalogs¶
# Return to notebooks/ (the working directory was changed above to import utils).
repo_dir = str(Path(os.getcwd()).parents[0])
os.chdir(repo_dir + '/notebooks/')
# Catalog built with eps_time = 12 h, loaded from the notebook directory.
# NOTE(review): pickle files execute arbitrary code on load — only open trusted ones.
catalog_eps12 = pd.read_pickle('storm_df_epstime_12.pkl')
catalog_eps18 = pd.read_hdf(repo_dir + '/data/ar_database/dataframes/2020_storm_df.h5')

Helper functions¶
First four are all relevant to adding columns of temporal information to the catalog, including duration, start dates, and end dates.
The last function converts the catalog format from a DataFrame of xArray binary masks to a DataFrame where each row consists of lists of a storm’s latitude and longitude coordinates at a particular time. This is done because it is much easier to create an Antarctic-wide movie with data in this format.
def compute_duration(ar_da):
    '''
    Compute a storm's duration from its binary-mask DataArray.

    The raw span (last timestamp minus first timestamp) is extended by one
    3-hour step so a storm present at a single timestep still has a nonzero
    duration (the catalogs are built from 3-hourly data — see the `freq='3h'`
    frame spacing used in make_movie below).

    Inputs
        ar_da (xArray DataArray): binary mask with a 'time' coordinate

    Outputs
        duration (np.timedelta64): storm duration, in hours
    '''
    # The original local was named `days`, but the value is expressed in hours.
    duration = (ar_da.time.max() - ar_da.time.min()).values.astype('timedelta64[h]').astype(int) + np.timedelta64(3, 'h')
    return duration
def add_start_date(ar_da):
    '''Return the earliest timestamp of a storm's DataArray (its start date).'''
    return ar_da.time.min().values
def add_end_date(ar_da):
    '''Return the latest timestamp of a storm's DataArray (its end date).'''
    return ar_da.time.max().values
def add_time_info(catalog):
    '''
    Add 'duration', 'start_date', and 'end_date' columns in-place to the catalog,
    where the catalog is a pandas DataFrame whose rows hold binary xArray
    DataArray masks.

    Inputs
        catalog (pandas DataFrame): contains at least a 'data_array' column of
            xArray DataArray masks, one per storm

    Outputs
        None -- modifies the input catalog in-place
    '''
    # Derive each temporal column from the storm masks via its matching helper;
    # insertion order matches the original (duration, start_date, end_date).
    derived = {'duration': compute_duration,
               'start_date': add_start_date,
               'end_date': add_end_date}
    for column, helper in derived.items():
        catalog[column] = catalog['data_array'].apply(helper)
def to_stormtime_format(catalog):
'''
Helper function which takes in the default catalog format (a DataFrame whose rows contain xArray DataArray masks for each storm)
and converts to a pandas DataFrame where rows consist of a storm at a particular time and columns contain point locations
associated with these storms. This data format facilitates making animations of ARs.
Inputs
catalog (pandas DataFrame): contains columns of the Data_Array masks
Outputs
stormtime_df (pandas DataFrame): DataFrame where a row consists of a particular AR at a particular time, and columns give lists of associated coordinates
as well as time
'''
# lists to collect storm labels, times, and corresponding storm lats and lons
labels = []
times = []
lats = []
lons = []
# for every storm
for index in catalog.index:
# grab that storm's times and binary mask
storm_da = catalog.loc[index].data_array
storm_times, storm_grid_lats, storm_grid_lons = storm_da.coords.values()
# for each time step of that storm
for i in range(len(storm_times)):
# get lats and lons associated with that storm at that time
time_slice = storm_da.sel(time = storm_times[i])
inds = np.argwhere(time_slice.to_numpy() == 1)
storm_lats = storm_grid_lats[inds[:,0]].values
storm_lons = storm_grid_lons[inds[:,1]].values
# add the storm label, time, and lats and lons at that time to requisite lists
labels.append(index)
times.append(storm_times[i].values)
lats.append(storm_lats)
lons.append(storm_lons)
stormtime_df = pd.DataFrame({'label':labels, 'time':times, 'lat':lats, 'lon':lons})
return stormtime_dfProcess the Data¶
add_time_info(catalog_eps12)
add_time_info(catalog_eps18)
start_date = pd.to_datetime('1/30/2020')
end_date = pd.to_datetime('2/10/1980')
catalog_subset_eps12 = catalog_eps12[(catalog_eps12.start_date >= start_date) & (catalog_eps12.end_date <= end_date)]
catalog_subset_eps18 = catalog_eps18[(catalog_eps18.start_date >= start_date) & (catalog_eps18.end_date <= end_date)]
stormtime_df_eps12 = to_stormtime_format(catalog_subset_eps12)
stormtime_df_eps18 = to_stormtime_format(catalog_subset_eps18)Make the Animations¶
Helper Functions¶
def make_movie(stormtime_df, start_date, end_date, title):
    '''
    Function that constructs an animation of ARs in a stormtime version of the AR catalog.

    Inputs
        stormtime_df (pandas DataFrame): stormtime-format catalog (see to_stormtime_format)
        start_date (Timestamp): first day included in the animation (inclusive)
        end_date (Timestamp): last day included in the animation (inclusive)
        title (string): the title to print on the animation

    Outputs
        ani (Matplotlib Animation): an animation object, as far as I know it must be saved first in order to view it

    Raises
        ValueError: if no 3-hourly frames fall between start_date and end_date
            (e.g. end_date precedes start_date)
    '''
    # define the times of the movie, and mappings between AR labels and colors
    movie_times = pd.date_range(start=start_date, end=end_date + pd.Timedelta(1, 'd'), freq='3h', inclusive='left')
    if len(movie_times) == 0:
        # Fail loudly here instead of with a cryptic IndexError inside plt_time(0)
        raise ValueError(f'No frames between {start_date} and {end_date}; check that start_date <= end_date')
    unique_clusters = stormtime_df['label'].unique()
    color_mapping = {unique_clusters[j]: prism(j/len(unique_clusters)) for j in range(len(unique_clusters))}

    # Build the static map decorations once; creating the NaturalEarthFeature
    # objects and the circular boundary path inside the frame function would
    # redo this work for every single frame.
    ice_shelf_poly = cfeature.NaturalEarthFeature('physical', 'antarctic_ice_shelves_polys', '50m', edgecolor='none', facecolor='lightcyan')  # 10m, 50m, 110m
    ice_shelf_line = cfeature.NaturalEarthFeature('physical', 'antarctic_ice_shelves_lines', '50m', edgecolor='black', facecolor='none')  # 10m, 50m, 110m
    theta = np.linspace(0, 2*np.pi, 100)
    center, radius = [0.5, 0.5], 0.5
    verts = np.vstack([np.sin(theta), np.cos(theta)]).T
    circle = mpath.Path(verts * radius + center)

    # plot the jth frame of the movie (need as input to matplotlib animation constructor)
    def plt_time(j):
        time_pt = movie_times[j]
        if (time_pt == stormtime_df.time).any():
            dat = stormtime_df[stormtime_df['time'] == time_pt]
            n_clusts = dat.shape[0]
            for i in range(n_clusts):
                cluster = dat['label'].iloc[i]
                ax.scatter(dat['lon'].iloc[i], dat['lat'].iloc[i], transform=ccrs.PlateCarree(), s=1, color=color_mapping[cluster], label=str(cluster), zorder=30)
            ax.legend()
        ax.set_extent([-180,180,-90,-39], ccrs.PlateCarree())
        ax.add_feature(ice_shelf_poly, linewidth=3)
        ax.add_feature(ice_shelf_line, linewidth=1, zorder=13)
        ax.coastlines(resolution='110m', linewidth=1, zorder=32)
        # Clip the stereographic plot to a circular boundary
        ax.set_boundary(circle, transform=ax.transAxes)
        ax.gridlines(alpha=0.5, zorder=33)
        time_ts = pd.Timestamp(time_pt)
        ax.set_title(f'{time_ts.month}/{time_ts.day}/{time_ts.year}, {time_ts.hour}:00')

    # instantiate the animation
    fig, ax = plt.subplots(figsize=(5,5), subplot_kw=dict(projection=ccrs.Stereographic(central_longitude=0., central_latitude=-90.)))
    plt_time(0)
    fig.suptitle(title, fontsize=16)

    def update_img(i):
        # Each frame is drawn from scratch: clear, then re-plot
        ax.clear()
        plt_time(i)

    ani = animation.FuncAnimation(fig, update_img, frames=len(movie_times))
    return ani
def save_progress(i, n):
'''
Function that will be used in calls to animation.save(), just to give some progress updates as it can take a while to save!
'''
if i%20 == 0: {print(f'Saving frame {i}/{n}')}ani = make_movie(stormtime_df_eps12, start_date, end_date, 'Eps Time 12 hours')
ani.save('2020_family_event_epstime12.mp4', progress_callback=save_progress)---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[64], line 1
----> 1 ani = make_movie(stormtime_df_eps12, start_date, end_date, 'Eps Time 12 hours')
2 ani.save('2020_family_event_epstime12.mp4', progress_callback=save_progress)
Cell In[63], line 53, in make_movie(stormtime_df, start_date, end_date, title)
51 # instantiate the animation
52 fig, ax = plt.subplots(figsize=(5,5), subplot_kw=dict(projection=ccrs.Stereographic(central_longitude=0., central_latitude=-90.)))
---> 53 plt_time(0)
54 fig.suptitle(title, fontsize=16)
56 def update_img(i):
Cell In[63], line 20, in make_movie.<locals>.plt_time(j)
19 def plt_time(j):
---> 20 time_pt = movie_times[j]
22 if (time_pt == stormtime_df.time).any():
23 dat = stormtime_df[stormtime_df['time'] == time_pt]
File ~/envs/antarctic_ars/lib/python3.12/site-packages/pandas/core/indexes/base.py:5389, in Index.__getitem__(self, key)
5386 if is_integer(key) or is_float(key):
5387 # GH#44051 exclude bool, which would return a 2d ndarray
5388 key = com.cast_scalar_indexer(key)
-> 5389 return getitem(key)
5391 if isinstance(key, slice):
5392 # This case is separated from the conditional above to avoid
5393 # pessimization com.is_bool_indexer and ndim checks.
5394 return self._getitem_slice(key)
File ~/envs/antarctic_ars/lib/python3.12/site-packages/pandas/core/arrays/datetimelike.py:381, in DatetimeLikeArrayMixin.__getitem__(self, key)
374 """
375 This getitem defers to the underlying array, which by-definition can
376 only handle list-likes, slices, and integer scalars
377 """
378 # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
379 # but skip evaluating the Union at runtime for performance
380 # (see https://github.com/pandas-dev/pandas/pull/44624)
--> 381 result = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
382 if lib.is_scalar(result):
383 return result
File ~/envs/antarctic_ars/lib/python3.12/site-packages/pandas/core/arrays/_mixins.py:284, in NDArrayBackedExtensionArray.__getitem__(self, key)
278 def __getitem__(
279 self,
280 key: PositionalIndexer2D,
281 ) -> Self | Any:
282 if lib.is_integer(key):
283 # fast-path
--> 284 result = self._ndarray[key]
285 if self.ndim == 1:
286 return self._box_func(result)
IndexError: index 0 is out of bounds for axis 0 with size 0

Video('1980_epstime12_0522.mp4')
Loading...
gesdisc_s3 = "https://data.gesdisc.earthdata.nasa.gov/s3credentials"

# Define a function for S3 access credentials
def begin_s3_direct_access(url: str = gesdisc_s3):
    '''
    Fetch temporary S3 credentials from the GES DISC endpoint and open an
    S3 filesystem for direct in-region (us-west-2) data access.

    Inputs
        url (string): endpoint returning accessKeyId / secretAccessKey / sessionToken

    Outputs
        s3fs.S3FileSystem: filesystem authenticated with the temporary credentials
    '''
    # Bound the request and surface HTTP errors explicitly; without these, a
    # flaky endpoint hangs forever or dies later with a confusing JSON error.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    creds = response.json()
    return s3fs.S3FileSystem(key=creds['accessKeyId'],
                             secret=creds['secretAccessKey'],
                             token=creds['sessionToken'],
                             client_kwargs={'region_name': 'us-west-2'})

fs = begin_s3_direct_access()
def grab_MERRA2_granules(first, last, data_doi):
    '''
    Search earthaccess for a dataset's granules between two dates and open
    them for streaming from S3.

    Inputs
        first (Timestamp): first date of the search window (inclusive)
        last (Timestamp): last date of the search window (inclusive)
        data_doi (string): DOI of the dataset to search, e.g. a MERRA-2 collection

    Outputs
        granule_lst (list): open s3fs file objects, one per matching granule
    '''
    # stream the data only between those two dates.
    # Use zero-padded ISO dates: the original f-strings produced values like
    # '2020-1-30', which strict ISO-8601 parsers reject.
    results = earthaccess.search_data(doi=data_doi,
                                      temporal=(first.strftime('%Y-%m-%d'),
                                                last.strftime('%Y-%m-%d')))
    granule_lst = []
    for result in results:
        # Rewrite the HTTPS link into the direct-access S3 bucket path
        link = result.data_links()[0].replace('https://data.gesdisc.earthdata.nasa.gov/data', 's3://gesdisc-cumulus-prod-protected')
        granule_lst.append(fs.open(link))
    return granule_lst

files = grab_MERRA2_granules(start_date, end_date, '10.5067/Q5GVUVUIVGO7')
ds_lst = []
# Open each streamed granule and keep only the 'VFLXQV' field over the
# Antarctic latitude band, subsampled to timesteps whose hour is a multiple
# of 3 (matching the catalog's 3-hourly resolution).
for granule in files:
    ds = xr.open_dataset(granule)
    ds_lst.append(ds['VFLXQV'].sel(lat=slice(-86, -39)).sel(time = ds.time.dt.hour % 3 == 0))
# NOTE(review): the leading minus presumably flips the flux sign so poleward
# (southward) transport plots as positive in the SH — confirm intended convention.
obs_ds = -xr.concat(ds_lst, dim='time')
# Set up the figure and draw the first IVT frame so the animation has a
# starting image.
fig, ax = plt.subplots(figsize=(5,5), subplot_kw=dict(projection=ccrs.Stereographic(central_longitude=0., central_latitude=-90.)), dpi=100)
ax.set_extent([-180,179.4,-90,-50], ccrs.PlateCarree())
ax.coastlines(resolution='110m',linewidth=0.2,zorder=14)
# Pin the color scale to the global min/max so shading is comparable frame-to-frame
minimum = obs_ds.min().to_numpy()
maximum = obs_ds.max().to_numpy()
ax.contourf(obs_ds.lon,obs_ds.lat, obs_ds[0,:,:],transform=ccrs.PlateCarree(), levels=10, vmin=minimum, vmax=maximum)
plt.title(str(obs_ds.time.dt.date.to_numpy()[0]) + ', ' + str(obs_ds.time.dt.time.to_numpy()[0]))
#fig.colorbar(CS)
fig.suptitle('IVT (kg/m$^2$)', fontsize=16)
# Map extent: clip the stereographic axes to a circular boundary
theta = np.linspace(0, 2*np.pi, 100)
center, radius = [0.5, 0.5], 0.5
verts = np.vstack([np.sin(theta), np.cos(theta)]).T
circle = mpath.Path(verts * radius + center)
ax.set_boundary(circle, transform=ax.transAxes)
ax.gridlines(alpha=0.5, zorder=33)
def update_img(i):
    # Redraw frame i from scratch: clear the axes, restore the map
    # decorations, then contour the i-th timestep with the fixed color range.
    ax.clear()
    ax.set_extent([-180,180,-90,-50], ccrs.PlateCarree())
    ax.coastlines(resolution='110m',linewidth=2,zorder=14)
    ice_shelf_line = cfeature.NaturalEarthFeature('physical', 'antarctic_ice_shelves_lines', '50m',edgecolor='black',facecolor='none') # 10m, 50m, 110m
    ax.add_feature(ice_shelf_line,linewidth=1,zorder=13)
    ax.contourf(obs_ds.lon,obs_ds.lat, obs_ds[i,:,:],transform=ccrs.PlateCarree(), vmin=minimum, vmax=maximum, levels=10)
    ax.set_title(str(obs_ds.time.dt.date.to_numpy()[i]) + ', ' + str(obs_ds.time.dt.time.to_numpy()[i]))
    # Map extent: rebuild the circular boundary after ax.clear()
    theta = np.linspace(0, 2*np.pi, 100)
    center, radius = [0.5, 0.5], 0.5
    verts = np.vstack([np.sin(theta), np.cos(theta)]).T
    circle = mpath.Path(verts * radius + center)
    ax.set_boundary(circle, transform=ax.transAxes)
ani = animation.FuncAnimation(fig, update_img, frames=len(obs_ds.time))
ani.save('flickering_effect_ivt_0522.mp4', progress_callback=save_progress)
Saving frame 0/40
Saving frame 20/40
Video('~/extreme_antarctic_ARs/notebooks/flickering_effect_ivt_0209.mp4', embed=True)
Loading...
ani = make_movie(stormtime_df_eps18, 'Eps Time 18 hours')
ani.save('2020_family_event_epstime18.mp4', progress_callback=save_progress)Saving frame 0/83
Saving frame 20/83
Saving frame 40/83
Saving frame 60/83
Saving frame 80/83
Video('2020_family_event_epstime18.mp4')
Loading...
Some flickering events from 1980:
2/9
5/22 - 5/24