Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Flickering Investigations

Notebook to explore the effect of some of these hyperparameters on the “flickering” effects.

Jimmy Butler, January 2025

%matplotlib inline

import numpy as np
import xarray as xr
from matplotlib import animation
from IPython.display import Video
from matplotlib.cm import prism
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os

import matplotlib.path as mpath
import matplotlib.colors
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point

import matplotlib
matplotlib.use("Agg")

import h5py
import requests

import boto3
import s3fs
import earthaccess
from ipywidgets import IntProgress
from IPython.display import display

# Hop one directory up from the current working dir, then into scripts/,
# so the project-local `utils` module can be imported by plain name.
# NOTE(review): this assumes the notebook is launched from a direct child
# of the repo root — confirm before running elsewhere.
repo_dir = str(Path(os.getcwd()).parents[0])
os.chdir(repo_dir + '/scripts/')
from utils import display_catalog

Loading up the catalogs

# Return to notebooks/ (the earlier chdir moved us into scripts/) so the
# relative pickle path below resolves.
repo_dir = str(Path(os.getcwd()).parents[0])
os.chdir(repo_dir + '/notebooks/')

# AR catalogs built with temporal-linking epsilon of 12 h and 18 h.
# NOTE(review): one comes from a pickle, the other from an HDF5 store —
# presumably equivalent DataFrame formats; verify they share columns.
catalog_eps12 = pd.read_pickle('storm_df_epstime_12.pkl')
catalog_eps18 = pd.read_hdf(repo_dir + '/data/ar_database/dataframes/2020_storm_df.h5')

Helper functions

First four are all relevant to adding columns of temporal information to the catalog, including duration, start dates, and end dates.

Last function converts the catalog format from a DataFrame of xarray binary masks to a DataFrame where each row consists of lists of a storm’s latitude and longitude coordinates at a particular time. This is done because it is much easier to create an Antarctic-wide movie with data in this format.

def compute_duration(ar_da):
    """Return a storm's duration as a numpy timedelta64 in hours.

    The span between the storm's first and last timestamps is extended by
    one 3-hour step, so that both endpoints of the 3-hourly record count
    toward the duration.
    """
    span = ar_da.time.max() - ar_da.time.min()
    whole_hours = span.values.astype('timedelta64[h]').astype(int)
    return np.timedelta64(3, 'h') + whole_hours

def add_start_date(ar_da):
    """Return the earliest timestamp (``.values``) of the storm's time coordinate."""
    return ar_da.time.min().values

def add_end_date(ar_da):
    """Return the latest timestamp (``.values``) of the storm's time coordinate."""
    return ar_da.time.max().values

def add_time_info(catalog):
    '''
    Add temporal metadata columns ('duration', 'start_date', 'end_date') to the
    catalog in-place, derived from each storm's binary xarray DataArray mask.

    Inputs
        catalog (pandas DataFrame): contains at least a 'data_array' column of
            xarray DataArray masks, one per storm

    Outputs
        None — the input catalog is modified in-place
    '''
    masks = catalog['data_array']
    for column, derive in (('duration', compute_duration),
                           ('start_date', add_start_date),
                           ('end_date', add_end_date)):
        catalog[column] = masks.apply(derive)

def to_stormtime_format(catalog):
    '''
    Flatten the default catalog format (one row per storm, holding an xarray
    DataArray mask) into a "stormtime" DataFrame with one row per storm per
    timestep, whose columns hold the storm's point coordinates at that time.
    This layout makes Antarctic-wide animations much easier to build.

    Inputs
        catalog (pandas DataFrame): must have a 'data_array' column of binary
            xarray DataArray masks

    Outputs
        stormtime_df (pandas DataFrame): columns 'label' (catalog index),
            'time', and lists of 'lat'/'lon' grid coordinates where the mask
            equals 1 at that time
    '''
    # accumulate one entry per (storm, timestep)
    records = {'label': [], 'time': [], 'lat': [], 'lon': []}

    for storm_label in catalog.index:
        # the storm's binary mask and its (time, lat, lon) coordinates
        mask = catalog.loc[storm_label].data_array
        mask_times, grid_lats, grid_lons = mask.coords.values()

        for t in range(len(mask_times)):
            # indices of grid cells occupied by the storm at this timestep
            snapshot = mask.sel(time=mask_times[t])
            hits = np.argwhere(snapshot.to_numpy() == 1)

            records['label'].append(storm_label)
            records['time'].append(mask_times[t].values)
            records['lat'].append(grid_lats[hits[:, 0]].values)
            records['lon'].append(grid_lons[hits[:, 1]].values)

    return pd.DataFrame(records)

Process the Data

# Add temporal metadata to both catalogs (modifies them in-place).
add_time_info(catalog_eps12)
add_time_info(catalog_eps18)

# Window of interest for the animations below.
# NOTE(review): the original end date read '2/10/1980', which precedes the
# 2020 start date. That made both subsets empty and pd.date_range() in
# make_movie() empty, producing the IndexError recorded below on the first
# run. The 2020 catalog file and output filenames indicate 2020 was intended.
start_date = pd.to_datetime('1/30/2020')
end_date = pd.to_datetime('2/10/2020')

# Keep only storms fully contained in the window, then flatten each catalog
# to the one-row-per-storm-per-timestep format used by the animation code.
catalog_subset_eps12 = catalog_eps12[(catalog_eps12.start_date >= start_date) & (catalog_eps12.end_date <= end_date)]
catalog_subset_eps18 = catalog_eps18[(catalog_eps18.start_date >= start_date) & (catalog_eps18.end_date <= end_date)]
stormtime_df_eps12 = to_stormtime_format(catalog_subset_eps12)
stormtime_df_eps18 = to_stormtime_format(catalog_subset_eps18)

Make the Animations

Helper Functions

def make_movie(stormtime_df, start_date, end_date, title):
    '''
    Function that constructs an animation of ARs in a stormtime version of the AR catalog.

    Inputs
        stormtime_df (pandas DataFrame): stormtime-format catalog (one row per AR per
            timestep, with 'label', 'time', 'lat', 'lon' columns) as produced by
            to_stormtime_format()
        start_date, end_date (datetime-like): window of the movie; frames are drawn every
            3 hours from start_date up to (but excluding) one day past end_date.
            NOTE(review): start_date must not be after end_date, otherwise the frame index
            is empty and plt_time(0) raises IndexError (see traceback below).
        title (string): the title to print on the animation

    Outputs
        ani (Matplotlib Animation): an animation object, as far as I know it must be saved first in order to view it
    '''

    # define the times of the movie, and mappings between AR labels and colors
    movie_times = pd.date_range(start=start_date, end=end_date + pd.Timedelta(1, 'd'), freq='3h', inclusive='left')
    unique_clusters = stormtime_df['label'].unique()
    # spread the labels across the 'prism' colormap so each AR keeps one stable color
    color_mapping = {unique_clusters[j]:prism(j/len(unique_clusters)) for j in range(len(unique_clusters)) }

    # plot the jth frame of the movie (need as input to matplotlib animation constructor)
    def plt_time(j):
        time_pt = movie_times[j]

        # scatter AR points only if any storm is active at this frame's timestamp
        if (time_pt == stormtime_df.time).any():
            dat = stormtime_df[stormtime_df['time'] == time_pt]
            n_clusts = dat.shape[0]

            for i in range(n_clusts):
                cluster = dat['label'].iloc[i]
                ax.scatter(dat['lon'].iloc[i], dat['lat'].iloc[i], transform=ccrs.PlateCarree(), s=1, color=color_mapping[cluster], label=str(cluster), zorder=30)

            ax.legend()

        # static map layers, redrawn every frame because update_img clears the axes
        ax.set_extent([-180,180,-90,-39], ccrs.PlateCarree())
        ice_shelf_poly = cfeature.NaturalEarthFeature('physical', 'antarctic_ice_shelves_polys', '50m',edgecolor='none',facecolor='lightcyan') # 10m, 50m, 110m
        ax.add_feature(ice_shelf_poly,linewidth=3)
        ice_shelf_line = cfeature.NaturalEarthFeature('physical', 'antarctic_ice_shelves_lines', '50m',edgecolor='black',facecolor='none') # 10m, 50m, 110m
        ax.add_feature(ice_shelf_line,linewidth=1,zorder=13)
        ax.coastlines(resolution='110m',linewidth=1,zorder=32)

        # Map extent
        # clip the axes to a circle so the polar stereographic map renders round
        theta = np.linspace(0, 2*np.pi, 100)
        center, radius = [0.5, 0.5], 0.5
        verts = np.vstack([np.sin(theta), np.cos(theta)]).T
        circle = mpath.Path(verts * radius + center)
        ax.set_boundary(circle, transform=ax.transAxes)
        ax.gridlines(alpha=0.5, zorder=33)

        # per-frame timestamp title, e.g. "1/30/2020, 3:00"
        time_ts = pd.Timestamp(time_pt)
        ax.set_title(f'{time_ts.month}/{time_ts.day}/{time_ts.year}, {time_ts.hour}:00')

    # instantiate the animation
    fig, ax = plt.subplots(figsize=(5,5), subplot_kw=dict(projection=ccrs.Stereographic(central_longitude=0., central_latitude=-90.)))
    plt_time(0)
    fig.suptitle(title, fontsize=16)

    # each frame wipes the shared axes and redraws everything for that timestep
    def update_img(i):
        ax.clear()
        plt_time(i)

    ani = animation.FuncAnimation(fig, update_img, frames=len(movie_times))

    return ani

def save_progress(i, n):
    '''
    Progress callback for Matplotlib's Animation.save(): prints a status line
    every 20th frame, since saving can take a while.

    Inputs
        i (int): index of the frame just saved
        n (int): total number of frames being saved
    '''
    # The original wrapped the print in a set literal ({...}), building and
    # discarding a throwaway set on every call; a plain statement suffices.
    if i % 20 == 0:
        print(f'Saving frame {i}/{n}')

$\epsilon_{time} = 12$

# Build and save the epsilon_time = 12 h animation.
ani = make_movie(stormtime_df_eps12, start_date, end_date, 'Eps Time 12 hours')
ani.save('2020_family_event_epstime12.mp4', progress_callback=save_progress)
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[64], line 1
----> 1 ani = make_movie(stormtime_df_eps12, start_date, end_date, 'Eps Time 12 hours')
      2 ani.save('2020_family_event_epstime12.mp4', progress_callback=save_progress)

Cell In[63], line 53, in make_movie(stormtime_df, start_date, end_date, title)
     51 # instantiate the animation
     52 fig, ax = plt.subplots(figsize=(5,5), subplot_kw=dict(projection=ccrs.Stereographic(central_longitude=0., central_latitude=-90.)))
---> 53 plt_time(0)
     54 fig.suptitle(title, fontsize=16)
     56 def update_img(i):

Cell In[63], line 20, in make_movie.<locals>.plt_time(j)
     19 def plt_time(j):
---> 20     time_pt = movie_times[j]
     22     if (time_pt == stormtime_df.time).any():
     23         dat = stormtime_df[stormtime_df['time'] == time_pt]

File ~/envs/antarctic_ars/lib/python3.12/site-packages/pandas/core/indexes/base.py:5389, in Index.__getitem__(self, key)
   5386 if is_integer(key) or is_float(key):
   5387     # GH#44051 exclude bool, which would return a 2d ndarray
   5388     key = com.cast_scalar_indexer(key)
-> 5389     return getitem(key)
   5391 if isinstance(key, slice):
   5392     # This case is separated from the conditional above to avoid
   5393     # pessimization com.is_bool_indexer and ndim checks.
   5394     return self._getitem_slice(key)

File ~/envs/antarctic_ars/lib/python3.12/site-packages/pandas/core/arrays/datetimelike.py:381, in DatetimeLikeArrayMixin.__getitem__(self, key)
    374 """
    375 This getitem defers to the underlying array, which by-definition can
    376 only handle list-likes, slices, and integer scalars
    377 """
    378 # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
    379 # but skip evaluating the Union at runtime for performance
    380 # (see https://github.com/pandas-dev/pandas/pull/44624)
--> 381 result = cast("Union[Self, DTScalarOrNaT]", super().__getitem__(key))
    382 if lib.is_scalar(result):
    383     return result

File ~/envs/antarctic_ars/lib/python3.12/site-packages/pandas/core/arrays/_mixins.py:284, in NDArrayBackedExtensionArray.__getitem__(self, key)
    278 def __getitem__(
    279     self,
    280     key: PositionalIndexer2D,
    281 ) -> Self | Any:
    282     if lib.is_integer(key):
    283         # fast-path
--> 284         result = self._ndarray[key]
    285         if self.ndim == 1:
    286             return self._box_func(result)

IndexError: index 0 is out of bounds for axis 0 with size 0
Video('1980_epstime12_0522.mp4')
Loading...
# NASA GES DISC endpoint that issues temporary S3 access credentials.
gesdisc_s3 = "https://data.gesdisc.earthdata.nasa.gov/s3credentials"

# Define a function for S3 access credentials

def begin_s3_direct_access(url: str=gesdisc_s3):
    '''
    Fetch temporary credentials from the GES DISC endpoint and return an
    authenticated s3fs filesystem for direct in-region (us-west-2) access.

    Inputs
        url (str): credentials endpoint, defaults to the GES DISC one

    Outputs
        s3fs.S3FileSystem: filesystem handle usable as fs.open(s3_link)
    '''
    # The original call had no timeout and ignored HTTP errors, so a hung or
    # failed request surfaced as a stall or an opaque KeyError on 'accessKeyId'.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    creds = response.json()
    return s3fs.S3FileSystem(key=creds['accessKeyId'],
                             secret=creds['secretAccessKey'],
                             token=creds['sessionToken'],
                             client_kwargs={'region_name':'us-west-2'})

fs = begin_s3_direct_access()

def grab_MERRA2_granules(first, last, data_doi):
    '''
    Search Earthdata for a collection's MERRA-2 granules between two dates
    and open them for direct S3 streaming.

    Inputs
        first, last (datetime-like): date bounds of the temporal search
        data_doi (str): DOI of the MERRA-2 collection to search

    Outputs
        granule_lst (list): open s3fs file objects, one per granule found
    '''
    # Use zero-padded ISO dates: the original f-strings produced strings like
    # '2020-1-30', which is not valid ISO 8601.
    results = earthaccess.search_data(doi=data_doi,
                                      temporal=(f'{first:%Y-%m-%d}',
                                                f'{last:%Y-%m-%d}'))

    granule_lst = []
    for result in results:
        # rewrite the HTTPS download link to its in-region S3 equivalent
        link = result.data_links()[0].replace('https://data.gesdisc.earthdata.nasa.gov/data', 's3://gesdisc-cumulus-prod-protected')
        granule = fs.open(link)
        granule_lst.append(granule)

    return granule_lst
# Stream the MERRA-2 granules covering the analysis window.
# NOTE(review): presumably '10.5067/Q5GVUVUIVGO7' is the vertically
# integrated moisture-flux collection DOI — confirm against GES DISC.
files = grab_MERRA2_granules(start_date, end_date, '10.5067/Q5GVUVUIVGO7')
ds_lst = []

# Keep only the Antarctic latitude band and the timesteps whose hour is a
# multiple of 3, matching the catalog's 3-hourly cadence.
for granule in files:
    ds = xr.open_dataset(granule)
    ds_lst.append(ds['VFLXQV'].sel(lat=slice(-86, -39)).sel(time = ds.time.dt.hour % 3 == 0))

# Concatenate along time; the sign flip presumably makes poleward
# (southward) moisture flux positive — TODO confirm convention.
obs_ds = -xr.concat(ds_lst, dim='time')
fig, ax = plt.subplots(figsize=(5,5), subplot_kw=dict(projection=ccrs.Stereographic(central_longitude=0., central_latitude=-90.)), dpi=100)
ax.set_extent([-180,179.4,-90,-50], ccrs.PlateCarree())
ax.coastlines(resolution='110m',linewidth=0.2,zorder=14)

minimum = obs_ds.min().to_numpy()
maximum = obs_ds.max().to_numpy()

ax.contourf(obs_ds.lon,obs_ds.lat, obs_ds[0,:,:],transform=ccrs.PlateCarree(), levels=10, vmin=minimum, vmax=maximum)
plt.title(str(obs_ds.time.dt.date.to_numpy()[0]) + ', ' + str(obs_ds.time.dt.time.to_numpy()[0]))

#fig.colorbar(CS)
fig.suptitle('IVT (kg/m$^2$)', fontsize=16)

# Map extent 
theta = np.linspace(0, 2*np.pi, 100)
center, radius = [0.5, 0.5], 0.5
verts = np.vstack([np.sin(theta), np.cos(theta)]).T
circle = mpath.Path(verts * radius + center)
ax.set_boundary(circle, transform=ax.transAxes)
ax.gridlines(alpha=0.5, zorder=33)

def update_img(i):
    '''Redraw frame i of the IVT animation: clear the shared axes, re-add the
    map layers, and contour the ith timestep of obs_ds.'''
    ax.clear()
    ax.set_extent([-180,180,-90,-50], ccrs.PlateCarree())
    ax.coastlines(resolution='110m',linewidth=2,zorder=14)
    ice_shelf_line = cfeature.NaturalEarthFeature('physical', 'antarctic_ice_shelves_lines', '50m',edgecolor='black',facecolor='none') # 10m, 50m, 110m
    ax.add_feature(ice_shelf_line,linewidth=1,zorder=13)
    # shared vmin/vmax keep the color scale constant across frames
    ax.contourf(obs_ds.lon,obs_ds.lat, obs_ds[i,:,:],transform=ccrs.PlateCarree(), vmin=minimum, vmax=maximum, levels=10)
    ax.set_title(str(obs_ds.time.dt.date.to_numpy()[i]) + ', ' + str(obs_ds.time.dt.time.to_numpy()[i]))
    # Map extent
    # re-apply the circular boundary (ax.clear() resets it)
    theta = np.linspace(0, 2*np.pi, 100)
    center, radius = [0.5, 0.5], 0.5
    verts = np.vstack([np.sin(theta), np.cos(theta)]).T
    circle = mpath.Path(verts * radius + center)
    ax.set_boundary(circle, transform=ax.transAxes)


# Render and save the IVT movie, one frame per 3-hourly timestep.
ani = animation.FuncAnimation(fig, update_img, frames=len(obs_ds.time))
ani.save('flickering_effect_ivt_0522.mp4', progress_callback=save_progress)
Saving frame 0/40
Saving frame 20/40
Video('~/extreme_antarctic_ARs/notebooks/flickering_effect_ivt_0209.mp4', embed=True)
Loading...

$\epsilon_{time} = 18$

# NOTE(review): the original call passed only (stormtime_df, title), but
# make_movie() as defined above takes (stormtime_df, start_date, end_date,
# title) — it would raise TypeError. Pass the analysis window explicitly so
# the call matches the signature.
ani = make_movie(stormtime_df_eps18, start_date, end_date, 'Eps Time 18 hours')
ani.save('2020_family_event_epstime18.mp4', progress_callback=save_progress)
Saving frame 0/83
Saving frame 20/83
Saving frame 40/83
Saving frame 60/83
Saving frame 80/83
Video('2020_family_event_epstime18.mp4')
Loading...

Some flickering events from 1980:

  • 2/9

  • 5/22 - 5/24