Example of gridded forecasts postprocessing

In this example, using Pythie, we postprocess gridded 2 metre temperature forecasts over Europe. We postprocess them with the 2 metre temperature itself, the maximum 2 metre temperature over the last 6 hours and the soil temperature as predictors.

We use the ERA5 reanalysis over a large area in Europe from 1997 to 2016 as gridded observations. This reanalysis was downloaded from the Copernicus Climate Data Store and converted to the NetCDF file format.

The postprocessing is done by performing a regression, at each grid point and lead time, between the gridded reforecasts and the gridded reanalysis (considered as observations). For verification, the result of this regression is then applied to the reforecasts themselves (i.e. to the training set).
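
To make the idea concrete, here is a minimal standalone sketch (not Pythie's actual implementation) of the simplest such regression: a bias correction computed independently at each grid point and lead time. The array shapes and sizes are hypothetical:

import numpy as np

# Hypothetical shapes:
# reforecasts: (n_years, n_members, n_lead_times, n_lat, n_lon)
# reanalysis:  (n_years, n_lead_times, n_lat, n_lon)
rng = np.random.default_rng(0)
reforecasts = rng.normal(285., 3., size=(20, 11, 19, 9, 9))
reanalysis = rng.normal(285., 3., size=(20, 19, 9, 9))

# Train: mean error of the ensemble mean over the training years,
# computed independently at each lead time and grid point.
ens_mean = reforecasts.mean(axis=1)          # (years, lead, lat, lon)
bias = (ens_mean - reanalysis).mean(axis=0)  # (lead, lat, lon)

# Apply: subtract the lead-time- and grid-point-dependent bias
# from every member.
corrected = reforecasts - bias[np.newaxis, np.newaxis, ...]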

The reforecast files were downloaded from ECMWF and converted to NetCDF files.

Note: In the following example, we drop the initial condition (lead time 0) of the reforecasts, because the maximum 2 metre temperature predictor is not defined there. As a result, lead time 0 is not postprocessed.

Gridded reforecast data source

Source: www.ecmwf.int

Creative Commons Attribution 4.0 International (CC BY 4.0). Copyright © 2021 European Centre for Medium-Range Weather Forecasts (ECMWF). See the ECMWF_LICENSE.txt file included with the data for more details.

Copernicus ERA5 gridded reanalysis data source

Source: https://cds.climate.copernicus.eu/

Copyright © 2021 European Union. Generated using Copernicus Climate Change Service information 2021.

Hersbach et al. (2018): ERA5 hourly data on single levels from 1979 to present. Copernicus Climate Change Service (C3S) Climate Data Store (CDS). (Accessed on 21-04-2021), doi:10.24381/cds.adbb2d47.

See the COPERNICUS_LICENSE.txt file included with the data for more details.

Preliminaries

Setting the path

import sys, os
sys.path.extend([os.path.abspath('../../../../')])
os.chdir('../../../../')

Importing external modules

import iris
import iris.pandas as ipd
import iris.plot as iplt
import iris.quickplot as qplt
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import datetime
import cftime
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib import cm
import numpy as np
from IPython.display import HTML
from matplotlib import rc
rc('font',**{'family':'serif','sans-serif':['Times'],'size':16})
%matplotlib inline

Importing internal modules

from core.data import Data
import postprocessors.MBM as MBM

Setting some parameters

# Date of the forecast
date = "01-02"
# Year to correct
year = 2017
# Number of reforecasts
years_back = 20
# Parameter of the analysis (to be postprocessed)
param = '2t'
# Parameters of the predictors
# ('2t': 2 m temperature, 'mx2t6': max. 2 m temperature over the last 6 hours,
#  'stl1': soil temperature level 1)
params = ['2t', 'mx2t6', 'stl1']
# Time units used in the NetCDF input files
time_units = 'hours since 1900-01-01 00:00:00.0'
# Location of the data
data_folder = './data/europa_grid/'

Defining some functions to extract the time coordinates from Iris cubes

def get_cubes_time_index(cubes):
    # assumes all the cubes in the list have the same timestamps
    a = ipd.as_data_frame(cubes[0][:, 0, 0])
    return a.index


def get_cubes_time(cubes, units=None, year=None):
    time_index = get_cubes_time_index(cubes)
    if year is not None:
        convert_time = lambda t: datetime.datetime(year, t.month, t.day, t.hour)
    else:
        convert_time = lambda t: datetime.datetime(t.year, t.month, t.day, t.hour)
    time = np.array(list(map(convert_time, time_index.values)), dtype=object)
    if units is not None:
        cf_time = cftime.date2num(time, units=units)
        return time, cf_time
    else:
        return time, None

Defining some plotting functions

def plot_cube(cube, t, v, ax=None, quick=True):
    if ax is None:
        map_proj = ccrs.PlateCarree()
        plt.figure(figsize=(15, 10))
        ax = plt.axes(projection=map_proj)
    extent = [-20, 20, 30, 70]
    ax.gridlines()
    ax.coastlines(resolution='50m')
    ax.add_feature(cartopy.feature.BORDERS)
    if quick:
        qplt.contourf(cube[t, :, :], v, axes=ax, cmap=cm.get_cmap('bwr'))
    else:
        iplt.contourf(cube[t, :, :], v, axes=ax, cmap=cm.get_cmap('bwr'))
    ax.set_extent(extent)
    return ax


def plot_quant(quant, labels, timestamps, grid_point):
    fig = plt.figure(figsize=(15, 10))
    ax = fig.gca()

    first = True
    for c, lab in zip(quant, labels):
        if first:
            c.plot(ax=ax, global_label=lab, timestamps=timestamps, lw=3., ls="--", grid_point=grid_point)
            first = False
        else:
            c.plot(ax=ax, global_label=lab, timestamps=timestamps, grid_point=grid_point)

    ax.legend()
    return ax

Loading and creating the Data objects

This section shows how to load Data objects from NetCDF files using Iris. A standalone sketch of the array layout that the Data objects expect follows the reforecast loading code below.

First we load the reforecasts

# For each year in the past:
for y in range(year-years_back, year):
    # We load the reforecasts with Iris
    reforecast_cubes = iris.load(data_folder+'reforecasts_'+'-'.join(params)+'_'+str(y)+'-'+date+'.nc')
    # If it is the first year, then:
    if y == year-years_back:
        # We create one list per predictor:
        reforecasts_list = list()
        for i in range(len(reforecast_cubes)):
            reforecasts_list.append(list())
        # And a list for the timestamps:
        times = list()
    # We extract the time:
    time, t = get_cubes_time(reforecast_cubes, time_units)
    times.append(time[1:, ...])
    # We create a Numpy array for each variable from the Iris cube
    for i, cube in enumerate(reforecast_cubes):
        # (we have to swap the time and ensemble member axes and add 2 dummy axes at the beginning
        # to fit the Data object specs)
        data = np.array(cube.data[1:, ...]).swapaxes(0, 1)[np.newaxis, np.newaxis, ...]
        # (and also add the dummy 'variable' axis)
        data = np.expand_dims(data, 3)
        reforecasts_list[i].append(data)

# Now that the data have been loaded into convenient Numpy arrays, we can create the Data object:
# First create one Data object per predictor
reforecasts_data_list = list()
for reforecast in reforecasts_list:
    reforecasts_data_list.append(Data(np.concatenate(reforecast, axis=1), timestamps=times))
# saving the first predictor (the variable itself) for later
reforecast_data_1st_predictor = reforecasts_data_list[0].copy()
# (Deleting data not needed anymore to save RAM space)
del reforecast_cubes, reforecasts_list
# Then loading all the predictors into one single Data object
reforecasts_data = reforecasts_data_list[0].copy()
for reforecast in reforecasts_data_list[1:]:
    reforecasts_data.append_predictors(reforecast)
# (Deleting data not needed anymore to save RAM space)
del reforecasts_data_list
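
The axis manipulations above can be hard to follow, so here is a standalone Numpy illustration of the same reshaping, with hypothetical sizes (20 lead times including lead time 0, 11 members, a 9 x 9 grid). The axis semantics given in the comments are our reading of the Data object specs:

cube_like = np.zeros((20, 11, 9, 9))        # (time, member, lat, lon)

step = cube_like[1:, ...]                   # drop lead time 0 (see the note above)
step = step.swapaxes(0, 1)                  # -> (member, time, lat, lon)
step = step[np.newaxis, np.newaxis, ...]    # -> (1, 1, member, time, lat, lon)
step = np.expand_dims(step, 3)              # -> (1, 1, member, 1, time, lat, lon)
print(step.shape)                           # (1, 1, 11, 1, 19, 9, 9)

The years are then concatenated along axis 1 and the predictors appended along axis 0, consistent with a (predictor, realization, member, variable, time, lat, lon) layout.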

Then we load the analysis

# Same as previously, but for only one variable
analysis = list()
times = list()
for y in range(year-years_back, year):
    analysis_cube = iris.load(data_folder+'analysis_'+param+'_'+str(y)+'-'+date+'.nc')
    time, t = get_cubes_time(analysis_cube, time_units)
    times.append(time[1:, ...])
    data = np.array(analysis_cube[0].data[1:, ...])[np.newaxis, np.newaxis, np.newaxis, np.newaxis, ...]
    analysis.append(data)

analysis_data = Data(np.concatenate(analysis, axis=1), timestamps=times)

del analysis

Training the PostProcessors

In this section, we train the various postprocessors of the Member-By-Member (MBM) module with the data loaded previously.
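
All the corrections below share the member-by-member functional form of Van Schaeybroeck and Vannitsem (2015): each member e_i of an ensemble with mean e_bar is mapped to alpha + beta * e_bar + gamma * (e_i - e_bar), with the coefficients fitted per grid point and lead time. A hedged sketch of this form (not the module's actual implementation):

def mbm_correct(members, alpha, beta, gamma):
    # members: array of shape (n_members, ...) holding the raw ensemble.
    # Sketch of an MBM-type correction: bias correction corresponds to
    # beta = gamma = 1, ensemble mean correction to gamma = 1, and
    # ensemble spread scaling to a fitted gamma.
    e_bar = members.mean(axis=0, keepdims=True)
    return alpha + beta * e_bar + gamma * (members - e_bar)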

# List to hold the trained PostProcessors
postprocessors = list()
proc_labels = list()

Simple bias correction

ebc = MBM.BiasCorrection()
ebc.train(analysis_data, reforecasts_data)
postprocessors.append(ebc)
proc_labels.append('Bias correction')

Ensemble Mean correction

emc = MBM.EnsembleMeanCorrection()
emc.train(analysis_data, reforecasts_data)
postprocessors.append(emc)
proc_labels.append('Ensemble Mean correction')

Ensemble Spread Scaling correction

essc = MBM.EnsembleSpreadScalingCorrection()
essc.train(analysis_data, reforecasts_data)
postprocessors.append(essc)
proc_labels.append('Ensemble Spread Scaling correction')

Ensemble Spread Scaling correction with Absolute norm CRPS minimization

essacc = MBM.EnsembleSpreadScalingAbsCRPSCorrection()
essacc.train(analysis_data, reforecasts_data, ntrial=1)
postprocessors.append(essacc)
proc_labels.append('Ensemble Spread Scaling Abs. CRPS min. correction')

Ensemble Spread Scaling correction + Climatology nudging with Absolute norm CRPS minimization

eacc = MBM.EnsembleAbsCRPSCorrection()
eacc.train(analysis_data, reforecasts_data, ntrial=1)
postprocessors.append(eacc)
proc_labels.append('Ensemble Spread Scaling + Clim. Nudging Abs. CRPS min. correction')

Ensemble Spread Scaling correction + Climatology nudging with Ngr CRPS minimization

encc = MBM.EnsembleNgrCRPSCorrection()
encc.train(analysis_data, reforecasts_data, ntrial=1)
postprocessors.append(encc)
proc_labels.append('Ensemble Spread Scaling + Clim. Nudging Ngr CRPS min. correction')

Verification

Here we are going to postprocess the reforecasts themselves to see how well they perform. Note that since we verify on the training set itself, these in-sample scores are optimistic.

# Lists to store the experiment results and their labels
exp_results = list()
exp_results.append(reforecast_data_1st_predictor)
exp_labels = list()
exp_labels.append('Raw forecasts')
timestamps = np.array(range(reforecast_data_1st_predictor.number_of_time_steps))
for label, postprocessor in zip(proc_labels, postprocessors):
    exp_results.append(postprocessor(reforecasts_data))
    exp_labels.append(label)

Computing scores

Computing the bias

# List to store the bias Data objects
bias = list()
for label, result in zip(exp_labels, exp_results):
    bias.append(result.bias(analysis_data))

Computing the ensemble mean RMSE

# List to store the ensemble mean RMSE Data objects
rmse = list()
for label, result in zip(exp_labels, exp_results):
    rmse.append(result.ensemble_mean_RMSE(analysis_data))

Computing the CRPS

# List to store the CRPS Data objects
crps = list()
for label, result in zip(exp_labels, exp_results):
    crps.append(result.CRPS(analysis_data))
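
For reference, the ensemble CRPS computed here can be estimated directly from the members. A minimal Numpy sketch of the standard estimator at a single point and lead time (Pythie's own implementation may differ, e.g. in how it accounts for the finite ensemble size):

def ensemble_crps(members, obs):
    # Standard ensemble CRPS estimator:
    # mean |x_i - y| - 0.5 * mean |x_i - x_j|
    members = np.asarray(members, dtype=float)
    term1 = np.abs(members - obs).mean()
    term2 = 0.5 * np.abs(members[:, None] - members[None, :]).mean()
    return term1 - term2

# e.g. ensemble_crps([285.1, 286.0, 284.7], 285.4)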

Plotting the scores

Plots at a grid point

grid_point = (20, 70)
ax = plot_quant(crps, exp_labels, timestamps, grid_point=grid_point)
ax.set_title('CRPS Score at grid point '+str(grid_point))
ax.set_ylabel('CRPS [°C]')
ax.set_xlabel('time x 6hrs');
ax = plot_quant(rmse, exp_labels, timestamps, grid_point=grid_point)
ax.set_title('RMSE Score at grid point '+str(grid_point))
ax.set_ylabel('RMSE [°C]')
ax.set_xlabel('time x 6hrs');
ax = plot_quant(bias, exp_labels, timestamps, grid_point=grid_point)
ax.set_title('Bias Score at grid point '+str(grid_point))
ax.set_ylabel('Bias [°C]')
ax.set_xlabel('time x 6hrs');

Plotting scores as fields

Creating Iris CubeLists to store the scores:

crps_cubes = iris.cube.CubeList()
bias_cubes = iris.cube.CubeList()
rmse_cubes = iris.cube.CubeList()

Creating the score cubes

for score in bias:
    # Creating a cube by recycling the analysis one
    bias_cube = analysis_cube[0].copy()
    # And replacing the data inside by the score
    bias_cube.data[1:, :, :] = np.squeeze(score.data)
    bias_cubes.append(bias_cube[1:])
for score in rmse:
    # Creating a cube by recycling the analysis one
    rmse_cube = analysis_cube[0].copy()
    # And replacing the data inside by the score
    rmse_cube.data[1:, :, :] = np.squeeze(score.data)
    rmse_cubes.append(rmse_cube[1:])
for score in crps:
    # Creating a cube by recycling the analysis one
    crps_cube = analysis_cube[0].copy()
    # And replacing the data inside by the score
    crps_cube.data[1:, :, :] = np.squeeze(score.data)
    crps_cubes.append(crps_cube[1:])
# Contour plot levels in Celsius
t2m_range = np.arange(-4., 4., 0.25)

Plot of the CRPS improvement

frames = crps_cubes[0].shape[0]
map_proj = ccrs.PlateCarree()
ns = len(crps_cubes)-1
nr = int(np.ceil(ns / 2))
axls = list()
fig=plt.figure(figsize=(15, 10 * nr))
t = 0
for i in range(ns):
    plt.subplot(nr, 2, i+1, projection=map_proj)
    ax = plt.gca()
    axls.append(ax)
    plot_cube(crps_cubes[i+1]-crps_cubes[0], t, t2m_range, ax=ax)
    ax.set_title(exp_labels[i+1]+' method \nCRPS change w.r.t. the raw forecasts at time '+str((t+1)*6)+' hrs\n Blue is better, red is worse')

def update(t):
    imlist = list()
    for i in range(ns):
        ax = axls[i]
        ax.clear()
        plot_cube(crps_cubes[i+1]-crps_cubes[0], t, t2m_range, ax=ax, quick=False)
        ax.set_title(exp_labels[i+1]+' method \nCRPS change w.r.t. the raw forecasts at time '+str((t+1)*6)+' hrs\n Blue is better, red is worse')
        imlist.append(ax._gci())
    return imlist

crps_anim = animation.FuncAnimation(fig, update, frames=frames, interval=250, blit=False)

Making a video to show the CRPS evolution over the lead time

HTML(crps_anim.to_html5_video())
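
If the notebook environment cannot render HTML5 video, the animation can instead be written to disk with Matplotlib's standard save method (this requires ffmpeg to be installed; the file name is arbitrary):

# Alternative: write the animation to an mp4 file (requires ffmpeg).
crps_anim.save('crps_evolution.mp4', writer='ffmpeg', dpi=100)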

Plot of the RMSE improvement

frames = rmse_cubes[0].shape[0]
map_proj = ccrs.PlateCarree()
ns = len(rmse_cubes)-1
nr = int(np.ceil(ns / 2))
axls = list()
fig=plt.figure(figsize=(15, 10 * nr))
t = 0
for i in range(ns):
    plt.subplot(nr, 2, i+1, projection=map_proj)
    ax = plt.gca()
    axls.append(ax)
    plot_cube(rmse_cubes[i+1]-rmse_cubes[0], t, t2m_range, ax=ax)
    ax.set_title(exp_labels[i+1]+' method \nRMSE change w.r.t. the raw forecasts at time '+str((t+1)*6)+' hrs\n Blue is better, red is worse')

def update(t):
    imlist = list()
    for i in range(ns):
        ax = axls[i]
        ax.clear()
        plot_cube(rmse_cubes[i+1]-rmse_cubes[0], t, t2m_range, ax=ax, quick=False)
        ax.set_title(exp_labels[i+1]+' method \nRMSE change w.r.t. the raw forecasts at time '+str((t+1)*6)+' hrs\n Blue is better, red is worse')
        imlist.append(ax._gci())
    return imlist

rmse_anim = animation.FuncAnimation(fig, update, frames=frames, interval=250, blit=False)

Making a video to show the RMSE evolution over the lead time

HTML(rmse_anim.to_html5_video())

Plot of the bias

frames = bias_cubes[0].shape[0]
map_proj = ccrs.PlateCarree()
ns = len(bias_cubes)
nr = int(np.ceil(ns / 2))
axls = list()
fig=plt.figure(figsize=(15, 10 * nr))
t = 0
for i in range(ns):
    plt.subplot(nr, 2, i+1, projection=map_proj)
    ax = plt.gca()
    axls.append(ax)
    plot_cube(bias_cubes[i], t, t2m_range, ax=ax)
    if i >= 1:
        ax.set_title(exp_labels[i]+' method \nBias score at time '+str((t+1)*6)+' hrs')
    else:
        ax.set_title(exp_labels[i]+'\nBias score at time '+str((t+1)*6)+' hrs')

def update(t):
    imlist = list()
    for i in range(ns):
        ax = axls[i]
        ax.clear()
        plot_cube(bias_cubes[i], t, t2m_range, ax=ax, quick=False)
        if i >= 1:
            ax.set_title(exp_labels[i]+' method \nBias score at time '+str((t+1)*6)+' hrs')
        else:
            ax.set_title(exp_labels[i]+'\nBias score at time '+str((t+1)*6)+' hrs')
        imlist.append(ax._gci())
    return imlist

bias_anim = animation.FuncAnimation(fig, update, frames=frames, interval=250, blit=False)

Making a video to show the bias evolution over the lead time

HTML(bias_anim.to_html5_video())