{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<h1>sum_emissions</h1>\n",
    "<h3>Notebook for printing emissions totals from HEMCO diagnostic netCDF files</h3>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We only need to import a few packages. The os.path package is used for filename manipulation, xarray for manipulationg netCDF data, and numpy for math operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import xarray as xr\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define the data file that you want to read.  We will sum emissions for each of the variables within this file.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We will use the July 2016 data file as an example.\n",
    "# You can edit this accordingly to use any file that you have on hand.\n",
    "filename = \"HEMCO_sa.diagnostics.201607010000.nc\"\n",
    "\n",
    "# Year and month \n",
    "year  = 2016\n",
    "month = 7\n",
    "\n",
    "# Seconds in July 2016\n",
    "sec_in_month = 31.0 * 86400.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will read the entire contents of the netCDF file into an xarray Dataset object.  The very powerful xarray package allows you to easily read and manipulate Earth Science data that are stored in netCDF files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<xarray.Dataset>\n",
       "Dimensions:      (lat: 46, lev: 47, lon: 72, time: 1)\n",
       "Coordinates:\n",
       "  * lon          (lon) float64 -180.0 -175.0 -170.0 -165.0 -160.0 -155.0 ...\n",
       "  * lat          (lat) float64 -89.0 -86.0 -82.0 -78.0 -74.0 -70.0 -66.0 ...\n",
       "  * lev          (lev) float64 0.9925 0.9775 0.9625 0.9475 0.9325 0.9175 ...\n",
       "  * time         (time) datetime64[ns] 2016-07-01\n",
       "Data variables:\n",
       "    hyam         (lev) float64 2.402 332.1 986.4 1.637e+03 2.285e+03 ...\n",
       "    hybm         (lev) float64 0.9925 0.9742 0.9526 0.9311 0.9096 0.8882 ...\n",
       "    P0           float64 1e+05\n",
       "    AREA         (lat, lon) float64 2.16e+09 2.16e+09 2.16e+09 2.16e+09 ...\n",
       "    POG2_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    POG1_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    OCPO_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    OCPI_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    BCPO_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    BCPI_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    HCOOH_TOTAL  (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    XYLE_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    TOLU_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    BENZ_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    C2H4_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    C2H2_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    GLYC_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    HAC_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    C2H6_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    CH2O_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    PRPE_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    MVK_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    ALD2_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    MEK_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    ISOP_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    ALK4_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    NH3_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    pFe_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    SO4_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    SO2_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    NO2_TOTAL    (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    NO_TOTAL     (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    SOAP_TOTAL   (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "    CO_TOTAL     (time, lat, lon) float64 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...\n",
       "Attributes:\n",
       "    title:        output/HEMCO  sa.diagnostics\n",
       "    history:      Created by routine NC_CREATE (in ncdf_mod.F90)\n",
       "    format:       NetCDF-4\n",
       "    conventions:  COARDS\n",
       "    reference:    http://wiki.geos-chem.org/The_HEMCO_Users_Guide\n",
       "    contact:      GEOS-Chem Support Team (geos-chem-support@as.harvard.edu)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the netCDF file into an xarray dataset\n",
    "ds = xr.open_dataset(filename)\n",
    "\n",
    "# Show the contents of the xarray dataset object.\n",
    "# In Jupyter notebook, typing the name of a variable or object\n",
    "# on the last line of a cell prints the object.\n",
    "ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extract the grid box surface area values from the HEMCO diagnostic file.  Surface area is stored in each HEMCO diagnostic file as a 2-dimensional (lon,lat) variable named \"AREA\"."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Original shape of AREA: (46, 72)\n",
      "New shape of AREA: (1, 46, 72)\n"
     ]
    }
   ],
   "source": [
    "# Extract the grid box surface area values f(in units of m^2) \n",
    "# from the xarray Dataset object into a numpy array.\n",
    "# This is done by using with the .values tag.\n",
    "area = ds['AREA'].values \n",
    "\n",
    "# Print the shape of the area array\n",
    "print( \"Original shape of AREA: {}\".format( area.shape ) )\n",
    "\n",
    "# Reshape area from (46,72) to (1,46,72) to be consistent with the \n",
    "# emissions data variables in the file.  If the array shapes are the same,\n",
    "# we can multiply each emissions data array directly by the area array.\n",
    "area = area[ np.newaxis, :, : ]\n",
    "\n",
    "# Print the new of the area array\n",
    "print( \"New shape of AREA: {}\".format( area.shape ) )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Construct the list of variables for which emissions totals will be printed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['POG2_TOTAL',\n",
       " 'POG1_TOTAL',\n",
       " 'OCPO_TOTAL',\n",
       " 'OCPI_TOTAL',\n",
       " 'BCPO_TOTAL',\n",
       " 'BCPI_TOTAL',\n",
       " 'HCOOH_TOTAL',\n",
       " 'XYLE_TOTAL',\n",
       " 'TOLU_TOTAL',\n",
       " 'BENZ_TOTAL',\n",
       " 'C2H4_TOTAL',\n",
       " 'C2H2_TOTAL',\n",
       " 'GLYC_TOTAL',\n",
       " 'HAC_TOTAL',\n",
       " 'C2H6_TOTAL',\n",
       " 'CH2O_TOTAL',\n",
       " 'PRPE_TOTAL',\n",
       " 'MVK_TOTAL',\n",
       " 'ALD2_TOTAL',\n",
       " 'MEK_TOTAL',\n",
       " 'ISOP_TOTAL',\n",
       " 'ALK4_TOTAL',\n",
       " 'NH3_TOTAL',\n",
       " 'pFe_TOTAL',\n",
       " 'SO4_TOTAL',\n",
       " 'SO2_TOTAL',\n",
       " 'NO2_TOTAL',\n",
       " 'NO_TOTAL',\n",
       " 'SOAP_TOTAL',\n",
       " 'CO_TOTAL']"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List of all variables in the xarray object.\n",
    "# This will include the index arrays (lon, lat, time) and also the AREA variable.\n",
    "varlist = ds.data_vars.keys()\n",
    "\n",
    "# Restrict the list of variables to only those containing emissions data.\n",
    "# (i.e. those that have at least 3 dimensions (e.g. time,lat.lon).\n",
    "# To manipulate a list, we have to use the \"v for v in varlist if ...\" syntax.\n",
    "varlist = [v for v in varlist if ds[v].ndim > 2  ]\n",
    "\n",
    "# Print the modified list.  This now only contains emisisons variables.\n",
    "varlist"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Loop over each emissions variable in the list and print the emission totals."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "POG2_TOTAL      sum =  5.239687e+06  kgC\n",
      "POG1_TOTAL      sum =  5.034209e+06  kgC\n",
      "OCPO_TOTAL      sum =  8.857803e+07  kgC\n",
      "OCPI_TOTAL      sum =  8.857803e+07  kgC\n",
      "BCPO_TOTAL      sum =  4.223591e+07  kgC\n",
      "BCPI_TOTAL      sum =  1.055898e+07  kgC\n",
      "HCOOH_TOTAL     sum =  2.068979e+07  kg\n",
      "XYLE_TOTAL      sum =  2.153014e+06  kgC\n",
      "TOLU_TOTAL      sum =  6.596417e+07  kgC\n",
      "BENZ_TOTAL      sum =  6.755955e+07  kgC\n",
      "C2H4_TOTAL      sum =  7.328033e+07  kgC\n",
      "C2H2_TOTAL      sum =  1.380832e+07  kgC\n",
      "GLYC_TOTAL      sum =  1.315163e+07  kg\n",
      "HAC_TOTAL       sum =  1.258072e+08  kg\n",
      "C2H6_TOTAL      sum =  7.362613e+07  kgC\n",
      "CH2O_TOTAL      sum =  6.660128e+07  kg\n",
      "PRPE_TOTAL      sum =  3.889013e+07  kgC\n",
      "MVK_TOTAL       sum =  5.276616e+06  kg\n",
      "ALD2_TOTAL      sum =  1.840918e+07  kgC\n",
      "MEK_TOTAL       sum =  8.520004e+06  kgC\n",
      "ISOP_TOTAL      sum =  3.381371e+06  kgC\n",
      "ALK4_TOTAL      sum =  1.940089e+08  kgC\n",
      "NH3_TOTAL       sum =  3.007598e+08  kg\n",
      "pFe_TOTAL       sum =  3.137024e+05  kg\n",
      "SO4_TOTAL       sum =  9.755584e+06  kg\n",
      "SO2_TOTAL       sum =  3.137023e+08  kg\n",
      "NO2_TOTAL       sum =  1.309008e+07  kg\n",
      "NO_TOTAL        sum =  5.933377e+07  kg\n",
      "SOAP_TOTAL      sum =  3.261302e+08  kg\n",
      "CO_TOTAL        sum =  4.726525e+09  kg\n"
     ]
    }
   ],
   "source": [
    "for name in varlist:\n",
    "    \n",
    "    # Get the units of each data variable.\n",
    "    # This will typically be in either kg/m2/s or kgC/m2/s, depending on the variable.\n",
    "    units = ds[name].units\n",
    "    \n",
    "    # We are going to print emissions totals in kg, so let's strip off just the\n",
    "    # kg or kgC part from the units name.  We do this the string \"split()\" method.\n",
    "    units = units.split( \"/\" )\n",
    "    \n",
    "    # units.split will return an array of substrings.  We just want the first element.\n",
    "    units = units[0]\n",
    "    \n",
    "    # Extract the data from the \"ds\" xarray object into a numpy array object.\n",
    "    emissions_array = ds[name].values\n",
    "    \n",
    "    # Convert the emissions data to kg/m2/s and take the sum\n",
    "    emissions_sum = np.sum( emissions_array * area * sec_in_month )\n",
    "    \n",
    "    # Print the emissions total. The  \" .6e\" specifier requests a number in \n",
    "    # scientific notation with 6 decimal places.  We leave a space for the - sign.\n",
    "    # The string method \".ljust(x)\" will left-justify a string in a column x spaces wide.\n",
    "    print( \"{n} sum = {es: .6e}  {u}\".format( n=name.ljust(15), es=emissions_sum, u=units ) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}