#!/usr/bin/env python3
###############################################################################################
#
#  final_lmp_cv_fcst.py - formatting utility
#
#  Purpose:
#      format lmp web data files with final forecast and relevant threshold
#
#  INPUT:
#      $1 = lmp u660 formatted file with cv prob forecasts (script keys on header names)
#      $2 = lmp u660 formatted file with cv thresholds (script keys on header names)
#
#  HISTORY:  DECEMBER 2017   SCHNAPP   MDL - CREATED
#            FEBRUARY 2021   HUANG     MDL - MODIFIED TO ADD CCG, CVS MELD PROBS AND THRESHOLDS
#
#  LIBRARY REQUIREMENTS:
#      Python 3.6
#      pandas (works with 0.20.1)
#      numpy  (works with 1.12.1)
#
###############################################################################################
import sys

import numpy as np
import pandas as pd


def final(f, t):
    """Build final forecast and threshold frames, keeping the highest-priority version of each element."""
    # forecast elements and versions in order of priority
    fcsts = {'CIG#2': ['CIGMD#2', 'CIGBP#2', 'CIGBS#2'],
             'CIG#3': ['CIGMD#3', 'CIGBP#3', 'CIGBS#3'],
             'CIG#5': ['CIGMD#5', 'CIGBP#5', 'CIGBS#5'],
             'CCG#2': ['CCGMD#2', 'CCGBP#2', 'CCGBS#2'],
             'CCG#3': ['CCGMD#3', 'CCGBP#3', 'CCGBS#3'],
             'CCG#5': ['CCGMD#5', 'CCGBP#5', 'CCGBS#5'],
             'VIS#2': ['VISMD#2', 'VISBP#2', 'VISBS#2'],
             'VIS#4': ['VISMD#4', 'VISBP#4', 'VISBS#4'],
             'VIS#5': ['VISMD#5', 'VISBP#5', 'VISBS#5'],
             'CVS#2': ['CVSMD#2', 'CVSBP#2', 'CVSBS#2'],
             'CVS#4': ['CVSMD#4', 'CVSBP#4', 'CVSBS#4'],
             'CVS#5': ['CVSMD#5', 'CVSBP#5', 'CVSBS#5'],
             'PPO': ['LMPPPOP', 'LMPPPOS'],
             'PRBFRZ': ['PRBFRZP', 'PRBFRZS'],
             'PRBSNW': ['PRBSNWP', 'PRBSNWS']}

    # an explicit mapping is needed because there is little consistency between
    # the prob labels and the corresponding thresh labels
    #                FCST       THRESH
    map_f_to_t = {
        # CIG
        'CIGMD#2': 'CIGM#2', 'CIGMD#3': 'CIGM#3', 'CIGMD#5': 'CIGM#5',
        'CIGBP#2': 'CIGP#2', 'CIGBP#3': 'CIGP#3', 'CIGBP#5': 'CIGP#5',
        'CIGBS#2': 'CIGS#2', 'CIGBS#3': 'CIGS#3', 'CIGBS#5': 'CIGS#5',
        # CONDITIONAL CIG
        'CCGMD#2': 'CCGM#2', 'CCGBP#2': 'CCGP#2', 'CCGBS#2': 'CCGS#2',
        'CCGMD#3': 'CCGM#3', 'CCGBP#3': 'CCGP#3', 'CCGBS#3': 'CCGS#3',
        'CCGMD#5': 'CCGM#5', 'CCGBP#5': 'CCGP#5', 'CCGBS#5': 'CCGS#5',
        # VIS
        'VISMD#2': 'VISM#2', 'VISMD#4': 'VISM#4', 'VISMD#5': 'VISM#5',
        'VISBP#2': 'VISP#2', 'VISBP#4': 'VISP#4', 'VISBP#5': 'VISP#5',
        'VISBS#2': 'VISS#2', 'VISBS#4': 'VISS#4', 'VISBS#5': 'VISS#5',
        # CONDITIONAL VIS
        'CVSMD#2': 'CVSM#2', 'CVSBP#2': 'CVSP#2', 'CVSBS#2': 'CVSS#2',
        'CVSMD#4': 'CVSM#4', 'CVSBP#4': 'CVSP#4', 'CVSBS#4': 'CVSS#4',
        'CVSMD#5': 'CVSM#5', 'CVSBP#5': 'CVSP#5', 'CVSBS#5': 'CVSS#5',
        # PPO
        'LMPPPOP': 'PPOP#1', 'LMPPPOS': 'PPOS#1',
        # FREEZING
        'PRBFRZP': 'FRZPRI', 'PRBFRZS': 'FRZSEC',
        # SNOW
        'PRBSNWP': 'SNWPRI', 'PRBSNWS': 'SNWSEC'}

    # lists of series for the output dataframes
    forecasts = [f['VARIABLE']]
    thresholds = [t['VARIABLE']]

    # iteratively check the forecasts in reversed order of priority,
    # filling in higher-priority forecasts where they exist
    for key in fcsts.keys():
        # fs and ts hold the forecasts and thresholds for a single forecast element
        # and category; initialize them with the lowest-priority forecast and threshold
        fs = f[fcsts[key][-1]].astype('float')
        ts = t[map_f_to_t[fcsts[key][-1]]].astype('float')
        for ver in reversed(fcsts[key]):
            # overwrite fs and ts with higher-priority data where it exists
            # (starting from the initialized series keeps the logic simple at
            # negligible performance cost)
            fs = f[ver].where(f[ver].astype('float') < 999, fs).astype('float')
            ts = t[map_f_to_t[ver]].where(f[ver].astype('float') < 999, ts).astype('float')
        fs.name = key
        ts.name = key
        forecasts.append(fs)
        thresholds.append(ts)

    df_f = pd.concat(forecasts, axis=1)
    df_t = pd.concat(thresholds, axis=1)

    return df_f, df_t
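
# The helper below is an illustrative sketch only and is never called by the script.  It shows,
# with toy data, the Series.where() priority-fill pattern used in final() above; the column
# names and the use of 999 as a missing-value sentinel are assumptions mirrored from final().
def _demo_priority_fill():
    """Illustrative example: fill a low-priority series with higher-priority values where valid."""
    low = pd.Series([10.0, 20.0, 30.0], name='low_priority')
    high = pd.Series([999.0, 5.0, 999.0], name='high_priority')  # 999 marks missing data
    # keep the high-priority value where it is valid (< 999); otherwise fall back to low
    merged = high.where(high < 999, low)
    return merged  # -> [10.0, 5.0, 30.0]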
def data_reader(file):
    '''
    IDENTIFY LOCATIONS OF HEADER SECTIONS.
    HEADERS HAVE THE FOLLOWING FORMAT...
        "\n"
        " DATA FOR YYYYMMDDHH FOR VARIABLE GROUP X"
        "\n"
        " VARIABLE  all variable headers...."
    HEADERS ARE FOLLOWED BY DATA.
    A DATA SECTION TERMINATES WHEN A NEW HEADER OR EOF FOLLOWS.
    '''
    frames = []
    with open(file, 'r') as fcsts:
        more_data = True
        while more_data:
            header_found = False
            for line in fcsts:
                if 'DATA FOR' in line:
                    # hit a header
                    next(fcsts)                        # skip the blank line that follows every DATA FOR line
                    head_labels = next(fcsts).split()  # record the header labels
                    header_found = True
                    break
            if not header_found:
                more_data = False
                break
            data = []
            for line in fcsts:
                if line.strip():
                    data.append(line.split()[1:])
                else:
                    break
            df = pd.DataFrame(data, columns=head_labels)
            frames.append(df)

    return frames


def data_writer(old_file, new_frames):
    newfile = old_file + '_cv'
    with open(old_file, 'r') as old, open(newfile, 'wb') as new:
        frame_index = 0
        for write_line in old:
            if 'CIG' in write_line:
                # write the formatted header
                cols = len(new_frames[frame_index].columns)
                header_fmt = '%10s ' + '%9s' * (cols - 2)
                np.savetxt(new, [new_frames[frame_index].columns.values[1:]], fmt=header_fmt)
                # write the formatted data
                data_fmt = '%5i %-7s' + '%8.3f ' * (cols - 2)
                np.savetxt(new, new_frames[frame_index], fmt=data_fmt)
                frame_index += 1
                # skip the old data section that was just replaced by the new frame
                skipping = True
                while skipping:
                    try:
                        line = next(old)
                    except StopIteration:
                        skipping = False
                        break
                    if not line.strip():
                        new.write(line.encode())
                        skipping = False
                        continue
            else:
                new.write(write_line.encode())


def main():
    fcst_file = sys.argv[1]
    thresh_file = sys.argv[2]

    # read data
    fcst_frames = data_reader(fcst_file)
    thresh_frames = data_reader(thresh_file)

    final_f_frames = []
    final_t_frames = []

    # perform data manipulation/organization
    for f_frame, t_frame in zip(fcst_frames, thresh_frames):
        final_f_frame, final_t_frame = final(f_frame, t_frame)
        # add the index as a column (starting at 1)
        final_f_frame.insert(0, 'idx', final_f_frame.index + 1)
        final_t_frame.insert(0, 'idx', final_t_frame.index + 1)
        # store all the dataframes in lists
        final_f_frames.append(final_f_frame)
        final_t_frames.append(final_t_frame)

    # write data
    data_writer(fcst_file, final_f_frames)
    data_writer(thresh_file, final_t_frames)


if __name__ == "__main__":
    main()
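
# Example invocation (filenames are hypothetical; the two arguments are the u660-formatted
# forecast and threshold files described in the header comment):
#     ./final_lmp_cv_fcst.py lmp_cv_fcst.u660 lmp_cv_thresh.u660
# Output is written alongside each input as <input_file>_cv.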