#!/usr/bin/env python
import sys
import os
import time
import datetime
import numpy as np
import numpy.ma as ma
import math
from io import StringIO

from sklearn import cluster
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

print
print('                   *** WAVEWATCH III Wave system tracking ***  ')
print('               ===============================================')
print

NWPSdir = os.environ['NWPSdir']

# Parameters
monthstr = ['JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC']
wfo = sys.argv[1]
nclust = int(sys.argv[2])
plot_output = True

print('Reading ww3_systrk.inp file...')
fname = 'ww3_systrk.inp'
with open(fname) as ff:
   # Skip header matter
   for iloc in range(0, 10):
        line = ff.readline()

   # Get date
   line = ff.readline()
   yyyymmdd,HHMMSS,delta_t,ntimes = line.split()
   dt = int(delta_t)
   nt = int(ntimes)
   date_time = yyyymmdd+' '+HHMMSS
   pattern = '%Y%m%d %H%M%S'
   startdate = int(time.mktime(time.strptime(date_time, pattern)))
   enddate = startdate + dt*(nt-1)
   print('Start time:',datetime.datetime.fromtimestamp(startdate).strftime('%Y%m%d.%H%M%S'))
   print('End time:',datetime.datetime.fromtimestamp(enddate).strftime('%Y%m%d.%H%M%S'))
   print('Number of time levels being processed:',nt)

   # Skip comment lines
   for iloc in range(0, 4):
        line = ff.readline()

   # Read coordinates
   line = ff.readline()
   dum1,dum2,dum3 = line.split()
   x0=float(dum1)
   xn=float(dum2)
   mxc=int(dum3)
   line = ff.readline()
   dum1,dum2,dum3 = line.split()
   y0=float(dum1)
   yn=float(dum2)
   myc=int(dum3)

   print('LON0,LONN,MXC:',x0,xn,mxc)
   print('LAT0,LATN,MYC:',y0,yn,myc)

   dx = (xn-x0)/mxc
   dy = (yn-y0)/myc
   nlon = mxc+1
   nlat = myc+1
   print('DLON,DLAT,NLON,NLAT:',("%.4f" % dx),("%.4f" % dy),nlon,nlat)

   # Skip comment lines
   for iloc in range(0, 10):
        line = ff.readline()

   # Read output points (for Gerling-Hanson plots)
   print
   print('Output point locations:')
   nloc = 0
   pntlon = []
   pntlat = []   
   while (nloc < 50):
      line = ff.readline()
      dum1,dum2 = line.split()
      if (float(dum1)==0.000)&(float(dum2)==0.000):
         break
      nloc += 1
      pntlon = np.append(pntlon, float(dum1))
      pntlat = np.append(pntlat, float(dum2))
   print(nloc)
   print(pntlon)
   print(pntlat)

print
print('Reading partition.blk.raw file...')
rawdat = np.loadtxt('partition.blk.raw')
print('... finished')
#print(rawdat)

# Select parameters for clustering, omitting first (empty) record
# Contents of partition.blk.raw:
# 20181015.060000  49.420 233.000    1.12   11.85   282.22     9.98   0.00
wavedat = rawdat[1:-1,[4, 5]]
wavedat2 = rawdat[1:-1,[0, 1, 2, 3, 4, 5, 8, 9]]
# Change circular boundary of DIR from N to E for these West Coast WFOs
if ( (wfo == 'sgx') | (wfo == 'lox') | (wfo == 'mtr') | (wfo == 'eka') | \
     (wfo == 'mfr') | (wfo == 'pqr') | (wfo == 'sew') | (wfo == 'alu') ):
   wavedat[:,1] = [ x+360. if x<90. else x for x in wavedat[:,1]]
print('Data used in fit:')
print(wavedat)

#---- Clustering algorithms -----
print
print('Fitting data...')
wavedat = StandardScaler().fit_transform(wavedat)
silhouette_best = -1.
# Compute silhoutte coefficient for trial number of clusters
print('Trying nclust =',nclust)
k_means = cluster.KMeans(n_clusters=nclust, random_state=1)
k_means.fit(wavedat)
label=k_means.labels_.astype(float)

print('Calculating fit quality...')
if len(set(label[0:(nlon*nlat*5)])) > 1:
   try:  
     silhouette_avg = silhouette_score(wavedat[0:(nlon*nlat*5),:], label[0:(nlon*nlat*5)], 
                                     sample_size=min(25000,nlon*nlat*5), random_state=1)
   except:
      silhouette_avg=0.
      print('*** Warning: silhouette_avg could not be calculated')
else:
   silhouette_avg = 0.

print('Silhouette Coefficient:',silhouette_avg,'\n')
ofilenm = 'silhouette_coeff_k'+str(nclust)+'.txt'
text_file = open(ofilenm, "w")
text_file.write("%s" % str(silhouette_avg))
text_file.close()
#--------------------------------