#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
incstack.py

Purpose:
    Contain routines for stackloss estimation

Version:
    1       using lib for routines

Date:
    2005/2/16, 2017/7/24, 2017/8/17, 2019/6/4

Author:
    Charles Bos
"""
###########################################################
### Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.optimize as opt

###########################################################
### Get hessian and related functions
from lib.grad import *

###########################################################
### (vY, mX)= ReadStack(sData, sY, asX, bConst):
def ReadStack(sData, sY, asX, bConst):
    """
    Purpose:
      Load a csv file and split it into a dependent variable and a
      matrix of regressors, optionally prefixed by a column of ones

    Inputs:
      sData     string (or anything pd.read_csv accepts), data source
      sY        string, column name of the dependent variable
      asX       list of strings, column names of the regressors
      bConst    boolean, if true a constant is placed in column 0 of mX

    Return value:
      (vY, mX)  tuple, iN vector of y and iN x iK (or iK+1) matrix of x
    """
    dfData= pd.read_csv(sData)
    vY= dfData[sY].values
    mX= dfData[asX].values

    # Without a constant the raw columns are returned as they are
    if (not bConst):
        return (vY, mX)

    # One row of the constant per observation, placed in front of mX
    vOnes= np.ones((vY.size, 1))
    return (vY, np.hstack([vOnes, mX]))

###########################################################
### dALL= AvgNLnLRegr(vP, vY, mX)
def AvgNLnLRegr(vP, vY, mX):
    """
    Purpose:
        Compute average negative loglikelihood of the normal linear
        regression model  y = X beta + e,  e ~ N(0, sigma^2)

    Inputs:
        vP      iK+1 vector of parameters, vP[0]= sigma, vP[1:]= beta.
                Only the absolute value of vP[0] is used, so the sign of
                sigma is irrelevant for the function value
        vY      iN vector of data
        mX      iN x iK matrix of regressors

    Return value:
        dALL    double, average negative loglikelihood; +inf when
                sigma == 0, where the density is degenerate

    Side effects:
        Prints a warning if vP has an unexpected size, and a progress
        mark ('.' per evaluation, 'x' for a rejected sigma) on screen
    """
    iK= mX.shape[1]
    if (np.size(vP) != iK+1):         # Check if vP is as expected
        print ("Warning: wrong size vP= ", vP)

    (dSigma, vBeta)= (np.fabs(vP[0]), vP[1:])

    if (dSigma <= 0):
        # After fabs() this is reached for sigma == 0 exactly. The function
        # is MINIMISED, so the degenerate point is penalised with +inf;
        # returning -inf would present it to the optimiser as the optimum
        print ('x', end='')
        return np.inf

    vE= vY - mX @ vBeta

    vLL= -0.5*(np.log(2*np.pi) + 2*np.log(dSigma) + np.square(vE/dSigma))
    dALL= -np.mean(vLL, axis= 0)    # Get AVERAGE NEGATIVE LL

    print ('.', end='')             # Give sign of life

    return dALL

###########################################################
### (vBeta, dS2)= EstStack(vY, mX):
def EstStack(vY, mX):
    """
    Purpose:
      Estimate the model  y = X beta + e  by OLS

    Inputs:
      vY        iN vector, dependent variable
      mX        iN x iK matrix, explanatory variables

    Return value:
      vBeta     iK vector, parameters (least squares solution; the
                minimum-norm one if mX is rank deficient)
      dS2       double, residual variance SSR/(iN-iK)
    """
    vBeta= np.linalg.lstsq(mX, vY, rcond=None)[0]       # Run OLS y= X beta + e
    iN= vY.size                 # Number of observations
    iK= vBeta.size              # Number of parameters

    # The residual output of lstsq is a 1-element array, and an EMPTY array
    # when mX is rank deficient or iN <= iK. Compute the sum of squared
    # residuals directly, so that dS2 is a scalar in every case.
    vE= vY - mX @ vBeta
    dSSR= np.sum(np.square(vE))

    dS2= dSSR/(iN-iK)

    return (vBeta, dS2)

###########################################################
### (vP, vS, dLL, sMess)= EstStack_ML(vY, mX):
def EstStack_ML(vY, mX):
    """
    Purpose:
      Estimate the model by Maximum Likelihood, minimising the average
      negative loglikelihood AvgNLnLRegr with BFGS

    Inputs:
      vY        iN vector, dependent variable
      mX        iN x iK matrix, explanatory variables

    Return value:
      vP        iK+1 vector, sigma (reported as a non-negative number)
                and beta's
      vS        iK+1 vector, standard errors, from the inverse of the
                numerical hessian of the average negative loglikelihood,
                divided by iN
      dLL       double, loglikelihood at the optimum
      sMess     string, convergence message of the optimiser

    Side effects:
      Prints the initial and final loglikelihood and parameters
    """
    (iN, iK)= np.shape(mX)

    # Starting values: sigma= 1, beta= 0
    vP0= np.zeros(iK+1)
    vP0[0]= 1

    dNLL0= iN*AvgNLnLRegr(vP0, vY, mX)     # Total NEGATIVE LL at start
    print ("Initial LL= ", -dNLL0, "\nvP0=", vP0)

    res= opt.minimize(AvgNLnLRegr, vP0, args=(vY, mX), method="BFGS")

    mH= hessian_2sided(AvgNLnLRegr, res.x, vY, mX)
    mS2= np.linalg.inv(mH)/iN
    vS= np.sqrt(np.diag(mS2))

    # AvgNLnLRegr only uses |vP[0]|, so the optimiser may stop at a negative
    # first element. Report sigma with its proper sign; the function value
    # is unchanged by this.
    vP= np.array(res.x, dtype=float)
    vP[0]= np.fabs(vP[0])

    dLL= -iN*res.fun                        # Back to total loglikelihood
    print ("\nBFGS results in ", res.message,
           "\nPars: ", vP,
           "\nLL= ", dLL, ", f-eval= ", res.nfev)

    return (vP, vS, dLL, res.message)

###########################################################
### OutputStack(vBeta, dS2, vY, mX, sY, asX, sBase)
def OutputStack(vBeta, dS2, vY, mX, sY, asX, sBase):
    """
    Purpose:
      Provide output: print the OLS estimates, and plot the dependent
      variable against each regressor in columns 1, ..., iK-1 of mX

    Inputs:
      vBeta     iK x 1 vector, parameters
      dS2       double, residual variance
      vY        iN vector, dependent variable
      mX        iN x iK matrix, explanatory variables
      sY        string, name of dependent variable
      asX       iK array, names of explanatory variables
      sBase     string, base of output filename for graph

    Side effects:
      Prints on screen; if iK >= 2, saves the graph to sBase+"data.png"
      and shows it. Column 0 of mX is never plotted (presumably it holds
      the constant -- confirm against the caller).
    """
    print ("Ols estimates regressing ", sY, " on ", asX)
    print (pd.DataFrame(vBeta, index=asX, columns=["beta"]))
    print ("Residual variance S2= ", dS2)

    iK= vBeta.size
    if (iK < 2):
        # Only column 0 is available: there is nothing to plot, and a
        # subplot grid with 0 columns is not a valid grid
        return

    iPlots= iK-1
    for i in range(1, iK):
        plt.subplot(1, iPlots, i)
        plt.plot(mX[:,i], vY, 'o')
        plt.title(asX[i])

    plt.subplot(1, iPlots, 1)       # Back to first panel, for the y-label
    plt.ylabel(sY)
    plt.savefig(sBase+"data.png")
    plt.show()

###########################################################
### OutputStack_ML(mPS, dLL, sMess, asX)
def OutputStack_ML(mPS, dLL, sMess, asX):
    """
    Purpose:
      Provide output of the ML estimation on screen

    Inputs:
      mPS       2 x (iK+1) matrix, first row parameters, second row
                standard errors; its transpose is tabulated
      dLL       double, loglikelihood
      sMess     string, convergence message
      asX       list of iK strings, names of the explanatory variables;
                the label "sigma" is placed in front of them

    Side effects:
      Prints the message, loglikelihood and a table with columns
      'p' and 's'
    """
    asRows= ["sigma"]+asX
    dfPars= pd.DataFrame(mPS.T, index=asRows, columns=['p', 's'])

    print ("\n\nEstimation resulted in ", sMess)
    print ("Using ML with LL= ", dLL)
    print ("Parameter estimates:\n", dfPars)
