#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
StackOls.py

Purpose:
    Estimate a regression model on the stackloss data set

Inputs:
    The program expects the file data/stackloss.csv to contain
    the data

Version:
    2       Version Python, using functions

Date:
    2005/2/16, 2017/7/24

Author:
    Charles Bos
"""
###########################################################
### Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

###########################################################
### (vY, mX)= ReadStack(sData, sY, asX, bConst):
def ReadStack(sData, sY, asX, bConst):
    """
    Purpose:
      Load the stackloss data from a csv file and split it into the
      dependent variable and the matrix of regressors

    Inputs:
      sData     string, name of data file
      sY        string, name of dependent variable
      asX       list of strings, names of explanatory variables
      bConst    boolean, if True a column of ones is placed in front of mX

    Return value:
      (vY, mX)  tuple, vY holds the column sY, mX the columns asX
                (preceded by the constant when bConst is set)
    """
    dfData= pd.read_csv(sData)
    vY= dfData[sY].values
    mX= dfData[asX].values

    # Without a constant the selected columns are returned untouched
    if not bConst:
        return (vY, mX)

    # One entry of the constant per observation in vY
    vOnes= np.ones((vY.size, 1))
    mXc= np.concatenate((vOnes, mX), axis=1)

    return (vY, mXc)

###########################################################
### (vBeta, dS2)= EstStack(vY, mX):
def EstStack(vY, mX):
    """
    Purpose:
      Estimate the model y= X beta + e by OLS

    Inputs:
      vY        iN vector, dependent variable
      mX        iN x iK matrix, explanatory variables

    Return value:
      vBeta     iK vector, parameters (shape as returned by np.linalg.lstsq)
      dS2       double, residual variance SSR/(iN-iK); nan when iN <= iK
    """
    vBeta= np.linalg.lstsq(mX, vY, rcond=None)[0]       # Run OLS y= X beta + e
    iN= vY.size                 # Check number of observations
    iK= vBeta.size

    # lstsq hands back its residual sum as an array, which is empty when mX
    # is rank deficient or iN <= iK. Computing the SSR from the residuals
    # themselves always gives a plain scalar.
    vE= vY - mX @ vBeta
    dSSR= float(np.sum(np.square(vE)))

    # Without degrees of freedom the variance is not defined
    iDf= iN - iK
    dS2= dSSR/iDf if iDf > 0 else float('nan')

    return vBeta, dS2

###########################################################
### OutputStack(vBeta, dS2, vY, mX, sY, asX, sBase)
def OutputStack(vBeta, dS2, vY, mX, sY, asX, sBase):
    """
    Purpose:
      Print the estimation results, and plot vY against every regressor
      except the first one; the graph is saved to file and shown

    Inputs:
      vBeta     iK x 1 vector, parameters
      dS2       double, residual variance
      vY        iN vector, dependent variable
      mX        iN x iK matrix, explanatory variables; column 0 is skipped
                in the plots (main() places the constant there)
      sY        string, name of dependent variable
      asX       iK array, names of explanatory variables
      sBase     string, base of output filename for graph, 'data.png' is
                appended to it

    Return value:
      None
    """
    import os                   # Local import, only needed for the output directory

    print ('Ols estimates regressing ', sY, ' on ', asX)
    print (pd.DataFrame(vBeta, index=asX, columns=['beta']))
    print ('Residual variance S2= ', dS2)

    iK= vBeta.size
    iPlots= iK-1
    if (iPlots < 1):
        return                  # plt.subplot() rejects a grid with zero columns

    for i in range(1, iK):
        plt.subplot(1, iPlots, i)
        plt.plot(mX[:,i], vY, 'o')
        plt.title(asX[i])
        if (i == 1):
            plt.ylabel(sY)      # Label the y-axis on the leftmost panel only

    # savefig() does not create missing directories itself
    sDir= os.path.dirname(sBase)
    if (sDir):
        os.makedirs(sDir, exist_ok=True)
    plt.savefig(sBase+'data.png')
    plt.show()

###########################################################
### main
def main():
    """
    Purpose:
      Read the stackloss data, estimate the regression by OLS and
      report the results
    """
    # Settings
    sBase= 'stack'
    sData= 'data/stackloss.csv'
    asRegr= ['Air Flow', 'Water Temperature', 'Acid Concentration']
    sY= 'Stack Loss'

    # Read the data, with a constant as first column of mX
    vY, mX= ReadStack(sData, sY, asRegr, True)
    asNames= ['constant'] + asRegr

    # Estimate by OLS
    vBeta, dS2= EstStack(vY, mX)

    # Report the estimates, the graph is written below graphs/
    sOut= 'graphs/' + sBase
    OutputStack(vBeta, dS2, vY, mX, sY, asNames, sOut)

###########################################################
### start main
# Only run main() when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()
