#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
speed_overhead1.py

Purpose:
    Compare speed of filling a big 'matrix' structure, by selecting a row, and
    changing (or reading) multiple items from that row.

    For the matrix structure, one may use
      1. a dataframe, referring to element df.loc[r, c]
      2. a dataframe, selecting a row, and changing elements of the row sr[c]
      3. a dictionary of dictionaries, selecting dtr= dt[r], element dtr[c]
      4. a numpy matrix, using mX[r, c]
      5. a numpy matrix, first selecting a row vXr= mX[r], then element vXr[c]

Conclusion:
    A dataframe has overhead, as does a series. A dictionary, or numpy matrix, is much
    quicker in selecting the element of choice.

    The flexibility of the dictionary may help, as it can also handle textual indices,
    whereas a numpy matrix can only work with numerical indices.

    The overhead of pandas in this respect is huge, leading to a speed difference of
    a factor around 3 between options 1 and 2 (hence preselecting the row can be useful).
    Using a dictionary instead can be up to 13 times quicker than accessing df.loc[r, c].
    Numpy is possibly marginally quicker than a dictionary, but not by much.

Version:
    1       First start

Date:
    2024/8/30

Author:
    Charles Bos
"""
###########################################################
### Imports
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt

from lib.tracktime import *

###########################################################
### StoreDFSRDT(iS, iR, iC, iSeed)
def StoreDFSRDT(iS, iR, iC, iSeed):
    """
    Purpose:
        Store repetitively random numbers in a matrix-like structure, first
        selecting a row, then many times selecting a column to fill in. Five
        variants are run in turn, each restarting from the same seed, and
        each preceded by a TrackTime() call with its own label:
          V1-df     dataframe, element df.loc[r, c]
          V2-sr     dataframe, row sr= df.loc[r], element sr[c]
          V3-dt     dictionary of dictionaries, row dtr= dt[r], element dtr[c]
          V4-np     numpy matrix, element mX[r, c]
          V5-np-r   numpy matrix, row vXr= mX[r], element vXr[c]

    Inputs:
        iS      integer, number of repetitions; iS rows are drawn, and for
                each drawn row iS columns are drawn and written
        iR      integer, number of rows
        iC      integer, number of columns
        iSeed   integer, seed

    Return value:
        None; a header line is printed and TrackReport() is called

    Note:
        TrackInit(), TrackTime() and TrackReport() are not defined in this
        file; presumably they are provided by lib.tracktime, with the
        creation of each structure counted under the label preceding it.
    """
    TrackInit()

    # Variant 1: address every element through the dataframe itself
    TrackTime('V1-df')
    df1= pd.DataFrame(np.zeros((iR, iC)))
    np.random.seed(iSeed)
    for s in range(iS):
        r= np.random.randint(iR)
        for sc in range(iS):
            c= np.random.randint(iC)
            df1.loc[r, c]= np.random.randn()
    # print ('v1: ', df1.mean().mean(), np.random.randn())

    # Variant 2: select the row once, then write into the resulting series
    TrackTime('V2-sr')
    df2= pd.DataFrame(np.zeros((iR, iC)))
    np.random.seed(iSeed)
    for s in range(iS):
        r= np.random.randint(iR)
        # NOTE(review): whether writes to sr reach df2 depends on the
        # view/copy rules of the installed pandas; with copy-on-write
        # active sr is detached on its first write and df2 stays at zero.
        # The statement exercised by this variant, sr[c]= ..., is the same
        # either way.
        sr= df2.loc[r]
        for sc in range(iS):
            c= np.random.randint(iC)
            sr[c]= np.random.randn()
    # print ('v2: ', df2.mean().mean(), np.random.randn())

    # Variant 3: dictionary of row dictionaries, row selected once
    TrackTime('V3-dt')
    np.random.seed(iSeed)
    dt= {r: {c: 0 for c in range(iC)} for r in range(iR)}
    for s in range(iS):
        r= np.random.randint(iR)
        dtr= dt[r]
        for sc in range(iS):
            c= np.random.randint(iC)
            dtr[c]= np.random.randn()
    # pd.DataFrame(dt) uses the outer keys (here the rows) as column labels;
    # transpose to obtain the same iR x iC layout as df1 and df2. The frame
    # is only used by the optional check below.
    df3= pd.DataFrame(dt).T
    # print ('v3: ', df3.mean().mean(), np.random.randn())

    # Variant 4: numpy matrix, element addressed by (row, column)
    TrackTime('V4-np')
    np.random.seed(iSeed)
    mX= np.zeros((iR, iC))
    for s in range(iS):
        r= np.random.randint(iR)
        for sc in range(iS):
            c= np.random.randint(iC)
            mX[r, c]= np.random.randn()
    # print ('v4: ', mX.mean(), np.random.randn())

    # Variant 5: numpy matrix, row selected once, then element of that row
    TrackTime('V5-np-r')
    np.random.seed(iSeed)
    mXr= np.zeros((iR, iC))
    for s in range(iS):
        r= np.random.randint(iR)
        vXr= mXr[r]
        for sc in range(iS):
            c= np.random.randint(iC)
            vXr[c]= np.random.randn()
    # print ('v5: ', mXr.mean(), np.random.randn())

    print ('\nWriting to matrix-like structure')
    TrackReport()

###########################################################
### ReadDFSRDT(iS, iR, iC, iSeed)
def ReadDFSRDT(iS, iR, iC, iSeed):
    """
    Purpose:
        Read repetitively single elements from a matrix-like structure filled
        with random numbers, first selecting a row, then many times selecting
        a column of that row to read. The same five access variants are run
        in turn, each restarting from the same seed, and each preceded by a
        TrackTime() call with its own label (V1-df, V2-sr, V3-dt, V4-np,
        V5-np-r).

    Inputs:
        iS      integer, number of repetitions; iS rows are drawn, and for
                each drawn row iS columns are drawn and read
        iR      integer, number of rows
        iC      integer, number of columns
        iSeed   integer, seed

    Return value:
        None; a header line is printed and TrackReport() is called
    """
    TrackInit()

    # Variant 1: read every element through the dataframe itself
    TrackTime('V1-df')
    np.random.seed(iSeed)
    dfLoc= pd.DataFrame(np.random.randn(iR, iC))
    for i in range(iS):
        iRow= np.random.randint(iR)
        for j in range(iS):
            iCol= np.random.randint(iC)
            dRead= dfLoc.loc[iRow, iCol]
    # print ('v1-df: ', dRead, np.random.randn())

    # Variant 2: select the row once, then read from the resulting series
    TrackTime('V2-sr')
    np.random.seed(iSeed)
    dfRow= pd.DataFrame(np.random.randn(iR, iC))
    for i in range(iS):
        iRow= np.random.randint(iR)
        srRow= dfRow.loc[iRow]
        for j in range(iS):
            iCol= np.random.randint(iC)
            dRead= srRow[iCol]
    # print ('v2-sr: ', dRead, np.random.randn())

    # Variant 3: dictionary of row dictionaries, filled one draw per element
    TrackTime('V3-dt')
    np.random.seed(iSeed)
    dtAll= {iKeyR: {iKeyC: np.random.randn() for iKeyC in range(iC)}
            for iKeyR in range(iR)}
    for i in range(iS):
        iRow= np.random.randint(iR)
        dtRow= dtAll[iRow]
        for j in range(iS):
            iCol= np.random.randint(iC)
            dRead= dtRow[iCol]
    # print ('v3-dt: ', dRead, np.random.randn())

    # Variant 4: numpy matrix, element addressed by (row, column)
    TrackTime('V4-np')
    np.random.seed(iSeed)
    mFull= np.random.randn(iR, iC)
    for i in range(iS):
        iRow= np.random.randint(iR)
        for j in range(iS):
            iCol= np.random.randint(iC)
            dRead= mFull[iRow, iCol]
    # print ('v4-np: ', dRead, np.random.randn())

    # Variant 5: numpy matrix, row selected once, then element of that row
    TrackTime('V5-np-r')
    np.random.seed(iSeed)
    mByRow= np.random.randn(iR, iC)
    for i in range(iS):
        iRow= np.random.randint(iR)
        vRow= mByRow[iRow]
        for j in range(iS):
            iCol= np.random.randint(iC)
            dRead= vRow[iCol]
    # print ('v5-np-r: ', dRead, np.random.randn())

    print('\nReading from matrix-like structure')
    TrackReport()

###########################################################
### main
def main():
    """
    Purpose:
        Run the writing benchmark, then the reading benchmark, both with the
        same settings.
    """
    # Magic numbers
    iSeed= 1234
    (iR, iC)= (1000, 200)       # Rows and columns of the structures
    iS= 500                     # Repetitions (an earlier trial used iS= 10)

    # Output: each benchmark prints its own report
    for fnBench in (StoreDFSRDT, ReadDFSRDT):
        fnBench(iS, iR, iC, iSeed)


###########################################################
### start main
# Run the benchmarks only when this file is executed as a script, not when
# it is imported as a module
if __name__ == "__main__":
    main()
