# enable equation numbering

%%javascript
MathJax.Hub.Config({
    TeX: { equationNumbers: { autoNumber: "AMS" } }
});

UsageError: Line magic function `%%javascript` not found.


import numpy as np
from scipy.stats import poisson


def opt_DP(t, param, distri=poisson,
           srange=(-100, 100, 1), arange=(0, 100, 1), disp=False):
    """
    Return the value-function table of period t, built by backward recursion.

    The table of period t+1 is computed first (recursively, up to period T)
    and then used as the continuation value when minimizing over the
    actions of period t.

    Inputs:
        t      ~ period index, integer in 0..param['T']
        param  ~ parameters, dict with keys
                    'T'  : index of the last period
                    'k'  : per-period unit order cost (indexable by t)
                    'h'  : per-period unit hold cost (indexable by t)
                    'p'  : per-period unit backorder cost (indexable by t)
                    'Ed' : per-period distribution parameter (indexable by t)
        distri ~ distribution, see scipy.stats
        srange ~ value range of inventory level, (min, max, step)
        arange ~ value range for action, (min, max, step)
        disp   ~ if True, print a message after each period is solved
    Output:
        table  ~ hash table (dict),
                    key is the inventory level (integer)
                    value is a dict {'V': min expected total cost,
                                     'a': minimizing action}
    """

    # parse parameter
    T = param['T']
    k = param['k'][t]
    h = param['h'][t]
    p = param['p'][t]
    Ed = param['Ed'][t]

    if t == T:
        # there is no next value function
        nV = None
    else:
        # fetch next period value function; forward `disp` as well so that
        # progress is reported for every period, not only the outermost one
        nV = opt_DP(t+1, param, distri, srange, arange, disp)

    # solve min problem at the current period
    table = dict()
    for x in range(*srange):
        # action indexed current value
        Va = inner_problem(x, arange, distri, Ed, k, h, p, nV)
        # min() keeps the first minimizer, i.e. the smallest optimal action
        a, Vmin = min(Va.items(), key=lambda item: item[1])
        table[x] = {'V': Vmin, 'a': a}

    # display progress
    if disp:
        print(f"Period {t} is finished.")

    return table


def inner_problem(x, arange, distri, mu, k, h, p, tbl, dmax=70):
    """
    Return the expected cost of every action when current inventory is x.

    For each action a the cost is
        k*a + sum_d pmf(d) * ( max(h*nx, -p*nx) + V_next(nx) ),  nx = x + a - d
    where d runs over 0..dmax-1 and V_next is 0 when tbl is None.

    Inputs:
        x      ~ current inventory level
        arange ~ set of available actions, (min, max, step)
        distri ~ distribution object providing pmf(d, mu), see scipy.stats
        mu     ~ parameter of the distribution
        k      ~ unit order cost
        h      ~ unit hold cost
        p      ~ unit backorder cost
        tbl    ~ value function of the next period (dict keyed by inventory
                 level with {'V': ..., 'a': ...} entries), or None
        dmax   ~ demand values 0..dmax-1 are used in the expectation
                 (default 70, the previously hard-coded support)
    Output:
        value  ~ hash table, key is the action, value is its expected cost;
                 the cost is inf when a positive-probability demand leads to
                 an inventory level that is missing from tbl
    """

    # the pmf does not depend on the action: evaluate it once for all d
    probs = distri.pmf(np.arange(dmax), mu)

    value = dict()
    for a in range(*arange):
        total = k * a
        # calculate expectation
        for d, pr in enumerate(probs):
            if pr == 0:
                # a zero-probability demand contributes nothing; skipping it
                # also avoids 0 * inf = nan when nx is missing from tbl
                continue
            nx = x + a - d
            f = max(h*nx, -p*nx)
            if tbl is None:
                future = 0
            else:
                entry = tbl.get(nx)
                future = np.inf if entry is None else entry['V']
            total = total + pr * (f + future)
        value[a] = total
    return value


# time-varying demand and constant costs
#
# Five periods, t = 0..T with T = 4; every list below is indexed by t.
#   k  ~ unit order cost (constant)
#   h  ~ unit hold cost; the last period uses -1
#        (NOTE(review): presumably a salvage value for leftover stock - confirm)
#   p  ~ unit backorder cost; the last period uses 11
#   Ed ~ distribution parameter of the demand (Poisson by default in opt_DP)

param1 = {
    'T': 4,
    'k': [2, 2, 2, 2, 2],
    'h': [1, 1, 1, 1, -1],
    'p': [9, 9, 9, 9, 11],
    'Ed': [40, 40, 40, 2, 2]
}

# solve from period 0 and report the minimal expected cost at inventory 0
V = opt_DP(0, param1)
print(f"The optimal value is: {V[0]['V']:.2f}")

The optimal value is: 293.93


# constant demand and time-varying costs
#
# Same layout as a five-period instance (t = 0..T with T = 4): each list is
# indexed by t. Here only the unit order cost k changes between periods,
# while the demand parameter Ed stays at 30.

param2 = {
    'T': 4,
    'k': [7, 8, 3, 4, 1.5],
    'h': [1, 1, 1, 1, -1],
    'p': [9, 9, 9, 9, 11],
    'Ed': [30, 30, 30, 30, 30]
}

# solve from period 0 and report the minimal expected cost at inventory 0
V = opt_DP(0, param2)
print(f"The optimal value is: {V[0]['V']:.2f}")

The optimal value is: 752.45


def inner_problem_2(dt, x, arange, distri, mu, k, h, p, tbl, dmax=100):
    """
    Return the best (action, value) pair when current inventory is x and
    the demand of the current period is known to be dt.

    The stage cost is still an expectation over the demand distribution,
        k*a + sum_d pmf(d) * max(h*(x+a-d), -p*(x+a-d)),  d = 0..dmax-1,
    but the continuation value is read at the single next inventory level
    nx = x + a - dt.

    Inputs:
        dt     ~ demand used for the inventory transition of this period
        x      ~ current inventory level
        arange ~ set of available actions, (min, max, step)
        distri ~ distribution object providing a vectorized pmf(d, mu)
        mu     ~ parameter of the distribution
        k      ~ unit order cost
        h      ~ unit hold cost
        p      ~ unit backorder cost
        tbl    ~ value function of the next period (dict keyed by inventory
                 level with {'V': ..., 'a': ...} entries), or None
        dmax   ~ demand values 0..dmax-1 are used in the expectation
                 (default 100, the previously hard-coded support)
    Output:
        (a, V) ~ tuple of the minimizing action and its value; ties are
                 resolved in favor of the first action in arange, and V is
                 inf when nx is missing from tbl for every action
    """

    # demand support and its probabilities do not depend on the action
    d = np.arange(dmax)
    pr = distri.pmf(d, mu)

    value = dict()
    for a in range(*arange):
        # expected holding / backorder cost of the end-of-period level
        level = x + a - d
        stage = pr @ np.maximum(h * level, -p * level)
        # continuation value along the given demand path
        nx = x + a - dt
        nV = {'V': 0, 'a': 0} if tbl is None else tbl.get(nx, {'V': np.inf, 'a': np.inf})
        value[a] = k * a + stage + nV['V']
    return min(value.items(), key=lambda item: item[1])


def smooth_DP(t, demands, param, distri=poisson,
             srange=(-100, 100, 1), arange=(0, 100, 1), disp=False):
    """
    Return the period-t value table along one given demand path.

    Works like a backward recursion from period T down to t, but the
    inventory transition of each period uses the supplied demand
    demands[t] (see inner_problem_2) instead of averaging the continuation
    value over the demand distribution.

    Inputs:
        t       ~ period index, integer in 0..param['T']
        demands ~ demand of every period, indexable by t
        param   ~ parameters, dict with keys 'T', 'k', 'h', 'p', 'Ed'
                  ('T' is the last period index, the others are indexable
                  by t)
        distri  ~ distribution, see scipy.stats
        srange  ~ value range of inventory level, (min, max, step)
        arange  ~ value range for action, (min, max, step)
        disp    ~ if True, print a message after each period is solved
    Output:
        table   ~ hash table (dict),
                    key is the inventory level (integer)
                    value is a dict {'V': minimal value, 'a': best action}
    """
    # parse parameter
    T = param['T']
    k = param['k'][t]
    h = param['h'][t]
    p = param['p'][t]
    Ed = param['Ed'][t]
    dt = demands[t]

    if t == T:
        # there is no next value function
        nV = None
    else:
        # fetch next period value function; forward the grids and `disp`
        # so that later periods use the caller's settings, not the defaults
        nV = smooth_DP(
            t+1, demands, param, distri=distri,
            srange=srange, arange=arange, disp=disp
        )

    # solve min problem at the current period
    table = dict()
    for x in range(*srange):
        # best action and its value for this inventory level
        a, Vmin = inner_problem_2(
            dt, x, arange, distri, Ed, k, h, p, nV
        )
        table[x] = {'V': Vmin, 'a': a}

    # display progress
    if disp:
        print(f"Period {t} is finished.")

    return table


def Monte_Carlo(repeats, param, distri):
    """
    Sample demand paths and return the smooth_DP value of each path.

    Inputs:
        repeats ~ number of sampled demand paths
        param   ~ parameters, dict with keys 'T', 'k', 'h', 'p', 'Ed'
        distri  ~ distribution providing rvs(mu, size=...), see scipy.stats;
                  it is also forwarded to smooth_DP
    Output:
        costs   ~ array of length repeats; entry i is the period-0 value at
                  inventory level 0 for the i-th sampled path
    """
    # number of periods: t = 0..param['T']
    T = param['T'] + 1

    # one row per sampled path, one column per period
    demands = np.zeros((repeats, T))
    for t in range(T):
        mu = param['Ed'][t]
        demands[:, t] = distri.rvs(mu, size=repeats)

    costs = np.zeros(repeats)
    for i in range(repeats):
        # use the same distribution for the stage-cost expectation as the
        # one the demands were drawn from
        tbl = smooth_DP(0, demands[i, :], param, distri=distri)
        costs[i] = tbl[0]['V']

    return costs


# Monte Carlo experiment on the time-varying-demand instance
# (T = 4, i.e. periods 0..4; lists are indexed by the period).
param1 = {
    'T': 4,
    'k': [2, 2, 2, 2, 2],
    'h': [1, 1, 1, 1, -1],
    'p': [9, 9, 9, 9, 11],
    'Ed': [40, 40, 40, 2, 2]
}

# 100 sampled demand paths; no random seed is set in the visible code
costs = Monte_Carlo(100, param1, poisson)


# sample mean of the per-path values (bare expression, shown as cell output)
costs.mean()

293.2754128462732


# sample standard deviation of the per-path values (numpy default, ddof=0)
costs.std()

18.45823672806469