-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathrisk_factors.py
More file actions
executable file
·164 lines (140 loc) · 6.47 KB
/
risk_factors.py
File metadata and controls
executable file
·164 lines (140 loc) · 6.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# import libraries
import numpy as np
import pandas as pd
from scipy import stats
from scipy.spatial import distance
# high-low spread estimator (hlse)
def hlse(ohlc_df, frequency='daily'):
    """
    Computes the high-low spread estimator, an estimate of bid-offer spreads, a measure of liquidity risk.
    See Corwin & Schultz (2011) for details: https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1106193

    Parameters
    ----------
    ohlc_df: DataFrame
        DataFrame with DatetimeIndex and Open, High, Low and Close (OHLC) prices from which to compute
        the high-low spread estimates. Columns are accessed as lowercase 'high', 'low', 'close'.
    frequency: str, {'daily', 'weekly', 'monthly'}, default 'daily'
        daily: daily bid-offer spread estimate.
        weekly: weekly bid-offer spread estimate, resampled over a weekly frequency as the mean of daily estimates.
        monthly: monthly bid-offer spread estimate, resampled over a monthly frequency as the mean of daily estimates.

    Returns
    -------
    S: Series
        DatetimeIndex and time series of high-low spread estimates.
    """
    # work on a copy so the caller's DataFrame is not polluted with helper columns
    df = ohlc_df.copy()
    # 2-day high and 2-day low, used for the gamma term
    high_2d, low_2d = df.high.rolling(2).max(), df.low.rolling(2).min()
    # overnight moves vs. previous close: gap_up > 0 means the whole day traded above
    # yesterday's close; gap_down < 0 means it traded entirely below it
    gap_up = df.low - df.close.shift(1)
    gap_down = df.high - df.close.shift(1)
    # gap-up adjustment: shift the day's range down by the size of the gap
    high_adj = np.where(gap_up > 0, df.high - gap_up, df.high)
    low_adj = np.where(gap_up > 0, df.low - gap_up, df.low)
    # gap-down adjustment applied ON TOP of the gap-up-adjusted prices (the previous
    # version rebuilt both columns from the raw high/low here, silently discarding
    # the gap-up adjustment); wrap in Series to restore the DatetimeIndex for .shift
    high_adj = pd.Series(np.where(gap_down < 0, high_adj - gap_down, high_adj), index=df.index)
    low_adj = pd.Series(np.where(gap_down < 0, low_adj - gap_down, low_adj), index=df.index)
    # beta: sum of squared log high-low ratios over two consecutive days
    B = (np.log(high_adj / low_adj)) ** 2 + (np.log(high_adj.shift(1) / low_adj.shift(1))) ** 2
    # gamma: squared log ratio of the 2-day high to the 2-day low
    G = (np.log(high_2d / low_2d)) ** 2
    # alpha per Corwin & Schultz; 3 - 2*sqrt(2) is the common denominator
    denom = 3 - 2 * np.sqrt(2)
    alpha = (np.sqrt(2 * B) - np.sqrt(B)) / denom - np.sqrt(G / denom)
    # spreads cannot be negative, so clip negative alphas to 0 (NaNs pass through)
    alpha = pd.Series(np.where(alpha < 0, 0, alpha), index=alpha.index)
    # substitute alpha into equation 14 to get the high-low spread estimate S
    S = (2 * (np.exp(alpha) - 1)) / (1 + np.exp(alpha))
    # resample daily estimates to the requested frequency using the mean
    if frequency == 'weekly':
        S = S.resample('W').mean()
    elif frequency == 'monthly':
        S = S.resample('M').mean()
    # drop NaNs produced by the shift/rolling warm-up
    return S.dropna()
# turbulence index
def turbulence(ret_df, window_type='rolling', lookback=36, p_vals=False):
    """
    Computes the Mahalanobis distance from a basket of asset returns, aka the turbulence index.

    Turbulence measures the statistical unusualness of a set of returns given their historical pattern of behavior.
    High turbulence occurs when both the correlation and volatility of a basket of returns is far from the norm.
    As such, it can be a useful measure of tail risk at a portfolio level, i.e. when diversification is most likely to fail.
    For more details, see Skulls, Financial Turbulence, and Risk Management by Kritzman and Li (2010):
    https://www.tandfonline.com/doi/abs/10.2469/faj.v66.n5.3

    Parameters
    ----------
    ret_df: DataFrame
        DataFrame with DatetimeIndex and returns.
    window_type: str, {'fixed', 'expanding', 'rolling'}, default 'rolling'
        Window type. 'fixed' (or None) uses all observations in a single calculation;
        'expanding'/'rolling' re-estimate the mean and covariance for each window.
    lookback: int
        Number of observations to include in the window. Ignored for 'fixed' windows.
    p_vals: bool (optional), default False
        Adds the p-value of each turbulence observation. Those with p-values < 0.001 are generally considered outliers.

    Returns
    -------
    df: DataFrame
        DataFrame with DatetimeIndex and 'turb' column (plus 'turb_pval' if requested).
    """
    # drop NaNs on a copy so the caller's DataFrame is not mutated
    ret_df = ret_df.dropna()

    def _md_last(ret):
        # squared Mahalanobis distance of the window's LAST observation from the window mean;
        # pinv guards against a singular sample covariance
        diff = ret.subtract(ret.mean()).to_numpy()
        inv_cov = np.linalg.pinv(ret.cov())
        return diff[-1] @ inv_cov @ diff[-1]

    if window_type == 'fixed' or window_type is None:
        # single pass: each observation's distance from the full-sample mean/covariance
        diff = ret_df.subtract(ret_df.mean()).to_numpy()
        inv_cov = np.linalg.pinv(ret_df.cov())
        md = (diff @ inv_cov @ diff.T).diagonal()
        df = pd.DataFrame(md, index=ret_df.index, columns=['turb'])
    else:
        # expanding or rolling window: keep only the last observation's distance per window
        # (computing just the last row's quadratic form avoids the full O(w^2) matrix)
        vals = []
        for end in range(lookback, len(ret_df) + 1):
            start = 0 if window_type == 'expanding' else end - lookback
            vals.append(_md_last(ret_df.iloc[start:end]))
        df = pd.DataFrame(vals, index=ret_df.index[lookback - 1:], columns=['turb'])
    if p_vals:
        # turbulence is approximately chi2-distributed; sf(x) == 1 - cdf(x) is the p-value
        # NOTE(review): degrees of freedom k-1 kept from the original code — confirm
        # against Kritzman & Li, which is often cited with k degrees of freedom
        df['turb_pval'] = stats.chi2.sf(df.turb, ret_df.shape[1] - 1)
    return df
# market divergence index (MDI)
def mdi(series, log=False, lookback=30):
    """
    Computes the signal-to-noise ratio of a price series and the market divergence index
    of a basket of price series.

    Parameters
    ----------
    series: Series or DataFrame
        Series or DataFrame with DatetimeIndex and price series.
    log: bool, default False
        Computes log of price series before the ratio.
    lookback: int
        Number of observations to include in the window for the signal-to-noise ratio.

    Returns
    -------
    snr: Series
        Signal-to-noise ratio of the price series, or the market divergence index
        (cross-sectional mean of SNRs) for a basket, over an n-day lookback window.
    """
    # optionally work with log prices
    if log:
        series = np.log(series)
    # signal to noise ratio: absolute net move over the window divided by the
    # path length (sum of absolute one-period moves) over the same window
    snr = (series - series.shift(lookback)).abs() / series.diff().abs().rolling(lookback).sum()
    # market divergence index: cross-sectional mean of the basket's SNRs
    if isinstance(series, pd.DataFrame):
        snr = snr.mean(axis=1)
    return snr
# ADX