# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from ppca import PPCA
from sklearn.linear_model import LassoLarsIC, Lasso
from sklearn.metrics import r2_score
import statsmodels.api as sm
from scipy.stats import ttest_1samp, chi2_contingency, spearmanr, kendalltau, contingency

# NOTE: add_lags, discretize and normalize are helper functions defined elsewhere in this repository.
# principal component analysis
def pca(features_df, method='ppca', pc_name=None, n_components=1, min_var_explained=0.9):
    """
    Principal component analysis. Dimensionality reduction technique which converts a set of correlated features
    to a smaller number of uncorrelated features (principal components). Makes use of the scikit-learn package for
    PCA and the PPCA package for probabilistic PCA.

    Parameters
    ----------
    features_df: DataFrame
        DataFrame with DatetimeIndex and features.
    method: str, {'ppca', 'pca'}, default 'ppca'
        Probabilistic PCA ('ppca') or standard PCA ('pca').
    pc_name: str, default None
        Name/prefix for principal components.
    n_components: int, default 1 for pca, 2 for ppca
        Number of principal components to keep.
    min_var_explained: float
        Minimum variance to be explained by the principal components. Additional principal components are added
        automatically until this minimum threshold is reached.

    Returns
    -------
    features_df: DataFrame
        DataFrame with DatetimeIndex, features and principal components added to it.
    """
    # probabilistic pca
    if method == 'ppca':
        # min number of components for PPCA
        n_components = 2
        # drop rows where all features are NaN
        features_df.dropna(how='all', inplace=True)
        # fit ppca
        ppca = PPCA()
        ppca.fit(features_df.to_numpy(), d=n_components)
        pcs = ppca.transform()
        # variance explained by first n components
        variance_explained = ppca.var_exp[-1]
        # keep adding PCs until the min variance explained threshold is met
        while variance_explained < min_var_explained:
            # add component to pca
            n_components += 1
            # fit ppca
            ppca = PPCA()
            ppca.fit(features_df.to_numpy(), d=n_components)
            pcs = ppca.transform()
            # variance explained by first n components
            variance_explained = ppca.var_exp[-1]
        # print number of PCs added
        print('{} principal components added'.format(len(ppca.var_exp)))
    # pca
    else:
        # drop NaNs
        features_df.dropna(inplace=True)
        # fit pca
        pca = PCA(n_components=n_components)
        pcs = pca.fit_transform(features_df)
        # variance explained by first n components
        variance_explained = pca.explained_variance_ratio_.cumsum()[-1]
        # keep adding PCs until the min variance explained threshold is met
        while variance_explained < min_var_explained:
            # add component to pca
            n_components += 1
            # fit pca
            pca = PCA(n_components=n_components)
            pcs = pca.fit_transform(features_df)
            # variance explained by first n components
            variance_explained = pca.explained_variance_ratio_.cumsum()[-1]
        # print number of PCs added
        print('{} principal components added'.format(len(pca.explained_variance_ratio_)))
    # add pcs to features df
    for i in range(pcs.shape[1]):
        if pc_name is not None:
            features_df[pc_name + '_pc' + str(i + 1)] = pcs[:, i]
        else:
            features_df['pc' + str(i + 1)] = pcs[:, i]
    # print variance explained by PCs
    print("Variance explained: {}\n".format(round(variance_explained, 4)))

    return features_df
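
# Usage sketch (not part of the original module): synthetic daily factor data with a
# DatetimeIndex; the column names and 'mkt' prefix are hypothetical.
#
#   idx = pd.date_range('2020-01-01', periods=500, freq='D')
#   factors = pd.DataFrame(np.random.randn(500, 4), index=idx,
#                          columns=['mom', 'carry', 'value', 'vol'])
#   out = pca(factors, method='pca', pc_name='mkt', n_components=1, min_var_explained=0.9)
#   out.filter(like='mkt_pc').head()  # principal components appended as new columns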
# orthogonalization of correlated factors in a multifactor model
def orthogonalize_factors(factors_df, output_format='df'):
    """
    As described by Klein and Chow (2013) in Orthogonalized Factors and Systematic Risk Decompositions:
    https://www.sciencedirect.com/science/article/abs/pii/S1062976913000185

    They propose an optimal simultaneous orthogonal transformation of factors, following the so-called symmetric
    procedure of Schweinler and Wigner (1970) and Löwdin (1970). The data transformation allows the identification
    of the underlying uncorrelated components of common factors without changing their correlation with the
    original factors. It also facilitates the systematic risk decomposition by disentangling the coefficient of
    determination (R²) based on the factors' volatilities, which makes it easier to distinguish the marginal risk
    contribution of each common risk factor to asset returns.

    Parameters
    ----------
    factors_df: DataFrame
        DataFrame with DatetimeIndex and factors to orthogonalize.
    output_format: str, {'array', 'df'}, default 'df'
        Select the output format, numpy.array or DataFrame.

    Returns
    -------
    F_orth: numpy.array or DataFrame
        numpy.array of orthogonalized factors or DataFrame with DatetimeIndex and orthogonalized factors.
    """
    # before orthogonalization, factors should be normalized
    # if factors are in a DataFrame, convert to numpy for speed and ease of computation
    if isinstance(factors_df, pd.DataFrame):
        F = factors_df.to_numpy()
    else:
        F = factors_df
    # compute cov matrix
    M = np.cov(F.T, bias=False)
    # factorize cov matrix M
    u, s, vh = np.linalg.svd(M)
    # solve for the symmetric matrix
    S = u @ np.diag(s ** (-0.5)) @ vh
    # rescale the symmetric matrix to the original variances
    S_rs = S @ (np.diag(np.sqrt(M)) * np.eye(S.shape[0], S.shape[1]))
    # convert to orthogonalized matrix
    F_orth = F @ S_rs
    # if selected, convert output format back to DataFrame
    if output_format == 'df':
        F_orth = pd.DataFrame(F_orth, index=factors_df.index, columns=factors_df.columns)

    return F_orth
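
# Usage sketch (not part of the original module): after orthogonalization the factor
# correlation matrix should be close to the identity; the inputs are hypothetical.
#
#   idx = pd.date_range('2020-01-01', periods=500, freq='D')
#   raw = pd.DataFrame(np.random.randn(500, 3), index=idx, columns=['f1', 'f2', 'f3'])
#   raw['f2'] = raw['f1'] * 0.7 + raw['f2'] * 0.3    # induce correlation
#   orth = orthogonalize_factors(raw, output_format='df')
#   print(orth.corr().round(2))                      # off-diagonals near 0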
# multifactor risk model regressions and output tables (df)
def factor_exposures(ret_df, factors_df):
    """
    Multifactor risk model regressions and output.

    Multivariate regression which iterates through individual asset returns (columns in ret_df) regressed on
    factor returns (factors_df). Output for each asset is provided in a DataFrame (table_df) and stored in a
    dictionary of DataFrames (tables_dict) for all asset returns in ret_df. Ideally, factors should be
    orthogonalized before running this regression to improve statistical properties and interpretability.
    See orthogonalize_factors for the orthogonalization algorithm.

    Parameters
    ----------
    ret_df: DataFrame
        DataFrame with DatetimeIndex and asset returns; each column is the dependent variable in a multivariate
        regression on the factor returns.
    factors_df: DataFrame
        DataFrame with DatetimeIndex and factor returns; these are the explanatory variables in the multifactor
        risk model regressions.

    Returns
    -------
    tables_dict: dictionary of DataFrames
        Regression output for each asset is stored under the asset ticker in the dictionary;
        e.g. tables_dict['BTC'] provides the output for the regression of Bitcoin on the factors in factors_df.
        Output includes beta estimates (coefficients), t-statistics, p-values, r-squared (for all factors) and
        decomposed r-squared (for each factor).
    """
    # create df dict
    tables_dict = {}
    # iterate through asset cols
    for col in ret_df.columns:
        # regress asset returns on predictors
        data = pd.concat([ret_df.loc[:, [col]], factors_df], join='outer', axis=1)
        y, X = data.iloc[:, 0], data.iloc[:, 1:]
        # add constant
        X = sm.add_constant(X)
        # specify model
        model = sm.OLS(y, X, missing='drop')
        # fit model
        res = model.fit()
        # get stats
        rsq, betas, t_stats, p_vals = res.rsquared, res.params, res.tvalues, res.pvalues
        # get standard deviations of factors and asset returns
        f_sigma, a_sigma = X.std(), y.std()
        # compute risk contribution (decomposed r-squared) of each factor
        r_sq = ((betas * f_sigma) ** 2) / a_sigma ** 2
        # stats to be included in table output
        stats = {'beta': betas, 't_stat': t_stats, 'p_val': p_vals, 'var_expl': r_sq}
        # create df table with dictionary data
        table_df = pd.DataFrame(stats).T
        # add rsq as last col
        table_df[''] = [np.nan, np.nan, np.nan, rsq]
        # rename const col
        table_df.rename(columns={'const': 'exp_ret'}, inplace=True)
        # add to tables dict and round
        tables_dict[col] = table_df.round(decimals=4)

    return tables_dict
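
# Usage sketch (not part of the original module): hypothetical tickers and factor names.
#
#   idx = pd.date_range('2020-01-01', periods=500, freq='D')
#   facs = pd.DataFrame(np.random.randn(500, 3), index=idx, columns=['f1', 'f2', 'f3'])
#   rets = pd.DataFrame(np.random.randn(500, 2) * 0.02, index=idx, columns=['BTC', 'ETH'])
#   tables = factor_exposures(rets, orthogonalize_factors(facs))
#   tables['BTC']   # betas, t-stats, p-values and decomposed r-squared per factor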
# Lasso regression of a target on features to be used for feature selection
def lasso_feature_selection(target, features_df, alpha=0.05, auto_selection=True, criterion='aic'):
    """
    LASSO supervised learning feature selection, used as a preliminary step in the targeted PCA algorithm.
    Selects a subset of relevant features from a broader set of features by removing the redundant or irrelevant
    features, i.e. features which are strongly correlated in the data, without much loss of information.

    Parameters
    ----------
    target: Series or DataFrame
        Series or DataFrame with DatetimeIndex and target variable (y).
    features_df: DataFrame
        DataFrame with DatetimeIndex and features (X).
    alpha: float, default 0.05
        Constant that multiplies the L1 regularization term. Alpha = 0 is equivalent to an OLS regression.
    auto_selection: bool, default True
        Lasso model fit with Lars using BIC or AIC for model selection.
    criterion: str, {'aic', 'bic'}, default 'aic'
        AIC is the Akaike information criterion and BIC is the Bayes information criterion. Such criteria are
        useful to select the value of the regularization parameter by making a trade-off between the goodness of
        fit and the complexity of the model. A good model should explain the data well while being simple.

    Returns
    -------
    selected_features: list
        List of the subset of selected features from the LASSO regression.
    """
    # if target is Series, convert to DataFrame
    if isinstance(target, pd.Series):
        target = target.to_frame()
    # create reg df for lasso regression and feature selection
    reg_df = target.merge(features_df, how='outer', left_index=True, right_index=True).dropna()
    # create target and predictors
    # y first col, X rest of df
    X, y = reg_df.iloc[:, 1:], reg_df.iloc[:, 0]
    # auto selection
    if auto_selection:
        # specify model (note: the 'normalize' parameter was removed in newer scikit-learn versions)
        model = LassoLarsIC(criterion=criterion, normalize=False)
    else:
        model = Lasso(alpha=alpha)
    # fit model
    model.fit(X, y)
    print('Regressing {} on asset returns\n'.format(y.name))
    # predictions y_hat
    y_hat = model.predict(X)
    # compute R^2
    r2 = r2_score(y, y_hat)
    print('R^2: {} \n'.format(round(r2, 2)))
    # selected features
    estimated_coef = np.nonzero(model.coef_)
    coef_idxs = estimated_coef[0].tolist()
    selected_features = X.iloc[:, coef_idxs].columns.tolist()
    print('{} features were selected: {}\n'.format(len(selected_features), selected_features))
    # compute removed features percentage
    percent_removed_features = 100 - (round(len(coef_idxs) / X.shape[1], 2) * 100)
    print('{}% of features were removed\n'.format(percent_removed_features))

    return selected_features
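
# Usage sketch (synthetic data, not part of the original module): only the informative
# features should survive the LASSO selection.
#
#   idx = pd.date_range('2020-01-01', periods=500, freq='D')
#   X = pd.DataFrame(np.random.randn(500, 10), index=idx,
#                    columns=['x{}'.format(i) for i in range(10)])
#   y = (X['x0'] * 0.8 + X['x1'] * 0.5 + np.random.randn(500) * 0.1).rename('y')
#   selected = lasso_feature_selection(y, X, auto_selection=True, criterion='bic')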
# targeted PCA algorithm
def tpca(target, features_df, min_var_explained=0.9):
    """
    Targeted PCA which uses LASSO supervised learning feature selection as a preliminary step before PCA.
    Selects a subset of relevant features from a broader set of features by removing the redundant or irrelevant
    features, i.e. features which are strongly correlated in the data, without much loss of information.
    Runs a PCA on the subset of features selected by the LASSO regression.

    See Forecasting economic time series using targeted predictors by Bai and Ng (2008) for details:
    https://www.sciencedirect.com/science/article/abs/pii/S0304407608001085

    Parameters
    ----------
    target: Series or DataFrame
        Series or DataFrame with DatetimeIndex and target variable (y).
    features_df: DataFrame
        DataFrame with DatetimeIndex and features (X).
    min_var_explained: float, default 0.9
        Minimum variance explained by the first L principal components.

    Returns
    -------
    pcs_df: DataFrame
        DataFrame with DatetimeIndex and the first L principal components.
    """
    # if target is Series, convert to DataFrame
    if isinstance(target, pd.Series):
        target = target.to_frame()
    # create df for lasso regression and feature selection
    df = target.merge(features_df, how='outer', left_index=True, right_index=True).dropna()
    # add lags (add_lags is a helper function defined elsewhere in this repository)
    lasso_df = add_lags(df)
    # select features from LASSO regression
    selected_features = lasso_feature_selection(lasso_df.iloc[:, 0], lasso_df.iloc[:, 1:])
    # extract the first L PCs from the features selected by the lasso
    pcs_df = pca(lasso_df.loc[:, selected_features], pc_name='tpca', method='ppca',
                 min_var_explained=min_var_explained).loc[:, 'tpca_pc1':]

    return pcs_df
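
# Usage sketch (synthetic data, not part of the original module; assumes the add_lags
# helper from this repository is importable):
#
#   idx = pd.date_range('2020-01-01', periods=500, freq='D')
#   X = pd.DataFrame(np.random.randn(500, 10), index=idx,
#                    columns=['x{}'.format(i) for i in range(10)])
#   y = (X['x0'] * 0.8 + np.random.randn(500) * 0.1).rename('y')
#   pcs = tpca(y, X, min_var_explained=0.9)   # targeted principal components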
# project target on predictors to estimate fitted target values (y-hat)
def project_target(target, features_df):
    """
    Project the target variable on the features to obtain the fitted values for the target variable. Can be used
    for instrumental variables estimation.

    Parameters
    ----------
    target: Series
        Series with DatetimeIndex and target variable (y).
    features_df: DataFrame
        DataFrame with DatetimeIndex and features (X).

    Returns
    -------
    fitted_target: Series
        Series with DatetimeIndex and fitted values for the target variable.
    """
    # convert Series to DataFrame
    if isinstance(target, pd.Series):
        target = target.to_frame()
    # create df for the regression and drop NaNs
    df = target.merge(features_df, how='outer', left_index=True, right_index=True).dropna()
    # X: predictors, y: target
    X, y = df.iloc[:, 1:], df.iloc[:, 0]
    # add constant
    X = sm.add_constant(X)
    # specify model
    model = sm.OLS(y, X)
    # fit model
    res = model.fit()
    # y-hat
    fitted_target = res.predict(X)
    # print regression details
    print('Projecting {} onto the features'.format(y.name))
    # print output
    print(res.summary())

    return fitted_target
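
# Usage sketch (synthetic data, not part of the original module):
#
#   idx = pd.date_range('2020-01-01', periods=500, freq='D')
#   X = pd.DataFrame(np.random.randn(500, 3), index=idx, columns=['z1', 'z2', 'z3'])
#   y = (X['z1'] * 0.5 - X['z2'] * 0.2 + np.random.randn(500) * 0.1).rename('y')
#   y_hat = project_target(y, X)   # first-stage fitted values, e.g. for IV estimation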
# Information coefficient (IC)
def IC(factors, target_ret, lookahead=14, pc1=True, factor_bins=5, target_bins=5, ic_rolling_window=365):
    """
    Calculates the correlation for returns, or the degree of association for labels (bins), between the alpha
    factors (features) and forward returns (target). Correlation measures in what way two variables are related,
    whereas association measures how related the variables are. Rank correlation measures such as the Spearman
    rank and Kendall's tau are computed on the discretized factors and forward returns, along with
    contingency-table association measures (Cramer's V, Tschuprow's T, Pearson's contingency coefficient and
    chi-squared) which measure the degree to which two nominal or ordinal variables are related, i.e. the level
    of their association. Both factors and target should be discretized before computing these association
    measures.

    Parameters
    ----------
    factors: Series or DataFrame
        Series or DataFrame with DatetimeIndex and alpha factors.
    target_ret: Series
        Series with DatetimeIndex and target returns.
    lookahead: int, default 14
        Number of periods to shift forward returns (target).
    pc1: bool, default True
        Compute the principal components of the factors and add them to the factors.
    factor_bins: int, default 5
        Number of bins into which to discretize/label the normalized factors. Must be larger than 1.
    target_bins: int, default 5
        Number of bins into which to discretize/label the normalized target. Must be larger than 1.
    ic_rolling_window: int, default 365
        Size of the rolling window (in periods) used for the rolling Spearman rank correlation.

    Returns
    -------
    dict_dfs: dictionary of DataFrames
        'metrics' DataFrame with the computed statistical association/correlation metrics;
        'ic_rolling' DataFrame with the rolling-window information coefficient.
    """
    # bins must be at least 2
    if factor_bins < 2 or target_bins < 2:
        print("Number of bins must be larger than 1. Please increase number of bins.\n")
        return
    else:
        # if factors or target are Series, convert to DataFrame
        if isinstance(factors, pd.Series):
            factors = factors.to_frame()
        if isinstance(target_ret, pd.Series):
            target_ret = target_ret.to_frame()
        # get principal components of factors
        if pc1:
            factors = pca(factors, method='pca', pc_name='trend', min_var_explained=0.5)
            # constrain pc1 to be positively correlated with factors
            col = factors.loc[:, factors.columns.str.contains('pc1')].columns[0]
            if factors.corr()[col].mean() < 0:
                factors[col] = factors[col] * -1
        # discretize factors and target (discretize is a helper function defined elsewhere in this repository)
        # factor bins
        factor_quantiles_df = discretize(factors, bins=factor_bins)
        # target bins
        target_quantiles_df = discretize(target_ret, bins=target_bins)
        # merge factors and target, shift target by lookahead periods
        df = factors.merge(target_ret.shift(lookahead * -1), how='outer', left_index=True,
                           right_index=True).dropna()
        quantiles_df = factor_quantiles_df.merge(target_quantiles_df.shift(lookahead * -1), how='outer',
                                                 left_index=True, right_index=True).dropna()
        # create empty dfs for correlation measures
        metrics, ic_df = pd.DataFrame(index=factors.columns), pd.DataFrame(index=df.index, columns=factors.columns)
        # calculate correlation and association measures
        # loop through factors
        for col in factors.columns:
            # contingency table
            cont_table = pd.crosstab(quantiles_df[col], quantiles_df.iloc[:, -1])
            # add metrics
            metrics.loc[col, 'IC/spearman_rank'] = spearmanr(quantiles_df[col], df.iloc[:, -1])[0]
            metrics.loc[col, 'p-val'] = spearmanr(quantiles_df[col], df.iloc[:, -1])[1]
            metrics.loc[col, 'kendall_tau'] = kendalltau(quantiles_df[col], df.iloc[:, -1])[0]
            metrics.loc[col, 'cramer_v'] = contingency.association(cont_table, method='cramer')
            metrics.loc[col, 'tschuprow_t'] = contingency.association(cont_table, method='tschuprow')
            metrics.loc[col, 'pearson_cc'] = contingency.association(cont_table, method='pearson')
            metrics.loc[col, 'chi2'] = chi2_contingency(cont_table)[0]
            metrics.loc[col, 'autocorrelation'] = \
                spearmanr(quantiles_df[col].iloc[1:].dropna(), quantiles_df[col].shift(1).dropna())[0]
            # window size
            window_size = ic_rolling_window
            # while loop for rolling window spearman rank corr
            while window_size <= df.shape[0]:
                # compute spearman rank correlation over the trailing window
                ic_df.iloc[window_size - 1, ic_df.columns.get_loc(col)] = \
                    spearmanr(quantiles_df[col].iloc[window_size - ic_rolling_window:window_size],
                              df.iloc[window_size - ic_rolling_window:window_size, -1])[0]
                window_size += 1
        # plot ic df
        plt.style.use('ggplot')
        ic_df.plot(legend=True, figsize=(15, 7), linewidth=2, rot=0, title='Information Coefficient',
                   ylabel='{}-day rolling window'.format(ic_rolling_window));
        # create dict to store dfs
        dict_dfs = {'metrics': metrics.sort_values(by='IC/spearman_rank', ascending=False).round(decimals=4),
                    'ic_rolling': ic_df.dropna()}

        return dict_dfs
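
# Usage sketch (synthetic data, not part of the original module; assumes the discretize
# helper from this repository is importable):
#
#   idx = pd.date_range('2019-01-01', periods=800, freq='D')
#   factors = pd.DataFrame(np.random.randn(800, 3), index=idx, columns=['f1', 'f2', 'f3'])
#   target = pd.Series(np.random.randn(800) * 0.02, index=idx, name='ret')
#   out = IC(factors, target, lookahead=14, factor_bins=5, target_bins=5)
#   out['metrics']       # IC and association measures per factor
#   out['ic_rolling']    # rolling-window information coefficient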
# factor returns
def factor_returns(factors, returns, lookahead=14, pc1=True, bins=None, tails=None, tcost=None):
    """
    Screens features for a predictive relationship with the target and provides summary performance statistics.

    Parameters
    ----------
    factors: Series or DataFrame
        Series or DataFrame with DatetimeIndex and factors.
    returns: Series
        Target returns series.
    lookahead: int, default 14
        Number of periods to shift forward returns.
    pc1: bool, default True
        Compute the principal components of the factors and add them to the factors.
    bins: int, default None
        Number of desired bins for discretization.
    tails: str, default None
        Keeps only the tail bins and ignores the middle bins; 'two' for both tails, 'left' for left,
        'right' for right.
    tcost: float, default None
        Transaction fee subtracted from returns to get net returns.
        Depends on the exchange, e.g. the Binance maker/taker fee is 0.001.

    Returns
    -------
    dict_dfs: dictionary of DataFrames
        'net_ret' DataFrame with returns (net of t-cost) of target returns scaled on factor signals;
        'perf' DataFrame with performance metrics of net returns.
    """
    # convert to df if series
    if isinstance(factors, pd.Series):
        factors = factors.to_frame()
    # get principal components of factors
    if pc1:
        factors = pca(factors, method='pca', pc_name='trend', min_var_explained=0.5)
        # constrain pc1 to be positively correlated with factors
        col = factors.loc[:, factors.columns.str.contains('pc1')].columns[0]
        if factors.corr()[col].mean() < 0:
            factors[col] = factors[col] * -1
    # convert factors to a signal in [-1, 1] (normalize is a helper function defined elsewhere in this repository)
    signal_df = (normalize(factors, method='percentile') * 2) - 1
    # discretize signal into signal quantiles between -1 and 1
    signal_quantiles_df = discretize(signal_df, bins=bins, signal=True, tails=tails)
    # compute factor returns and tcosts
    if tcost is None:
        tcost = 0
    if bins is None:
        ret_df = signal_df.shift(lookahead).multiply(returns, axis=0)
        tcost_df = abs(signal_df.diff()).shift(lookahead) * tcost * (1 / lookahead)
    else:
        ret_df = signal_quantiles_df.shift(lookahead).multiply(returns, axis=0)
        tcost_df = abs(signal_quantiles_df.diff()).shift(lookahead) * tcost * (1 / lookahead)
    # compute net returns, scaling by the lookahead horizon
    if lookahead > 1:
        net_ret_df = (ret_df / lookahead) - tcost_df
    else:
        net_ret_df = ret_df - tcost_df
    # create performance metrics df for net returns
    perf_df = factor_performance(net_ret_df, returns)
    perf_df.index.name = 'alpha_factors'
    # create quantiles for the mean return by quantile plot
    if bins is None:
        bins = 5
    factor_quantiles_df = (discretize(factors, bins=bins, tails=tails) + 1).astype(int)
    # compute mean return for each bin
    bins_ret = pd.DataFrame(index=range(1, bins + 1))
    for col in net_ret_df.columns:
        bins_ret[col] = (net_ret_df[col].groupby(factor_quantiles_df[col].shift(lookahead)).mean())
    # name index quantile
    bins_ret.index.name = 'quantile'
    # add top vs bottom quantile bin in index
    bins_ret.loc['top vs. bottom', :] = bins_ret.iloc[-1] - bins_ret.iloc[0]
    # show cum ret and bar chart subplots
    plt.style.use('ggplot')
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 7))
    net_ret_df.cumsum().plot(legend=True, linewidth=2, rot=0, ax=ax1, title='Cumulative returns')
    ax1.set_ylabel('Cumulative returns (net)');
    # plot the mean returns by quantile of the best performing factor
    col = perf_df.index[0]
    bins_ret[col].plot(kind='bar', color='#C59B8E', legend=False, rot=90, ax=ax2,
                       title='Mean Returns (net) by Factor Quantile: {}'.format(col));
    ax2.set_ylabel('Mean returns (net)');
    # create dict to store dfs
    dict_dfs = {'net_ret': net_ret_df, 'perf': perf_df}

    return dict_dfs
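
# Usage sketch (synthetic data, not part of the original module; assumes the normalize
# and discretize helpers from this repository are importable):
#
#   idx = pd.date_range('2019-01-01', periods=800, freq='D')
#   factors = pd.DataFrame(np.random.randn(800, 2), index=idx, columns=['f1', 'f2'])
#   rets = pd.Series(np.random.randn(800) * 0.02, index=idx, name='ret')
#   out = factor_returns(factors, rets, lookahead=14, bins=5, tcost=0.001)
#   out['perf']          # performance metrics of net factor returns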
# factor performance
def factor_performance(factor_ret, returns, freq='daily'):
    """
    Computes key performance metrics for factor returns.

    Parameters
    ----------
    factor_ret: Series or DataFrame
        Series or DataFrame with DatetimeIndex and factor returns series.
    returns: Series
        Series with DatetimeIndex and target returns, used to estimate each factor's alpha and beta.
    freq: str, {'min', 'hourly', 'daily_business', 'daily', 'weekly', 'monthly'}, default 'daily'
        Frequency of returns.

    Returns
    -------
    metrics: DataFrame
        DataFrame with computed performance metrics.
    """
    # annualization adjustment factor
    if freq == 'min':
        ann_adj = 365 * 24 * 60
    elif freq == 'hourly':
        ann_adj = 365 * 24
    elif freq == 'daily_business':
        ann_adj = 252
    elif freq == 'weekly':
        ann_adj = 52
    elif freq == 'monthly':
        ann_adj = 12
    else:
        ann_adj = 365
    # convert to df if series
    if isinstance(factor_ret, pd.Series):
        factor_ret = factor_ret.to_frame()
    # create metrics df and add performance metrics
    metrics = pd.DataFrame(index=factor_ret.columns)
    metrics['Annual return'] = factor_ret.mean() * ann_adj
    metrics['Annual volatility'] = factor_ret.std() * np.sqrt(ann_adj)
    metrics['Sharpe ratio'] = (factor_ret.mean() / factor_ret.std()) * np.sqrt(ann_adj)
    metrics['Sortino ratio'] = (factor_ret.mean() / factor_ret[factor_ret < 0].std()) * np.sqrt(ann_adj)
    metrics['Skewness'] = factor_ret.skew()
    metrics['Kurtosis'] = factor_ret.kurt()
    # one-sided p-value for the null hypothesis that the mean return is zero
    metrics['P-val'] = ttest_1samp(factor_ret.dropna(), popmean=0)[1] / 2
    # loop through df to estimate alpha and beta vs the target returns
    for col in factor_ret.columns:
        y = factor_ret[col]
        X = sm.add_constant(returns, prepend=True)
        data = pd.concat([y, X], axis=1).dropna()
        # fit and summarize OLS model
        res = sm.OLS(data.iloc[:, 0], data.iloc[:, 1:]).fit()
        # add to metrics
        metrics.loc[col, 'Annual alpha'] = (res.params.iloc[0] + 1) ** ann_adj - 1
        metrics.loc[col, 'Beta'] = res.params.iloc[1]
    # sort by sharpe ratio and round values to 2 decimals
    metrics = metrics.sort_values(by='Sharpe ratio', ascending=False).astype(float).round(decimals=2)

    return metrics
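
# Usage sketch (synthetic data, not part of the original module):
#
#   idx = pd.date_range('2019-01-01', periods=800, freq='D')
#   f_ret = pd.DataFrame(np.random.randn(800, 2) * 0.01, index=idx, columns=['f1', 'f2'])
#   mkt = pd.Series(np.random.randn(800) * 0.02, index=idx, name='mkt')
#   factor_performance(f_ret, mkt, freq='daily')   # Sharpe, Sortino, alpha, beta, etc.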