-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathobsession_main.py
More file actions
64 lines (51 loc) · 2.18 KB
/
obsession_main.py
File metadata and controls
64 lines (51 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 6 08:02:30 2017
@author: csaunders
"""
import macro_trend_exec
import hours_per_article_exec
import engaged_time_exec
import non_obsession_exec
# Comparison windows: results are reported as t2 relative to t1.
# Each window's 'end' date is exclusive.
t1 = dict(start='2017-03-01', end='2017-03-03')
t2 = dict(start='2017-03-03', end='2017-03-05')
def macro_trend(t1, t2, dump_dir, API_interval=24):
    """Fetch the high-level trend comparison of period t2 against period t1.

    All of the work is delegated to ``macro_trend_exec.main``; this wrapper
    additionally prints two summary ratios (t2 / t1) before returning.

    Parameters:
        t1, t2 - period dictionaries, which MUST look like
            {'start': '2017-03-03', 'end': '2017-03-05'}
        dump_dir - forwarded unchanged to ``macro_trend_exec.main``
            (presumably the directory for dumped results - confirm there)
        API_interval - forwarded unchanged as the ``API_interval`` keyword
            (units are defined by ``macro_trend_exec`` - confirm there)

    Returns a 5-tuple, in this order:
        page views for t1 - DataFrame from the keen API (articles and
            obsessions)
        page views for t2 - same, for the second period
        total time for t1 - DataFrame from the keen API (articles and
            obsessions)
        total time for t2 - same, for the second period
        tc - sorted DataFrame merging page views and time, with the
            difference calculated
    """
    outcome = macro_trend_exec.main(t1, t2, dump_dir,
                                    API_interval=API_interval)
    # Separate local names keep the returned DataFrames distinct from the
    # t1 / t2 period dictionaries that were passed in.
    views_1, views_2, time_1, time_2, tc = outcome

    # Every row after the first feeds the "obsession" totals.
    # NOTE(review): presumably row 0 is the non-obsession aggregate - confirm
    # against macro_trend_exec.
    tail_rows = tc.iloc[1:]

    pv_ratio = tail_rows['pv t2'].sum() / tail_rows['pv t1'].sum()
    print('obsession pv change', pv_ratio)

    time_ratio = tail_rows['time t2'].sum() / tail_rows['time t1'].sum()
    print('obsession time change', time_ratio)

    return views_1, views_2, time_1, time_2, tc
def hours_per_article(pv_data, time_data, num=500):
    """Run the hours-per-article analysis via ``hours_per_article_exec.main``.

    pv_data and time_data are handed to the helper untouched; num is the
    minimum number of page views required for inclusion in the analysis.
    Whatever the helper produces is returned as-is.
    """
    return hours_per_article_exec.main(pv_data, time_data, num=num)
def engaged_time(article_list, timeframe, dump_dir):
    """Return the 'average engaged_time' per article.

    The figure is the sum of IQR time for each article quartile divided by
    the count; the calculation itself lives in ``engaged_time_exec.main``,
    which receives all three arguments unchanged and whose result is
    returned directly.
    """
    return engaged_time_exec.main(article_list, timeframe, dump_dir)
def non_obsession(pv_data, time_data, timeframe, dump_dir, num=500):
    """Run the non-obsession analysis via ``non_obsession_exec.main``.

    All five arguments are forwarded positionally, in signature order.

    Returns a pair:
        first  - article level dataframe
        second - avg cumulative time per article by topic
    """
    forwarded = (pv_data, time_data, timeframe, dump_dir, num)
    # Unpack into exactly two names so an unexpected result shape fails here.
    by_article, by_topic = non_obsession_exec.main(*forwarded)
    return by_article, by_topic