-
Notifications
You must be signed in to change notification settings - Fork 15
/
servbiz_example_pipeline.py
121 lines (82 loc) · 4.32 KB
/
servbiz_example_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
### Import Relevant Libraries
import os
# Directory the pipeline runs from. To take it from the environment instead,
# use the commented-out line below in place of the hard-coded value.
# PYTHON_FOLDER = os.environ['PYTHON_FOLDER'] # Requires an environment variable to be preset
PYTHON_FOLDER = './'
# Switch the working directory up front so that relative paths used later
# (e.g. 'config.ini') resolve against PYTHON_FOLDER.
# NOTE(review): presumably this also has to happen before the tvc_* imports so
# the local modules are found when run interactively -- confirm before moving it.
os.chdir(PYTHON_FOLDER)
import pandas as pd
import configparser
import tvc_transform as tvct
import tvc_load_service_account as tvcload
### Set up Python output to show every dataframe column
pd.set_option('display.max_columns', 500)

### Set variables by reading from the config.ini file
company_name = 'ServBiz'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Without this check a missing config.ini only
# shows up later as an unhelpful KeyError on the company section.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "Could not read 'config.ini' from the current working directory"
    )

# All settings for this pipeline live in the section named after the company.
GOOGLE_CREDENTIALS_FILE = config[company_name]['GOOGLE_CREDENTIALS_FILE']
GOOGLE_SPREADSHEET_KEY = config[company_name]['GOOGLE_SPREADSHEET_KEY']
RAW_DATAFILE = config[company_name]['RAW_DATAFILE']

### Extract raw data
# The transform step further down refers to the columns 'client_id', 'date',
# 'value_usd' and 'segment', so the CSV is expected to provide them.
t = pd.read_csv(RAW_DATAFILE)

### Instantiate TVCLoad object with Google credentials file to write to Google Sheets
tvcl = tvcload.TVCLoad(GOOGLE_CREDENTIALS_FILE)
### Define segments. Each Segment name maps to a segment_col name.
# A segment_col of None means the run is made without a segment column.
segments = {'Unsegmented': None,
            'Channel': 'segment'
            }

# Run the full set of analyses once per segment definition. The segment name
# is used as the prefix of every Google Sheets tab written for that run.
for seg, seg_col in segments.items():
    print('Processing the', seg, 'segment')

    # Segment-aware processing is only requested when a segment column exists.
    use_seg = seg_col is not None

    ### Transform the raw data into dau_decorated
    dau = tvct.create_dau_df(t,
                             user_id='client_id',
                             activity_date='date',
                             inc_amt='value_usd',
                             segment_col=seg_col
                             )
    dau_decorated = tvct.create_dau_decorated_df(dau)

    ### Calculate Weekly Growth Accounting and Cohort Analysis based on wau_decorated
    # WAU Decorated
    wau_decorated = tvct.create_xau_decorated_df(dau_decorated, 'week', use_segment=use_seg)

    # Weekly Growth Accounting
    w_ga = tvct.consolidate_all_ga(wau_decorated, 'week',
                                   use_segment=use_seg,
                                   growth_rate_periods=12,
                                   keep_last_period=False)
    tvcl.write_to_google_sheet(w_ga, seg + ' Weekly Growth Accounting', GOOGLE_SPREADSHEET_KEY)

    # Weekly Cohorts
    wau_cohorts = tvct.create_xau_cohort_df(wau_decorated, 'week', use_segment=use_seg)
    tvcl.write_to_google_sheet(wau_cohorts, seg + ' Weekly Cohorts', GOOGLE_SPREADSHEET_KEY)

    ### Calculate Monthly Growth Accounting and Cohort Analysis based on mau_decorated
    # MAU Decorated
    mau_decorated = tvct.create_xau_decorated_df(dau_decorated, 'month', use_segment=use_seg)

    # Monthly Growth Accounting
    m_ga = tvct.consolidate_all_ga(mau_decorated, 'month',
                                   use_segment=use_seg,
                                   growth_rate_periods=12,
                                   keep_last_period=False)
    tvcl.write_to_google_sheet(m_ga, seg + ' Monthly Growth Accounting', GOOGLE_SPREADSHEET_KEY)

    # Monthly Cohorts
    mau_cohorts = tvct.create_xau_cohort_df(mau_decorated, 'month', use_segment=use_seg)
    tvcl.write_to_google_sheet(mau_cohorts, seg + ' Monthly Cohorts', GOOGLE_SPREADSHEET_KEY)

    ### Calculate the Rolling 28-Day DAU/MAU ratios
    rolling_dau_mau = tvct.create_xau_window_df(dau_decorated,
                                                time_period='day',
                                                window_days=28,
                                                breakouts=[2, 4, 8, 12, 16, 20],
                                                use_segment=use_seg,
                                                use_final_day=False)
    tvcl.write_to_google_sheet(rolling_dau_mau, seg + ' Rolling DAU/MAU', GOOGLE_SPREADSHEET_KEY)

    ### Calculate the Rolling 28-Day WAU/MAU ratios
    rolling_wau_mau = tvct.create_xau_window_df(dau_decorated,
                                                time_period='week',
                                                window_days=28,
                                                breakouts=[2, 3, 4],
                                                use_segment=use_seg,
                                                use_final_day=False)
    tvcl.write_to_google_sheet(rolling_wau_mau, seg + ' Rolling WAU/MAU', GOOGLE_SPREADSHEET_KEY)