-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathmain.py
More file actions
120 lines (96 loc) · 4.54 KB
/
main.py
File metadata and controls
120 lines (96 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import shutil
import networkx as nx
import numpy as np
import pandas as pd
import spektral
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold, train_test_split
from src.model import perCLTV
##############################
seed_value = 2023
lr = 0.0001
epochs = 500
beta1 = 0.5
beta2 = 0.5
timestep = 10
maxlen = 64
##############################
def data_process(timestep=10, maxlen=64):
df_S = pd.read_csv('./data/sample_data_individual_behavior.csv')
df_G = pd.read_csv('./data/sample_data_social_behavior.csv')
df_Y = pd.read_csv('./data/sample_data_label.csv')
churn_behavior_set = list(map(str, [4, 5, 7, 8, 13, 14, 16, 20, 21, 24, 29,
30, 34, 36, 40, 45, 49, 50, 52, 54, 55, 64, 68, 70, 73, 74, 76, 85, 87, 89]))
payment_behavior_set = list(
map(str, [1, 5, 25, 26, 29, 35, 44, 46, 48, 52, 55, 56, 70, 78, 81]))
B = df_S['seq'].apply(lambda x: x.split(
',') if pd.notna(x) else []).tolist()
C = [list([xx for xx in x if xx in churn_behavior_set]) for x in B]
P = [list([xx for xx in x if xx in payment_behavior_set]) for x in B]
B = tf.keras.preprocessing.sequence.pad_sequences(sequences=B,
maxlen=maxlen,
padding='post')
C = tf.keras.preprocessing.sequence.pad_sequences(sequences=C,
maxlen=maxlen,
padding='post')
P = tf.keras.preprocessing.sequence.pad_sequences(sequences=P,
maxlen=maxlen,
padding='post')
B = B.reshape(-1, timestep, maxlen)
C = C.reshape(-1, timestep, maxlen)
P = P.reshape(-1, timestep, maxlen)
G = nx.from_pandas_edgelist(df=df_G,
source='src_uid',
target='dst_uid',
edge_attr=['weight'])
A = nx.adjacency_matrix(G)
A = spektral.layers.GATConv.preprocess(A).astype('f4')
y1 = df_Y['churn_label'].values.reshape(-1, 1)
y2 = np.log(df_Y['payment_label'].values + 1).reshape(-1, 1)
print('B:', B.shape)
print('C:', C.shape)
print('P:', P.shape)
print('G:', A.shape)
print('y1:', y1.shape, 'y2:', y2.shape)
return B, C, P, A, y1, y2
B, C, P, A, y1, y2 = data_process(timestep=timestep, maxlen=maxlen)
N = A.shape[0]
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed_value)
for train_index, test_index in kfold.split(B, y1):
train_index, val_index = train_test_split(
train_index, test_size=0.1, random_state=seed_value)
mask_train = np.zeros(N, dtype=bool)
mask_val = np.zeros(N, dtype=bool)
mask_test = np.zeros(N, dtype=bool)
mask_train[train_index] = True
mask_val[val_index] = True
mask_test[test_index] = True
checkpoint_path = './model/checkpoint-{epoch:04d}.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)
if os.path.exists(checkpoint_dir):
shutil.rmtree(checkpoint_dir)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
patience=5,
mode='min')
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
monitor='val_loss',
verbose=1,
save_best_only=True,
save_weights_only=True,
mode='auto')
model = perCLTV(timestep=timestep, behavior_maxlen=maxlen)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
loss={'output_1': tf.keras.losses.BinaryCrossentropy(),
'output_2': tf.keras.losses.MeanSquaredError()},
loss_weights={'output_1': beta1, 'output_2': beta2},
metrics={'output_1': tf.keras.metrics.AUC(),
'output_2': 'mae'})
model.fit([B, C, P, A], [y1, y2],
validation_data=([B, C, P, A], [y1, y2], mask_val),
sample_weight=mask_train,
batch_size=N,
epochs=epochs,
shuffle=False,
callbacks=[early_stopping, best_checkpoint],
verbose=1)