diff --git a/ClickThroughRate/WideDeepLearning/n1_benchmark.sh b/ClickThroughRate/WideDeepLearning/n1_benchmark.sh
new file mode 100644
index 0000000..c8a055e
--- /dev/null
+++ b/ClickThroughRate/WideDeepLearning/n1_benchmark.sh
@@ -0,0 +1,25 @@
+DEVICE_NUM_PER_NODE=1
+DATA_ROOT=/dataset/wdl_ofrecord/ofrecord
+EMBD_SIZE=2322444
+BATCHSIZE=2048
+
+python3 wdl_train_eval.py \
+  --learning_rate=0.001 \
+  --batch_size=$BATCHSIZE \
+  --train_data_dir $DATA_ROOT/train \
+  --loss_print_every_n_iter=100 \
+  --eval_interval=0 \
+  --deep_dropout_rate=0.5 \
+  --max_iter=310 \
+  --hidden_units_num=7 \
+  --hidden_size=1024 \
+  --wide_vocab_size=$EMBD_SIZE \
+  --deep_vocab_size=$EMBD_SIZE \
+  --train_data_part_num 256 \
+  --train_part_name_suffix_length=5 \
+  --eval_data_dir $DATA_ROOT/val \
+  --eval_data_part_num 256 \
+  --eval_part_name_suffix_length=5 \
+  --gpu_num_per_node $DEVICE_NUM_PER_NODE \
+  --num_dataloader_thread_per_gpu 1 \
+  --node_ips '127.0.0.1'
diff --git a/ClickThroughRate/WideDeepLearning/wdl_train_eval.py b/ClickThroughRate/WideDeepLearning/wdl_train_eval.py
index 964b3ba..cca6d9c 100644
--- a/ClickThroughRate/WideDeepLearning/wdl_train_eval.py
+++ b/ClickThroughRate/WideDeepLearning/wdl_train_eval.py
@@ -186,6 +186,7 @@ def _model(dense_fields, wide_sparse_fields, deep_sparse_fields):
 
 
 global_loss = 0.0
+time_begin = 0.0
 def _create_train_callback(step):
     def nop(loss):
         global global_loss
@@ -194,9 +195,15 @@ def nop(loss):
 
     def print_loss(loss):
         global global_loss
+        global time_begin
         global_loss += loss.mean()
-        print(step+1, 'time', time.time(), 'loss', global_loss/FLAGS.loss_print_every_n_iter)
+        time_end = time.time()
+        print("%d/%d, time: %.6f, latency(ms): %.14f, loss: %.16f" % (step + 1, FLAGS.max_iter, time_end,
+              (time_end - time_begin) * 1000 / FLAGS.loss_print_every_n_iter,
+              global_loss / FLAGS.loss_print_every_n_iter)
+        )
         global_loss = 0.0
+        time_begin = time.time()
 
     if (step + 1) % FLAGS.loss_print_every_n_iter == 0:
         return print_loss
@@ -212,7 +219,7 @@ def CreateOptimizer(args):
 def _get_train_conf():
     train_conf = flow.FunctionConfig()
     train_conf.default_data_type(flow.float)
-    train_conf.indexed_slices_optimizer_conf(dict(include_op_names=dict(op_name=['wide_embedding', 'deep_embedding'])))
+    # train_conf.indexed_slices_optimizer_conf(dict(include_op_names=dict(op_name=['wide_embedding', 'deep_embedding'])))
     return train_conf
 
 
@@ -279,9 +286,11 @@
     #flow.config.collective_boxing.enable_fusion(False)
     check_point = flow.train.CheckPoint()
     check_point.init()
+    global time_begin
+    time_begin = time.time()
     for i in range(FLAGS.max_iter):
         train_job().async_get(_create_train_callback(i))
-        if (i + 1 ) % FLAGS.eval_interval == 0:
+        if FLAGS.eval_interval > 0 and (i + 1 ) % FLAGS.eval_interval == 0:
             labels = np.array([[0]])
             preds = np.array([[0]])
             cur_time = time.time()