% bibl_conf.bib -- BibTeX bibliography database.
% (GitHub web-UI scraping residue -- navigation text and rendered line numbers --
%  removed from the top of the file; BibTeX ignores text outside @entries anyway.)
------------------------------------------------------------------------
% Entry-type templates: one skeleton per standard BibTeX entry type.
% Field names normalized to lowercase and aligned; `summary` is a custom
% (ignored-by-styles) field used in this file for reviewer notes.
@article{Article,
  author  = {},
  title   = {},
  journal = {},
  volume  = {},
  number  = {},
  pages   = {},
  month   = {},
  year    = {},
  note    = {},
  summary = {}
}
@book{Book,
  author    = {},
  editor    = {},
  title     = {},
  publisher = {},
  year      = {},
  volume    = {},
  number    = {},
  series    = {},
  address   = {},
  edition   = {},
  month     = {},
  note      = {},
  summary   = {}
}
@booklet{Booklet,
  title        = {},
  author       = {},
  howpublished = {},
  address      = {},
  month        = {},
  year         = {},
  note         = {},
  summary      = {}
}
@inbook{Inbook,
  author    = {},
  editor    = {},
  title     = {},
  chapter   = {},
  pages     = {},
  publisher = {},
  year      = {},
  volume    = {},
  number    = {},
  series    = {},
  type      = {},
  address   = {},
  edition   = {},
  month     = {},
  note      = {},
  summary   = {}
}
@incollection{Incollection,
  author    = {},
  title     = {},
  booktitle = {},
  publisher = {},
  year      = {},
  editor    = {},
  volume    = {},
  number    = {},
  series    = {},
  type      = {},
  chapter   = {},
  pages     = {},
  address   = {},
  edition   = {},
  month     = {},
  note      = {},
  summary   = {}
}
@inproceedings{Inproceedings,
  author       = {},
  title        = {},
  booktitle    = {},
  year         = {},
  editor       = {},
  volume       = {},
  number       = {},
  series       = {},
  pages        = {},
  address      = {},
  month        = {},
  organization = {},
  publisher    = {},
  note         = {},
  summary      = {}
}
@manual{Manual,
  title        = {},
  author       = {},
  organization = {},
  address      = {},
  edition      = {},
  month        = {},
  year         = {},
  note         = {},
  summary      = {}
}
@mastersthesis{Mastersthesis,
  author  = {},
  title   = {},
  school  = {},
  year    = {},
  type    = {},
  address = {},
  month   = {},
  note    = {},
  summary = {}
}
@misc{Misc,
  author       = {},
  title        = {},
  howpublished = {},
  month        = {},
  year         = {},
  note         = {},
  summary      = {}
}
@phdthesis{Phdthesis,
  author  = {},
  title   = {},
  school  = {},
  year    = {},
  type    = {},
  address = {},
  month   = {},
  note    = {},
  summary = {}
}
@proceedings{Proceedings,
  title        = {},
  year         = {},
  editor       = {},
  volume       = {},
  number       = {},
  series       = {},
  address      = {},
  month        = {},
  organization = {},
  publisher    = {},
  note         = {},
  summary      = {}
}
@techreport{Techreport,
  author      = {},
  title       = {},
  institution = {},
  year        = {},
  type        = {},
  number      = {},
  address     = {},
  month       = {},
  note        = {},
  summary     = {}
}
@unpublished{Unpublished,
  author  = {},
  title   = {},
  note    = {},
  month   = {},
  year    = {},
  summary = {}
}
------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%% CODE
% Corporate author double-braced so BibTeX treats it as one indivisible name
% (single braces parse as First="Open MPI", Last="Team").
@misc{openMPI_decision_tree_4_0_2,
  author       = {{Open MPI Team}},
  title        = {{OpenMPI} decision tree for allreduce and allgather, version 4.0.2},
  howpublished = {\url{https://github.com/open-mpi/ompi/blob/v4.0.2/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c}},
  version      = {4.0.2},
  year         = {2019}
}
% Corporate author double-braced so BibTeX treats it as one indivisible name.
% NOTE(review): year given here (2020) disagrees with openMPI_git (2021) for the
% same v4.1.2 tag -- verify the release date.
@misc{openMPI_decision_tree_4_1_2,
  author       = {{Open MPI Team}},
  title        = {{OpenMPI} decision tree for allreduce and allgather, version 4.1.2},
  howpublished = {\url{https://github.com/open-mpi/ompi/blob/v4.1.2/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c}},
  version      = {4.1.2},
  year         = {2020}
}
% Corporate author double-braced so BibTeX treats it as one indivisible name.
@misc{openMPI_git,
  author       = {{Open MPI Team}},
  title        = {{OpenMPI} {GitHub}},
  howpublished = {\url{https://github.com/open-mpi/ompi/tree/v4.1.2}},
  version      = {4.1.2},
  year         = {2021}
}
%%%%%%%%%%%%%%%%%%%%%%%%%% STATISTICS
% Was @inbook with journal={Springer}: this is a whole book and Springer is the
% publisher, not a journal. Dropped empty pages and the meaningless month={01}.
% The DOI 10.1007/b97626 is the 4th edition (2002).
@book{modern_applied_statistics_with_S,
  author    = {Venables, William N. and Ripley, Brian D.},
  title     = {Modern Applied Statistics with {S}},
  publisher = {Springer},
  edition   = {Fourth},
  year      = {2002},
  doi       = {10.1007/b97626}
}
%%%%%%%%%%%%%%%%%%%%%%%%%% GENERAL
% Title corrected: the book is "Handbook of Writing for THE Mathematical
% Sciences" (SIAM, 2nd ed., 1998). Author in unambiguous comma form.
@book{Higham:98,
  author    = {Higham, Nicholas J.},
  title     = {Handbook of Writing for the Mathematical Sciences},
  publisher = {SIAM},
  edition   = {Second},
  year      = {1998}
}
% "W. Strunk~Jr." parses wrongly (Jr glued to the surname); the suffix needs
% comma form. Title is "The Elements of Style"; edition as ordinal word per
% classic-BibTeX convention.
@book{Strunk:00,
  author    = {Strunk, Jr., William and White, E. B.},
  title     = {The Elements of Style},
  publisher = {Longman},
  edition   = {Fourth},
  year      = {2000}
}
% Accent wrapped as a BibTeX special character ({\"u}) for correct sorting;
% URL wrapped in \url{} (the bare "online:~http://..." form breaks with the
% underscore-containing path).
@misc{Pueschel:10,
  author       = {P{\"u}schel, Markus},
  title        = {Benchmarking comments},
  howpublished = {\url{http://people.inf.ethz.ch/markusp/teaching/263-2300-ETH-spring11/slides/class05.pdf}}
}
% "Ching-Tien Ho" was in First-Last form inside a comma-form list, so BibTeX
% parsed Last="Ho" with no warning but inconsistent sorting; normalized to
% comma form. Page range uses the required double hyphen.
@article{Bruck:97,
  author  = {Bruck, J. and Ho, Ching-Tien and Kipnis, S. and Upfal, E. and Weathersby, D.},
  title   = {Efficient algorithms for all-to-all communications in multiport message-passing systems},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {1997},
  volume  = {8},
  number  = {11},
  pages   = {1143--1156},
  doi     = {10.1109/71.642949}
}
% arXiv preprint (cs.LG).
@misc{demystifying_hoefler,
  author        = {Tal Ben-Nun and Torsten Hoefler},
  title         = {Demystifying Parallel and Distributed Deep Learning: An In-Depth Concurrency Analysis},
  year          = {2018},
  eprint        = {1802.09941},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
% arXiv preprint (cs.LG).
@misc{outer_product_structure_NN_derivatives,
  author        = {Craig Bakker and Michael J. Henry and Nathan O. Hodas},
  title         = {The Outer Product Structure of Neural Network Derivatives},
  year          = {2018},
  eprint        = {1810.03798},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
% arXiv preprint (cs.LG).
@misc{scale_distributed_DL_google,
  author        = {Peter H. Jin and Qiaochu Yuan and Forrest Iandola and Kurt Keutzer},
  title         = {How to scale distributed deep learning?},
  year          = {2016},
  eprint        = {1611.04581},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
% Braces protect the system name and the "ML" acronym from sentence-casing
% styles.
@misc{blink,
  author        = {Guanhua Wang and Shivaram Venkataraman and Amar Phanishayee and Jorgen Thelin and Nikhil Devanur and Ion Stoica},
  title         = {{Blink}: Fast and Generic Collectives for Distributed {ML}},
  year          = {2019},
  eprint        = {1910.04940},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DC}
}
% Field delimiters changed from quotes to braces (braces nest; quotes do not),
% and {MPICH} protected so sentence-casing styles do not downcase it.
@inproceedings{improving_collective_operations_in_MPICH,
  author    = {Thakur, Rajeev and Gropp, William D.},
  editor    = {Dongarra, Jack and Laforenza, Domenico and Orlando, Salvatore},
  title     = {Improving the Performance of Collective Operations in {MPICH}},
  booktitle = {Recent Advances in Parallel Virtual Machine and Message Passing Interface},
  year      = {2003},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {257--267},
  abstract  = {We report on our work on improving the performance of collective operations in MPICH on clusters connected by switched networks. For each collective operation, we use multiple algorithms depending on the message size, with the goal of minimizing latency for short messages and minimizing bandwidth usage for long messages. Although we have implemented new algorithms for all MPI collective operations, because of limited space we describe only the algorithms for allgather, broadcast, reduce-scatter, and reduce. We present performance results using the SKaMPI benchmark on a Myrinet-connected Linux cluster and an IBM SP. In all cases, the new algorithms significantly outperform the old algorithms used in MPICH on the Myrinet cluster, and, in many cases, they outperform the algorithms used in IBM's MPI on the SP.},
  isbn      = {978-3-540-39924-7}
}
% {MPI} protected from sentence-casing; page range uses double hyphen.
@article{network_performance_aware_collective_communication_operations_in_the_cloud,
  author  = {Yifan Gong and Bingsheng He and Jianlong Zhong},
  title   = {Network Performance Aware {MPI} Collective Communication Operations in the Cloud},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {2015},
  volume  = {26},
  pages   = {3079--3089}
}
% Empty volume/number fields dropped (BibTeX warns on empty fields); page range
% uses double hyphen.
@inproceedings{topology_aware_sparse_allreduce,
  author    = {Nguyen, Truong Thao and Wahib, Mohamed and Takano, Ryousei},
  title     = {Topology-aware Sparse Allreduce for Large-scale Deep Learning},
  booktitle = {2019 IEEE 38th International Performance Computing and Communications Conference (IPCCC)},
  year      = {2019},
  pages     = {1--8},
  doi       = {10.1109/IPCCC47392.2019.8958738}
}
% Duplicate of scale_distributed_DL_google (same work, same eprint). Kept
% because either key may be cited; flagged via an ignored annotation field --
% consolidate citations to one key and delete the other.
@misc{how_to_scale_distributed_DL,
  author        = {Peter H. Jin and Qiaochu Yuan and Forrest Iandola and Kurt Keutzer},
  title         = {How to scale distributed deep learning?},
  year          = {2016},
  eprint        = {1611.04581},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  internal-note = {duplicate of entry scale_distributed_DL_google}
}
% {Horovod} and {TensorFlow} protected from sentence-casing styles.
@misc{horovod,
  author        = {Alexander Sergeev and Mike Del Balso},
  title         = {{Horovod}: fast and easy distributed deep learning in {TensorFlow}},
  year          = {2018},
  eprint        = {1802.05799},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
% PDF-extraction garbling in the abstract repaired (words were run together);
% Unicode en-dash in pages replaced with the portable double hyphen; {BigDL}
% protected in the title; straight double quotes replaced with LaTeX quotes.
@inproceedings{BigDL,
  author    = {Dai, Jason Jinquan and Wang, Yiheng and Qiu, Xin and Ding, Ding and Zhang, Yao and Wang, Yanzhang and Jia, Xianyan and Zhang, Cherry Li and Wan, Yan and Li, Zhichao and Wang, Jiao and Huang, Shengsheng and Wu, Zhongyuan and Wang, Yang and Yang, Yuhao and She, Bowen and Shi, Dongjie and Lu, Qi and Huang, Kai and Song, Guoqiong},
  title     = {{BigDL}: A Distributed Deep Learning Framework for Big Data},
  year      = {2019},
  isbn      = {9781450369732},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  url       = {https://doi.org/10.1145/3357223.3362707},
  doi       = {10.1145/3357223.3362707},
  abstract  = {This paper presents BigDL (a distributed deep learning framework for Apache Spark), which has been used by a variety of users in the industry for building deep learning applications on production big data platforms. It allows deep learning applications to run on the Apache Hadoop/Spark cluster so as to directly process the production data, and as a part of the end-to-end data analysis pipeline for deployment and management. Unlike existing deep learning frameworks, BigDL implements distributed, data parallel training directly on top of the functional compute model (with copy-on-write and coarse-grained operations) of Spark. We also share real-world experience and ``war stories'' of users that have adopted BigDL to address their challenges (i.e., how to easily build end-to-end data analysis and deep learning pipelines for their production data).},
  booktitle = {Proceedings of the ACM Symposium on Cloud Computing},
  pages     = {50--60},
  numpages  = {11},
  keywords  = {distributed deep learning, big data, end-to-end data pipeline, Apache Spark},
  location  = {Santa Cruz, CA, USA},
  series    = {SoCC '19}
}
% {TensorFlow} and {MPI} protected from sentence-casing styles.
% NOTE(review): year=2017 but eprint 1603.02339 is a 2016 submission -- verify
% which version is intended.
@misc{distributed_TF_with_MPI,
  author        = {Abhinav Vishnu and Charles Siegel and Jeffrey Daily},
  title         = {Distributed {TensorFlow} with {MPI}},
  year          = {2017},
  eprint        = {1603.02339},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DC}
}
% {Theano-MPI} and {Theano} protected from sentence-casing styles.
@misc{ma2016theano_MPI,
  author        = {He Ma and Fei Mao and Graham W. Taylor},
  title         = {{Theano-MPI}: a {Theano}-based Distributed Training Framework},
  year          = {2016},
  eprint        = {1605.08325},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
% Missing diacritics restored as BibTeX special characters (Horv{\'a}th,
% Richt{\'a}rik -- per the authors' arXiv listing); names normalized to
% unambiguous comma form.
@misc{natural_compression_for_distributed_DL,
  author        = {Horv{\'a}th, Samuel and Ho, Chen-Yu and Horv{\'a}th, Ludovit and Sahu, Atal Narayan and Canini, Marco and Richt{\'a}rik, Peter},
  title         = {Natural Compression for Distributed Deep Learning},
  year          = {2020},
  eprint        = {1905.10988},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}
% {MPI} protected from sentence-casing styles.
@misc{distributed_ML_for_computational_engineering_using_MPI,
  author        = {Kailai Xu and Weiqiang Zhu and Eric Darve},
  title         = {Distributed Machine Learning for Computational Engineering using {MPI}},
  year          = {2020},
  eprint        = {2011.01349},
  archiveprefix = {arXiv},
  primaryclass  = {cs.DC}
}
% Unicode en-dash in pages replaced with the portable double hyphen; month uses
% the predefined bare macro (month = feb), not a braced literal.
@article{Rabenseifner:05,
  author     = {Thakur, Rajeev and Rabenseifner, Rolf and Gropp, William},
  title      = {Optimization of Collective Communication Operations in {MPICH}},
  year       = {2005},
  issue_date = {February 2005},
  publisher  = {Sage Publications, Inc.},
  address    = {USA},
  volume     = {19},
  number     = {1},
  issn       = {1094-3420},
  url        = {https://doi.org/10.1177/1094342005051521},
  doi        = {10.1177/1094342005051521},
  abstract   = {We describe our work on improving the performance of collective communication operations in MPICH for clusters connected by switched networks. For each collective operation, we use multiple algorithms depending on the message size, with the goal of minimizing latency for short messages and minimizing bandwidth use for long messages. Although we have implemented new algorithms for all MPI Message Passing Interface collective operations, because of limited space we describe only the algorithms for allgather, broadcast, all-to-all, reduce-scatter, reduce, and allreduce. Performance results on a Myrinet-connected Linux cluster and an IBM SP indicate that, in all cases, the new algorithms significantly outperform the old algorithms used in MPICH on the Myrinet cluster, and, in many cases, they outperform the algorithms used in IBM's MPI on the SP. We also explore in further detail the optimization of two of the most commonly used collective operations, allreduce and reduce, particularly for long messages and nonpower-of-two numbers of processes. The optimized algorithms for these operations perform several times better than the native algorithms on a Myrinet cluster, IBM SP, and Cray T3E. Our results indicate that to achieve the best performance for a collective communication operation, one needs to use a number of different algorithms and select the right algorithm for a particular message size and number of processes.},
  journal    = {Int. J. High Perform. Comput. Appl.},
  month      = feb,
  pages      = {49--66},
  numpages   = {18},
  keywords   = {reduction, Collective communication, message passing, MPI}
}
% Neighborexchange
% Neighborexchange
% Empty volume/number fields dropped. The pages value "6 pp.-442" is a raw
% IEEE-export artifact; flagged via an ignored annotation field rather than
% guessed at.
@inproceedings{Chen:05,
  author        = {Jing Chen and Linbo Zhang and Yunquan Zhang and Wei Yuan},
  title         = {Performance evaluation of Allgather algorithms on terascale {Linux} cluster with fast {Ethernet}},
  booktitle     = {Eighth International Conference on High-Performance Computing in Asia-Pacific Region (HPCASIA'05)},
  year          = {2005},
  pages         = {6 pp.-442},
  doi           = {10.1109/HPCASIA.2005.75},
  internal-note = {pages field is an IEEE export artifact; verify the actual page range}
}
% arXiv preprint (cs.LG).
@misc{narang2017exploring,
  author        = {Sharan Narang and Erich Elsen and Gregory Diamos and Shubho Sengupta},
  title         = {Exploring Sparsity in Recurrent Neural Networks},
  year          = {2017},
  eprint        = {1704.05119},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG}
}