% Conference paper published in an edited proceedings volume (CCIS series).
% Typed as inproceedings rather than inbook: the entry has its own authors plus
% volume editors and a booktitle, which classic inbook styles ignore or warn about.
% The publisher is double-braced so the " and " inside the company name is not
% treated as a list separator. Names use the unambiguous "Last, First" form.
@inproceedings{d721c1d7fe604b77acc8c3633a79c3e4,
  author    = {P{\'a}rraga, Edixon and Le{\'o}n, Betzabeth and Bond, Rom{\'a}n and Encinas, Diego and Bezerra, Aprigio and Mendez, Sandra and Rexachs, Dolores and Luque, Emilio},
  title     = {Analyzing the {I/O} Patterns of Deep Learning Applications},
  booktitle = {Cloud Computing, Big Data and Emerging Topics - 9th Conference, JCC-BDandET 2021, Proceedings},
  editor    = {Naiouf, Marcelo and Rucci, Enzo and Chichizola, Franco and {De Giusti}, Laura},
  series    = {Communications in Computer and Information Science},
  pages     = {3--16},
  publisher = {{Springer Science and Business Media Deutschland GmbH}},
  year      = {2021},
  month     = aug,
  day       = {16},
  doi       = {10.1007/978-3-030-84825-5_1},
  isbn      = {9783030848248},
  language  = {English},
  keywords  = {Deep learning, Distributed DL, I/O HPC, I/O Patterns},
  abstract  = {A traditional HPC storage system is designed to manage an I/O workload dominated by write operation bursts, mainly for applications carrying out simulations and checkpointing partial results. Currently, this context is more diverse because of artificial intelligence applications{\textquoteright} workload, such as machine learning and deep learning. As ML/DL applications are becoming more compute-intensive, they require the power of HPC systems. However, the HPC I/O system could be a bottleneck to scaling these kind of applications, mainly in the training stage. In this paper, we present a methodology for analyzing the I/O patterns of deep learning applications that allows us to understand the DL applications{\textquoteright} I/O in HPC systems. We have applied our approach to serial and distributed DL codes by using the TensorFlow2 and PyTorch framework for the MNIST and CIFAR-10 datasets.},
  note      = {Publisher Copyright: {\textcopyright} 2021, Springer Nature Switzerland AG.},
}