% Journal article: Barnes-Hut particle simulation on the Cell BE (Cluster Computing 14(4), 2011).
% The note field holds only the paper's funding acknowledgement.
@article{fa05240a266742a9ba8f3d19b05dff31,
  author    = {Demiroz, Betul and Topcuoglu, Haluk R. and Kandemir, Mahmut and Tosun, Oguz},
  title     = {Particle simulation on the {Cell BE} architecture},
  journal   = {Cluster Computing},
  volume    = {14},
  number    = {4},
  pages     = {419--432},
  year      = {2011},
  month     = dec,
  doi       = {10.1007/s10586-011-0169-4},
  issn      = {1386-7857},
  publisher = {Springer},
  language  = {English (US)},
  abstract  = {This paper presents two parallel formulations for the Barnes-Hut algorithm on the Cell architecture, which differ in tree distribution and construction phases of the algorithm. In the initial parallelization, the domains are dynamically partitioned and assigned to the synergistic processing elements (SPEs), and SPEs construct local trees of the sub-domains in parallel. The enhanced parallelization scheme provides better clustering of the particles by sequentially constructing the global tree of the entire work space in the power processing element (PPE) and by partitioning the tree into sub-trees that can fit in the Local Store. SPEs operate on the sub-tree data and construct local trees in parallel. Our experimental evaluation indicates that this application performs much faster on the Cell BE compared to the Intel Xeon based system. Specifically, our first and second methods on the Cell BE outperform Intel Xeon by a factor of 5.8 and 7.1 for 8192 particles, respectively.},
  note      = {Funding Information: This research was supported by The Scientific and Technological Research Council of Turkey (TUBITAK) with a research grant (Project Number: 108E035). Additionally, part of this research has been funded by Bogazici University Research Fund---08HA101D. Dr. Mahmut Kandemir is supported in part by NSF grants 1017882, 0963839, 720645 and a grant from Microsoft. The authors acknowledge Georgia Institute of Technology, its Sony-Toshiba-IBM Center of Competence, and the National Science Foundation, for the use of Cell Broadband Engine resources that have contributed to this research. A preliminary work regarding this research was presented at the Workshop on Programmability Issues for Multicore Computers (MULTIPROG) at the HIPEAC 2010 Conference, Pisa, Italy.},
}