% MICRO 2021 conference paper by Kotra, LeBeane, Kandemir and Loh.
% Publisher/address follow the ACM DOI prefix (10.1145) and the ACM copyright
% notice carried by this record; conference dates are stored in eventdate and
% the copyright line in a non-printing field instead of the printed note.
@inproceedings{769f444693e64c82b2bd6b966dc5bc81,
  author    = {Kotra, Jagadish B. and LeBeane, Michael and Kandemir, Mahmut T. and Loh, Gabriel H.},
  title     = {Increasing {GPU} translation reach by leveraging under-utilized on-chip resources},
  booktitle = {{MICRO} 2021 - 54th Annual {IEEE/ACM} International Symposium on Microarchitecture, Proceedings},
  series    = {Proceedings of the Annual International Symposium on Microarchitecture, MICRO},
  pages     = {1169--1181},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2021},
  month     = oct,
  day       = {18},
  eventdate = {2021-10-18/2021-10-22},
  doi       = {10.1145/3466752.3480105},
  language  = {English (US)},
  copyright = {{\textcopyright} 2021 Association for Computing Machinery.},
  abstract  = {Many GPU applications issue irregular memory accesses to a very large memory footprint. We confirm observations from prior work that these irregular access patterns are severely bottlenecked by insufficient Translation Lookaside Buffer (TLB) reach, resulting in expensive page table walks. In this work, we investigate mechanisms to improve TLB reach without increasing the page size or the size of the TLB itself. Our work is based around the observation that a GPU's instruction cache (I-cache) and Local Data Share (LDS) scratchpad memory are under-utilized in many applications, including those that suffer from poor TLB reach. We leverage this to opportunistically utilize idle capacity and port bandwidth from the GPU's I-cache and LDS structures for address translations. We explore various potential architectural designs for each structure to optimize performance and minimize complexity. Both structures are organized as a victim cache between the L1 and L2 TLBs to boost translation reach. We find that our designs can increase performance on average by 30.1\% without impacting the performance of applications that do not require additional reach.},
}