Skip to content

Commit bb9f0cc

Browse files
committed
GPU/TPC: Increase assumed cacheline size to 128 bytes in cluster finder
1 parent 74675ef commit bb9f0cc

2 files changed

Lines changed: 8 additions & 3 deletions

File tree

GPU/GPUTracking/TPCClusterFinder/CfArray2D.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,13 @@ class LinearLayout
8686
template <tpccf::SizeT S>
8787
struct GridSize;
8888

89+
// GridSize for 1-byte and 2-byte elements is adjusted for 128-byte cachelines,
90+
// as these are prevalent on modern GPUs.
91+
8992
template <>
9093
struct GridSize<1> {
9194
enum {
92-
Width = 8,
95+
Width = 16,
9396
Height = 8,
9497
};
9598
};
@@ -98,10 +101,13 @@ template <>
98101
struct GridSize<2> {
99102
enum {
100103
Width = 8,
101-
Height = 4,
104+
Height = 8,
102105
};
103106
};
104107

108+
// GridSize for 4 bytes is only used for MC indexing on CPU.
109+
// So assume 64 byte cachelines here instead.
110+
105111
template <>
106112
struct GridSize<4> {
107113
enum {

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
5353
TimebinsPerCacheline = TPCMapMemoryLayout<uint16_t>::Height,
5454
EntriesPerCacheline = PadsPerCacheline * TimebinsPerCacheline,
5555
NumOfCachedPads = GPUCA_WARP_SIZE / TimebinsPerCacheline,
56-
NumCLsPerWarp = GPUCA_WARP_SIZE / EntriesPerCacheline,
5756
NumOfCachedTBs = TimebinsPerCacheline * 8,
5857
// Threads index shared memory as [iThread / MaxNPadsPerRow][iThread % MaxNPadsPerRow].
5958
// Rounding up to a multiple of PadsPerCacheline ensures iThread / MaxNPadsPerRow < NumOfCachedTBs

0 commit comments

Comments
 (0)