File tree Expand file tree Collapse file tree
GPU/GPUTracking/TPCClusterFinder Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -86,10 +86,13 @@ class LinearLayout
8686template <tpccf::SizeT S>
8787struct GridSize ;
8888
89+ // GridSize for 1 byte and 2 byte elements are adjusted for 128 byte cachelines,
90+ // as these are prevalent on modern GPUs.
91+
8992template <>
9093struct GridSize <1 > {
9194 enum {
92- Width = 8 ,
95+ Width = 16 ,
9396 Height = 8 ,
9497 };
9598};
@@ -98,10 +101,13 @@ template <>
98101struct GridSize <2 > {
99102 enum {
100103 Width = 8 ,
101- Height = 4 ,
104+ Height = 8 ,
102105 };
103106};
104107
108+ // GridSize for 4 bytes is only used for MC indexing on CPU.
109+ // So assume 64 byte cachelines here instead.
110+
105111template <>
106112struct GridSize <4 > {
107113 enum {
Original file line number Diff line number Diff line change @@ -53,7 +53,6 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate
5353 TimebinsPerCacheline = TPCMapMemoryLayout<uint16_t >::Height,
5454 EntriesPerCacheline = PadsPerCacheline * TimebinsPerCacheline,
5555 NumOfCachedPads = GPUCA_WARP_SIZE / TimebinsPerCacheline,
56- NumCLsPerWarp = GPUCA_WARP_SIZE / EntriesPerCacheline,
5756 NumOfCachedTBs = TimebinsPerCacheline * 8 ,
5857 // Threads index shared memory as [iThread / MaxNPadsPerRow][iThread % MaxNPadsPerRow].
5958 // Rounding up to a multiple of PadsPerCacheline ensures iThread / MaxNPadsPerRow < NumOfCachedTBs
You can’t perform that action at this time.
0 commit comments