From a20afb61007a94f5c28294e9ae20043657152ef6 Mon Sep 17 00:00:00 2001
From: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
Date: Wed, 15 Oct 2025 01:40:49 +0000
Subject: [PATCH] Allow at::native::offset_t to be offset using `operator+=`
 (#164570)

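Add a `__device__` `operator+=` to `offset_t` that advances `begin`; it is
compiled only when `CCCL_VERSION >= 3001000`. Also mark `operator[]` as
`const`.

This will be required by CCCL 3.1.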
Pull Request resolved: https://github.com/pytorch/pytorch/pull/164570
Approved by: https://github.com/Skylion007, https://github.com/eqy
---
 aten/src/ATen/native/cuda/SortStable.cu | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/aten/src/ATen/native/cuda/SortStable.cu b/aten/src/ATen/native/cuda/SortStable.cu
index 4d956616371de..8117eeeec558e 100644
--- a/aten/src/ATen/native/cuda/SortStable.cu
+++ b/aten/src/ATen/native/cuda/SortStable.cu
@@ -21,9 +21,15 @@ namespace {
 struct offset_t {
   int stride;
   int begin;
-  __device__ int operator[](int i) {
+  __device__ int operator[](int i) const {
     return stride * (begin + i);
   }
+#if CCCL_VERSION >= 3001000
+  __device__ offset_t& operator+=(int i) {
+    begin += i;
+    return *this;
+  }
+#endif
 };
 // Segmented sort by full sort algorithm:.
 // Say we are sorting a (2, 3) tensor. We have in flattened form:
