Skip to content

Commit b13228f

Browse files
committed
[libcu++] Optimize to_chars integral width calculation
1 parent f74dd50 commit b13228f

1 file changed

Lines changed: 52 additions & 1 deletion

File tree

libcudacxx/include/cuda/std/__charconv/to_chars.h

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,11 @@
2020
# pragma system_header
2121
#endif // no system header
2222

23+
#include <cuda/__cmath/ceil_div.h>
24+
#include <cuda/__cmath/ilog.h>
25+
#include <cuda/__cmath/pow2.h>
2326
#include <cuda/__cmath/uabs.h>
27+
#include <cuda/std/__bit/countl.h>
2428
#include <cuda/std/__charconv/chars_format.h>
2529
#include <cuda/std/__charconv/to_chars_result.h>
2630
#include <cuda/std/__concepts/concept_macros.h>
@@ -32,6 +36,7 @@
3236
#include <cuda/std/__type_traits/is_same.h>
3337
#include <cuda/std/__type_traits/is_signed.h>
3438
#include <cuda/std/__type_traits/make_unsigned.h>
39+
#include <cuda/std/__type_traits/num_bits.h>
3540
#include <cuda/std/cstdint>
3641

3742
#include <cuda/std/__cccl/prologue.h>
@@ -87,6 +92,27 @@ template <class _Tp>
8792
return __r;
8893
}
8994

95+
template <int _Base, class _Tp>
96+
[[nodiscard]] _CCCL_API constexpr int __to_chars_int_width(_Tp __v) noexcept
97+
{
98+
if constexpr (::cuda::is_power_of_two(_Base))
99+
{
100+
// For bases that are powers of 2, we can count leading zeros to compute the width more efficiently.
101+
constexpr auto __base_ilog2 = ::cuda::ilog2(_Base);
102+
103+
// If value == 0 still need one digit, so we always set the least significant bit.
104+
return ::cuda::ceil_div(__num_bits_v<_Tp> - ::cuda::std::countl_zero(static_cast<_Tp>(__v | 1)), __base_ilog2);
105+
}
106+
else if constexpr (_Base == 10)
107+
{
108+
return (__v > 1) ? ::cuda::ceil_ilog10(__v) : 1;
109+
}
110+
else
111+
{
112+
return ::cuda::std::__to_chars_int_width(__v, _Base);
113+
}
114+
}
115+
90116
template <class _Tp>
91117
_CCCL_API constexpr void __to_chars_int_generic(char* __last, _Tp __value, int __base) noexcept
92118
{
@@ -117,7 +143,32 @@ to_chars(char* __first, char* __last, _Tp __value, int __base = 10) noexcept
117143
else
118144
{
119145
const ptrdiff_t __cap = __last - __first;
120-
const int __n = ::cuda::std::__to_chars_int_width(__value, __base);
146+
147+
int __n{};
148+
switch (__base)
149+
{
150+
case 2:
151+
__n = ::cuda::std::__to_chars_int_width<2>(__value);
152+
break;
153+
case 4:
154+
__n = ::cuda::std::__to_chars_int_width<4>(__value);
155+
break;
156+
case 8:
157+
__n = ::cuda::std::__to_chars_int_width<8>(__value);
158+
break;
159+
case 10:
160+
__n = ::cuda::std::__to_chars_int_width<10>(__value);
161+
break;
162+
case 16:
163+
__n = ::cuda::std::__to_chars_int_width<16>(__value);
164+
break;
165+
case 32:
166+
__n = ::cuda::std::__to_chars_int_width<32>(__value);
167+
break;
168+
default:
169+
__n = ::cuda::std::__to_chars_int_width(__value, __base);
170+
break;
171+
}
121172

122173
if (__n > __cap)
123174
{

0 commit comments

Comments
 (0)