blob: 9123806df07563b841cb95b17ce8fc99a62a5d41 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h
index 755e620..85e445b 100644
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -209,7 +209,11 @@ namespace half_impl {
// conversion steps back and forth.
EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
+#if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+ return __hadd(::__half(a), ::__half(b));
+#else
return __hadd(a, b);
+#endif
}
EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
return __hmul(a, b);
@@ -218,9 +222,13 @@ EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
return __hsub(a, b);
}
EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
+#if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000
+ return __hdiv(a, b);
+#else
float num = __half2float(a);
float denom = __half2float(b);
return __float2half(num / denom);
+#endif
}
EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
return __hneg(a);
|