1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
|
diff --git a/cuda/common/include/pcl/cuda/thrust.h b/cuda/common/include/pcl/cuda/thrust.h
index 57586ab..af073d7 100644
--- a/cuda/common/include/pcl/cuda/thrust.h
+++ b/cuda/common/include/pcl/cuda/thrust.h
@@ -42,6 +42,7 @@
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
+#include <thrust/device_malloc.h>
#include <thrust/copy.h>
#include <thrust/device_ptr.h>
#include <thrust/sequence.h>
diff --git a/gpu/features/src/fpfh.cu b/gpu/features/src/fpfh.cu
index 8d34f76..59a5f0c 100644
--- a/gpu/features/src/fpfh.cu
+++ b/gpu/features/src/fpfh.cu
@@ -231,12 +231,12 @@ namespace pcl
int *sinds = sindices + Warp::WARP_SIZE * warp_idx;
int size = sizes[idx];
- for(int i = lane; __any(i < size); i += Warp::STRIDE)
+ for(int i = lane; __any_sync(0xFFFFFFFF, i < size); i += Warp::STRIDE)
{
if (i < size)
sinds[lane] = ginds[i];
- int inds_num = __popc(__ballot(i < size));
+ int inds_num = __popc(__ballot_sync(0xFFFFFFFF, i < size));
for(int j = 0; j < inds_num; ++j)
{
diff --git a/gpu/octree/src/cuda/approx_nsearch.cu b/gpu/octree/src/cuda/approx_nsearch.cu
index e457255..3e1adfe 100644
--- a/gpu/octree/src/cuda/approx_nsearch.cu
+++ b/gpu/octree/src/cuda/approx_nsearch.cu
@@ -141,7 +141,7 @@ namespace pcl { namespace device { namespace appnearest_search
{
__shared__ volatile int per_warp_buffer[KernelPolicy::WARPS_COUNT];
- int mask = __ballot(node_idx != -1);
+ int mask = __ballot_sync(0xFFFFFFFF, node_idx != -1);
while(mask)
{
@@ -275,7 +275,7 @@ namespace pcl { namespace device { namespace appnearest_search
bool active = query_index < batch.queries_num;
- if (__all(active == false))
+ if (__all_sync(0xFFFFFFFF, active == false))
return;
Warp_appNearestSearch search(batch, query_index);
diff --git a/gpu/octree/src/cuda/knn_search.cu b/gpu/octree/src/cuda/knn_search.cu
index a99655d..b55e3c1 100644
--- a/gpu/octree/src/cuda/knn_search.cu
+++ b/gpu/octree/src/cuda/knn_search.cu
@@ -106,7 +106,7 @@ namespace pcl { namespace device { namespace knn_search
else
query_index = -1;
- while(__any(active))
+ while(__any_sync(0xFFFFFFFF, active))
{
int leaf = -1;
@@ -163,7 +163,7 @@ namespace pcl { namespace device { namespace knn_search
__device__ __forceinline__ void processLeaf(int node_idx)
{
- int mask = __ballot(node_idx != -1);
+ int mask = __ballot_sync(0xFFFFFFFF, node_idx != -1);
unsigned int laneId = Warp::laneId();
unsigned int warpId = Warp::id();
@@ -310,7 +310,7 @@ namespace pcl { namespace device { namespace knn_search
bool active = query_index < batch.queries_num;
- if (__all(active == false))
+ if (__all_sync(0xFFFFFFFF, active == false))
return;
Warp_knnSearch search(batch, query_index);
diff --git a/gpu/octree/src/cuda/radius_search.cu b/gpu/octree/src/cuda/radius_search.cu
index f90273f..8ae84e7 100644
--- a/gpu/octree/src/cuda/radius_search.cu
+++ b/gpu/octree/src/cuda/radius_search.cu
@@ -155,7 +155,7 @@ namespace pcl
else
query_index = -1;
- while(__any(active))
+ while(__any_sync(0xFFFFFFFF, active))
{
int leaf = -1;
@@ -217,7 +217,7 @@ namespace pcl
__device__ __forceinline__ void processLeaf(int leaf)
{
- int mask = __ballot(leaf != -1);
+ int mask = __ballot_sync(0xFFFFFFFF, leaf != -1);
while(mask)
{
@@ -255,7 +255,7 @@ namespace pcl
int *out = batch.output + active_query_index * batch.max_results + active_found_count;
int length_left = batch.max_results - active_found_count;
- int test = __any(active_lane == laneId && (leaf & KernelPolicy::CHECK_FLAG));
+ int test = __any_sync(0xFFFFFFFF, active_lane == laneId && (leaf & KernelPolicy::CHECK_FLAG));
if (test)
{
@@ -329,7 +329,7 @@ namespace pcl
total_new += new_nodes;
out += new_nodes;
- if (__all(idx >= length) || __any(out_of_bounds) || total_new == length_left)
+ if (__all_sync(0xFFFFFFFF, idx >= length) || __any_sync(0xFFFFFFFF, out_of_bounds) || total_new == length_left)
break;
}
return min(total_new, length_left);
@@ -343,7 +343,7 @@ namespace pcl
bool active = query_index < batch.queries.size;
- if (__all(active == false))
+ if (__all_sync(0xFFFFFFFF, active == false))
return;
Warp_radiusSearch<BatchType> search(batch, query_index);
|