We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 363e86e, commit 87513e0 (copy full SHA for 87513e0)
3 files changed
src/backend/cuda/kernel/harris.hpp
@@ -19,6 +19,7 @@
19
#include "convolve.hpp"
20
#include "gradient.hpp"
21
#include "sort_by_key.hpp"
22
+#include "range.hpp"
23
24
namespace cuda
25
{
@@ -336,7 +337,9 @@ void harris(unsigned* corners_out,
336
337
338
int sort_elem = harris_responses.strides[3] * harris_responses.dims[3];
339
harris_responses.ptr = d_resp_corners;
340
+ // Create indices using range
341
harris_idx.ptr = memAlloc<unsigned>(sort_elem);
342
+ kernel::range<uint>(harris_idx, 0);
343
344
// Sort Harris responses
345
sort0ByKey<float, uint, false>(harris_responses, harris_idx);
src/backend/cuda/kernel/orb.hpp
@@ -17,6 +17,7 @@
17
18
#include "orb_patch.hpp"
#include <boost/scoped_ptr.hpp>
@@ -394,10 +395,12 @@ void orb(unsigned* out_feat,
394
395
396
int sort_elem = harris_sorted.strides[3] * harris_sorted.dims[3];
397
harris_sorted.ptr = d_score_harris;
398
399
400
401
402
// Sort features according to Harris responses
- sort0ByKey<float, uint, false>(harris_sorted, harris_idx);
403
+ kernel::sort0ByKey<float, uint, false>(harris_sorted, harris_idx);
404
405
feat_pyr[i] = std::min(feat_pyr[i], lvl_best[i]);
406
src/backend/cuda/kernel/range.hpp
@@ -18,10 +18,10 @@ namespace cuda
namespace kernel
// Kernel Launch Config Values
- static const unsigned TX = 32;
- static const unsigned TY = 8;
- static const unsigned TILEX = 512;
- static const unsigned TILEY = 32;
+ static const unsigned RANGE_TX = 32;
+ static const unsigned RANGE_TY = 8;
+ static const unsigned RANGE_TILEX = 512;
+ static const unsigned RANGE_TILEY = 32;
26
template<typename T>
27
__global__
@@ -74,10 +74,10 @@ namespace cuda
74
75
void range(Param<T> out, const int dim)
76
77
- dim3 threads(TX, TY, 1);
+ dim3 threads(RANGE_TX, RANGE_TY, 1);
78
79
- int blocksPerMatX = divup(out.dims[0], TILEX);
80
- int blocksPerMatY = divup(out.dims[1], TILEY);
+ int blocksPerMatX = divup(out.dims[0], RANGE_TILEX);
+ int blocksPerMatY = divup(out.dims[1], RANGE_TILEY);
81
dim3 blocks(blocksPerMatX * out.dims[2],
82
blocksPerMatY * out.dims[3],
83
1);
0 commit comments