Skip to content

Commit 87513e0

Browse files
committed
Fix sort calls from harris and orb in CUDA
1 parent 363e86e commit 87513e0

3 files changed

Lines changed: 14 additions & 8 deletions

File tree

src/backend/cuda/kernel/harris.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
#include "convolve.hpp"
2020
#include "gradient.hpp"
2121
#include "sort_by_key.hpp"
22+
#include "range.hpp"
2223

2324
namespace cuda
2425
{
@@ -336,7 +337,9 @@ void harris(unsigned* corners_out,
336337

337338
int sort_elem = harris_responses.strides[3] * harris_responses.dims[3];
338339
harris_responses.ptr = d_resp_corners;
340+
// Create indices using range
339341
harris_idx.ptr = memAlloc<unsigned>(sort_elem);
342+
kernel::range<uint>(harris_idx, 0);
340343

341344
// Sort Harris responses
342345
sort0ByKey<float, uint, false>(harris_responses, harris_idx);

src/backend/cuda/kernel/orb.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include "convolve.hpp"
1818
#include "orb_patch.hpp"
1919
#include "sort_by_key.hpp"
20+
#include "range.hpp"
2021

2122
#include <boost/scoped_ptr.hpp>
2223

@@ -394,10 +395,12 @@ void orb(unsigned* out_feat,
394395

395396
int sort_elem = harris_sorted.strides[3] * harris_sorted.dims[3];
396397
harris_sorted.ptr = d_score_harris;
398+
// Create indices using range
397399
harris_idx.ptr = memAlloc<unsigned>(sort_elem);
400+
kernel::range<uint>(harris_idx, 0);
398401

399402
// Sort features according to Harris responses
400-
sort0ByKey<float, uint, false>(harris_sorted, harris_idx);
403+
kernel::sort0ByKey<float, uint, false>(harris_sorted, harris_idx);
401404

402405
feat_pyr[i] = std::min(feat_pyr[i], lvl_best[i]);
403406

src/backend/cuda/kernel/range.hpp

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,10 @@ namespace cuda
1818
namespace kernel
1919
{
2020
// Kernel Launch Config Values
21-
static const unsigned TX = 32;
22-
static const unsigned TY = 8;
23-
static const unsigned TILEX = 512;
24-
static const unsigned TILEY = 32;
21+
static const unsigned RANGE_TX = 32;
22+
static const unsigned RANGE_TY = 8;
23+
static const unsigned RANGE_TILEX = 512;
24+
static const unsigned RANGE_TILEY = 32;
2525

2626
template<typename T>
2727
__global__
@@ -74,10 +74,10 @@ namespace cuda
7474
template<typename T>
7575
void range(Param<T> out, const int dim)
7676
{
77-
dim3 threads(TX, TY, 1);
77+
dim3 threads(RANGE_TX, RANGE_TY, 1);
7878

79-
int blocksPerMatX = divup(out.dims[0], TILEX);
80-
int blocksPerMatY = divup(out.dims[1], TILEY);
79+
int blocksPerMatX = divup(out.dims[0], RANGE_TILEX);
80+
int blocksPerMatY = divup(out.dims[1], RANGE_TILEY);
8181
dim3 blocks(blocksPerMatX * out.dims[2],
8282
blocksPerMatY * out.dims[3],
8383
1);

0 commit comments

Comments
 (0)