Skip to content

Commit c8552fa

Browse files
author
pradeep
committed
Remove cuda_thrust_sort_by_key static dependency
Instead of creating a static library out of all the separate instantiations of the thrust_sort_by_key sources, we now embed the generated sources (produced with CMake's configure_file command) directly into the afcuda target. This also fixes separable compilation. Prior to this change, separable compilation failed with undefined references during CUDA device linking. I tried to fix that problem but couldn't make a breakthrough. However, I realized that using the generated sources directly in the afcuda target does the job without any additional static library.
1 parent 1196646 commit c8552fa

3 files changed

Lines changed: 32 additions & 37 deletions

File tree

src/backend/cuda/CMakeLists.txt

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ cuda_select_nvcc_arch_flags(cuda_architecture_flags ${CUDA_architecture_build_ta
3939
message(STATUS "CUDA_architecture_build_targets: ${CUDA_architecture_build_targets}")
4040

4141
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};${cuda_architecture_flags})
42+
4243
if(${CUDA_SEPARABLE_COMPILATION})
4344
# Enable relocatable device code generation for separable
4445
# compilation which is in turn required for any device linking done.
@@ -245,6 +246,7 @@ include(kernel/scan_by_key/CMakeLists.txt)
245246
include(kernel/thrust_sort_by_key/CMakeLists.txt)
246247

247248
cuda_add_library(afcuda
249+
${thrust_sort_sources}
248250
sort.hpp
249251

250252
all.cu
@@ -551,11 +553,18 @@ cuda_add_library(afcuda
551553

552554
${scan_by_key_sources}
553555

554-
OPTIONS ${platform_flags} ${cuda_cxx_flags} -Xcudafe \"--diag_suppress=1427\"
556+
OPTIONS
557+
${platform_flags}
558+
${cuda_cxx_flags}
559+
-Xcudafe \"--diag_suppress=1427\"
555560
)
556561
557562
arrayfire_set_default_cxx_flags(afcuda)
558563
564+
# NOTE: Do not add additional CUDA-specific definitions here. Add them to the
565+
# cxx_definitions variable above. cxx_definitions is used to propagate
566+
# definitions to the scan_by_key and thrust_sort_by_key targets as well as the
567+
# cuda library above.
559568
target_compile_options(afcuda PRIVATE ${cxx_definitions})
560569
561570
add_library(ArrayFire::afcuda ALIAS afcuda)
@@ -594,7 +603,6 @@ target_link_libraries(afcuda
594603
c_api_interface
595604
cpp_api_interface
596605
afcommon_interface
597-
cuda_thrust_sort_by_key
598606
${CUDA_nvrtc_LIBRARY}
599607
${CUDA_CUBLAS_LIBRARIES}
600608
${CUDA_CUFFT_LIBRARIES}
Lines changed: 18 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
# Copyright (c) 2017, ArrayFire
1+
# Copyright (c) 2020, ArrayFire
22
# All rights reserved.
33
#
44
# This file is distributed under 3-clause BSD license.
55
# The complete license agreement can be obtained at:
66
# http://arrayfire.com/licenses/BSD-3-Clause
77

8-
file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl.cu" FILESTRINGS)
8+
file(STRINGS
9+
"${CMAKE_CURRENT_SOURCE_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl.cu"
10+
FILESTRINGS)
911

1012
foreach(STR ${FILESTRINGS})
1113
if(${STR} MATCHES "// SBK_TYPES")
@@ -18,35 +20,18 @@ foreach(STR ${FILESTRINGS})
1820
endforeach()
1921

2022
foreach(SBK_TYPE ${SBK_TYPES})
21-
foreach(SBK_INST ${SBK_INSTS})
22-
23-
# When using cuda_compile with older versions of FindCUDA. The generated targets
24-
# have the same names as the source file. Since we are using the same file for
25-
# the compilation of these targets we need to rename them before sending them
26-
# to the cuda_compile command so that it doesn't generate multiple targets with
27-
# the same name
28-
file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl.cu"
29-
DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/kernel/thrust_sort_by_key")
30-
file(RENAME "${CMAKE_CURRENT_BINARY_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl.cu"
31-
"${CMAKE_CURRENT_BINARY_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl_${SBK_TYPE}_${SBK_INST}.cu")
32-
33-
cuda_compile(sort_by_key_gen_files
34-
${CMAKE_CURRENT_BINARY_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl_${SBK_TYPE}_${SBK_INST}.cu
35-
${CMAKE_CURRENT_SOURCE_DIR}/kernel/thrust_sort_by_key_impl.hpp
36-
OPTIONS
37-
-DSBK_TYPE=${SBK_TYPE}
38-
-DINSTANTIATESBK_INST=INSTANTIATE${SBK_INST}
39-
"${platform_flags} ${cuda_cxx_flags} -DAFDLL"
40-
)
41-
42-
list(APPEND SORT_OBJ ${sort_by_key_gen_files})
43-
endforeach(SBK_INST ${SBK_INSTS})
23+
foreach(SBK_INST ${SBK_INSTS})
24+
set(INSTANTIATESBK_INST "INSTANTIATE${SBK_INST}")
25+
26+
configure_file(
27+
"${CMAKE_CURRENT_SOURCE_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl.cu"
28+
"${CMAKE_CURRENT_BINARY_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl_${SBK_TYPE}_${SBK_INST}.cu"
29+
)
30+
31+
list(
32+
APPEND
33+
thrust_sort_sources
34+
"${CMAKE_CURRENT_BINARY_DIR}/kernel/thrust_sort_by_key/thrust_sort_by_key_impl_${SBK_TYPE}_${SBK_INST}.cu"
35+
)
36+
endforeach(SBK_INST ${SBK_INSTS})
4437
endforeach(SBK_TYPE ${SBK_TYPES})
45-
46-
cuda_add_library(cuda_thrust_sort_by_key STATIC ${SORT_OBJ})
47-
48-
set_target_properties(cuda_thrust_sort_by_key
49-
PROPERTIES
50-
LINKER_LANGUAGE CXX
51-
FOLDER "Generated Targets"
52-
)

src/backend/cuda/kernel/thrust_sort_by_key/thrust_sort_by_key_impl.cu

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616

1717
namespace cuda {
1818
namespace kernel {
19-
INSTANTIATESBK_INST(SBK_TYPE)
20-
}
19+
// clang-format off
20+
@INSTANTIATESBK_INST@ ( @SBK_TYPE@ )
21+
// clang-format on
22+
} // namespace kernel
2123
} // namespace cuda

0 commit comments

Comments
 (0)