55# The complete license agreement can be obtained at:
66# http://arrayfire.com/licenses/BSD-3-Clause
77
8+ dependency_check (CUDA_FOUND "CUDA not found." )
9+
10+ include (AFcuda_helpers )
11+ include (FileToString )
812include (InternalUtils )
913include (select_compute_arch )
1014
11- dependency_check (CUDA_FOUND "CUDA not found." )
12-
13- find_cuda_helper_libs (nvrtc )
14- find_cuda_helper_libs (nvrtc-builtins )
15+ # Remove cublas_device library which is no longer included with the cuda
16+ # toolkit. Fixes issues with older CMake versions
17+ if (DEFINED CUDA_cublas_device_LIBRARY AND NOT CUDA_cublas_device_LIBRARY)
18+ list (REMOVE_ITEM CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_device_LIBRARY} )
19+ endif ()
1520
1621if (NOT OPENGL_FOUND)
1722 # create a dummy gl.h header to satisfy cuda_gl_interop.h requirement
@@ -24,9 +29,50 @@ if(NOT OPENGL_FOUND)
2429 file (WRITE "${dummy_gl_root} /gl.h" "// Dummy file to satisy cuda_gl_interop" )
2530endif ()
2631
27- get_filename_component (CUDA_LIBRARIES_PATH ${CUDA_cudart_static_LIBRARY} DIRECTORY CACHE )
32+ # Find if CUDA Toolkit is at least 10.0 to use static
33+ # lapack library. Otherwise, we have to use regular shared library
34+ if (UNIX AND CUDA_VERSION_MAJOR VERSION_GREATER 10 OR CUDA_VERSION_MAJOR VERSION_EQUAL 10)
35+ set (use_static_cuda_lapack ON )
36+ else ()
37+ set (use_static_cuda_lapack OFF )
38+ endif ()
2839
29- include (FileToString )
40+ find_cuda_helper_libs (nvrtc )
41+ find_cuda_helper_libs (nvrtc-builtins )
42+ if (UNIX )
43+ af_find_static_cuda_libs (culibos )
44+ af_find_static_cuda_libs (cublas_static )
45+ af_find_static_cuda_libs (cublasLt_static )
46+ af_find_static_cuda_libs (cufft_static )
47+ af_find_static_cuda_libs (cusparse_static )
48+
49+ # FIXME When NVCC resolves this particular issue.
50+ # NVCC doesn't like -l<full_path_static_lib>, hence we cannot
51+ # use ${CMAKE_*_LIBRARY} variables in the following flags.
52+ set (af_cuda_static_flags "-rdc=true;-dlink" )
53+ set (af_cuda_static_flags "${af_cuda_static_flags} ;-lculibos" )
54+ set (af_cuda_static_flags "${af_cuda_static_flags} ;-lcublas_static" )
55+ set (af_cuda_static_flags "${af_cuda_static_flags} ;-lcublasLt_static" )
56+ set (af_cuda_static_flags "${af_cuda_static_flags} ;-lcufft_static" )
57+ set (af_cuda_static_flags "${af_cuda_static_flags} ;-lcusparse_static" )
58+
59+ if (${use_static_cuda_lapack} )
60+ af_find_static_cuda_libs (cusolver_static )
61+ set (cusolver_static_lib "${CUDA_cusolver_static_LIBRARY} " )
62+
63+ # NVIDIA LAPACK library liblapack_static.a is a subset of LAPACK and only
64+ # contains GPU accelerated stedc and bdsqr. The user has to link
65+ # libcusolver_static.a with liblapack_static.a in order to build
66+ # successfully.
67+ af_find_static_cuda_libs (lapack_static )
68+
69+ set (af_cuda_static_flags "${af_cuda_static_flags} ;-lcusolver_static" )
70+ else ()
71+ set (cusolver_lib "${CUDA_cusolver_LIBRARY} " )
72+ endif ()
73+ endif ()
74+
75+ get_filename_component (CUDA_LIBRARIES_PATH ${CUDA_cudart_static_LIBRARY} DIRECTORY CACHE )
3076
3177if (NOT CUDA_architecture_build_targets)
3278 cuda_detect_installed_gpus (detected_gpus )
@@ -171,54 +217,9 @@ file_to_string(
171217 NULLTERM
172218 )
173219
174- ## Copied from FindCUDA.cmake
175- ## The target_link_library needs to link with the cuda libraries using
176- ## PRIVATE
177- function (cuda_add_library cuda_target )
178- cuda_add_cuda_include_once ()
179-
180- # Separate the sources from the options
181- cuda_get_sources_and_options (_sources _cmake_options _options ${ARGN} )
182- cuda_build_shared_library (_cuda_shared_flag ${ARGN} )
183- # Create custom commands and targets for each file.
184- cuda_wrap_srcs ( ${cuda_target} OBJ _generated_files ${_sources}
185- ${_cmake_options} ${_cuda_shared_flag}
186- OPTIONS ${_options} )
187-
188- # Compute the file name of the intermedate link file used for separable
189- # compilation.
190- cuda_compute_separable_compilation_object_file_name (link_file ${cuda_target} "${${cuda_target} _SEPARABLE_COMPILATION_OBJECTS}" )
191-
192- # Add the library.
193- add_library (${cuda_target} ${_cmake_options}
194- ${_generated_files}
195- ${_sources}
196- ${link_file}
197- )
198-
199- # Add a link phase for the separable compilation if it has been enabled. If
200- # it has been enabled then the ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS
201- # variable will have been defined.
202- cuda_link_separable_compilation_objects ("${link_file} " ${cuda_target} "${_options} " "${${cuda_target} _SEPARABLE_COMPILATION_OBJECTS}" )
203-
204- target_link_libraries (${cuda_target}
205- PRIVATE ${CUDA_LIBRARIES}
206- )
207-
208- # We need to set the linker language based on what the expected generated file
209- # would be. CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
210- set_target_properties (${cuda_target}
211- PROPERTIES
212- LINKER_LANGUAGE ${CUDA_C_OR_CXX}
213- POSITION_INDEPENDENT_CODE ON
214- )
215-
216- endfunction ()
217-
218220arrayfire_get_cuda_cxx_flags (cuda_cxx_flags )
219221arrayfire_get_platform_definitions (platform_flags )
220222
221-
222223get_property (boost_includes TARGET Boost::boost PROPERTY INTERFACE_INCLUDE_DIRECTORIES )
223224get_property (boost_definitions TARGET Boost::boost PROPERTY INTERFACE_COMPILE_DEFINITIONS )
224225
@@ -245,9 +246,78 @@ list(APPEND cuda_cxx_flags ${cxx_definitions})
245246include (kernel/scan_by_key/CMakeLists.txt )
246247include (kernel/thrust_sort_by_key/CMakeLists.txt )
247248
249+ # CUDA static libraries require device linking to successfully link
250+ # against afcuda target. Device linking requires CUDA_SEPARABLE_COMPILATION
251+ # to be ON. Therefore, we turn on separable compilation for a subset of
252+ # source files while compiling af_cuda_static_cuda_library target. Once
253+ # this subset is compiled, separable compilation is reset to it's original
254+ # value.
255+ if (UNIX )
256+ # Static linking cuda libs require device linking, which in turn
257+ # requires separable compilation.
258+ set (pior_val_CUDA_SEPARABLE_COMPILATION OFF )
259+ if (DEFINED CUDA_SEPARABLE_COMPILATION)
260+ set (pior_val_CUDA_SEPARABLE_COMPILATION ${CUDA_SEPARABLE_COMPILATION} )
261+ endif ()
262+ set (CUDA_SEPARABLE_COMPILATION ON )
263+ endif ()
264+
265+ cuda_add_library (af_cuda_static_cuda_library STATIC
266+ blas.cu
267+ blas.hpp
268+ cufft.cu
269+ cufft.hpp
270+ fft.cu
271+ sparse.cu
272+ sparse.hpp
273+ sparse_arith.cu
274+ sparse_arith.hpp
275+ sparse_blas.cu
276+ sparse_blas.hpp
277+ solve.cu
278+ solve.hpp
279+
280+ OPTIONS
281+ ${platform_flags} ${cuda_cxx_flags} ${af_cuda_static_flags}
282+ -Xcudafe \"--diag_suppress=1427\" -DAFDLL
283+ )
284+
285+ set_target_properties(af_cuda_static_cuda_library
286+ PROPERTIES
287+ LINKER_LANGUAGE CXX
288+ FOLDER " Generated Targets "
289+ )
290+
291+ if(UNIX)
292+ target_link_libraries(af_cuda_static_cuda_library
293+ PRIVATE
294+ Boost::boost
295+ ${CMAKE_DL_LIBS }
296+ ${cusolver_lib}
297+ -Wl,--start-group
298+ ${CUDA_culibos_LIBRARY} #also a static libary
299+ ${CUDA_cublas_static_LIBRARY}
300+ ${CUDA_cublasLt_static_LIBRARY}
301+ ${CUDA_cufft_static_LIBRARY}
302+ ${CUDA_lapack_static_LIBRARY}
303+ ${CUDA_cusparse_static_LIBRARY}
304+ ${cusolver_static_lib}
305+ -Wl,--end-group
306+ )
307+ set(CUDA_SEPARABLE_COMPILATION ${pior_val_CUDA_SEPARABLE_COMPILATION} )
308+ else()
309+ target_link_libraries(af_cuda_static_cuda_library
310+ PRIVATE
311+ Boost::boost
312+ ${CUDA_CUBLAS_LIBRARIES}
313+ ${CUDA_CUFFT_LIBRARIES}
314+ ${CUDA_cusolver_LIBRARY}
315+ ${CUDA_cusparse_LIBRARY}
316+ )
317+ endif()
318+
248319cuda_add_library(afcuda
249320 ${thrust_sort_sources}
250- sort .hpp
251321
252322 all.cu
253323 anisotropic_diffusion.cpp
@@ -390,7 +460,6 @@ cuda_add_library(afcuda
390460 backend.hpp
391461 bilateral.hpp
392462 binary.hpp
393- blas.cpp
394463 blas.hpp
395464 canny.hpp
396465 cast.hpp
@@ -407,7 +476,6 @@ cuda_add_library(afcuda
407476 cudnn.hpp
408477 cudnnModule.cpp
409478 cudnnModule.hpp
410- cufft.cpp
411479 cufft.hpp
412480 cusolverDn.cpp
413481 cusolverDn.hpp
@@ -427,7 +495,6 @@ cuda_add_library(afcuda
427495 fast.hpp
428496 fast_pyramid.cpp
429497 fast_pyramid.hpp
430- fft.cpp
431498 fft.hpp
432499 fftconvolve.cpp
433500 fftconvolve.hpp
@@ -509,15 +576,12 @@ cuda_add_library(afcuda
509576 shift.hpp
510577 sift.hpp
511578 sobel.hpp
512- solve.cpp
513579 solve.hpp
580+ sort.hpp
514581 sort_by_key.hpp
515582 sort_index.hpp
516- sparse.cpp
517583 sparse.hpp
518- sparse_arith.cpp
519584 sparse_arith.hpp
520- sparse_blas.cpp
521585 sparse_blas.hpp
522586 surface.cpp
523587 surface.hpp
@@ -570,6 +634,8 @@ target_compile_options(afcuda PRIVATE ${cxx_definitions})
570634add_library(ArrayFire::afcuda ALIAS afcuda)
571635
572636add_dependencies(afcuda ${jit_kernel_targets} ${nvrtc_kernel_targets} )
637+ add_dependencies(af_cuda_static_cuda_library ${nvrtc_kernel_targets} )
638+ add_dependencies(afcuda af_cuda_static_cuda_library)
573639
574640target_include_directories (afcuda
575641 PUBLIC
@@ -586,29 +652,14 @@ target_include_directories (afcuda
586652 ${cuDNN_INCLUDE_DIRS}
587653)
588654
589- # Remove cublas_device library which is no longer included with the cuda
590- # toolkit. Fixes issues with older CMake versions
591- if(DEFINED CUDA_cublas_device_LIBRARY AND NOT CUDA_cublas_device_LIBRARY)
592- list(REMOVE_ITEM CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_device_LIBRARY} )
593- endif()
594-
595- # Remove cublas_device library which is no longer included with the cuda
596- # toolkit. Fixes issues with older CMake versions
597- if(DEFINED CUDA_cublas_device_LIBRARY AND NOT CUDA_cublas_device_LIBRARY)
598- list(REMOVE_ITEM CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_device_LIBRARY} )
599- endif()
600-
601655target_link_libraries(afcuda
602656 PRIVATE
603657 c_api_interface
604658 cpp_api_interface
605659 afcommon_interface
606- ${CUDA_nvrtc_LIBRARY}
607- ${CUDA_CUBLAS_LIBRARIES}
608- ${CUDA_CUFFT_LIBRARIES}
609- ${CUDA_cusolver_LIBRARY}
610- ${CUDA_cusparse_LIBRARY}
611660 ${CMAKE_DL_LIBS }
661+ ${CUDA_nvrtc_LIBRARY}
662+ af_cuda_static_cuda_library
612663 )
613664
614665# If the driver is not found the cuda driver api need to be linked against the
@@ -703,13 +754,17 @@ function(afcu_collect_libs libname)
703754endfunction()
704755
705756if(AF_INSTALL_STANDALONE)
706- afcu_collect_libs(cufft)
707757 afcu_collect_libs(cudnn)
708- afcu_collect_libs(cublas)
709- afcu_collect_libs(cublasLt)
710- afcu_collect_libs(cusolver)
711- afcu_collect_libs(cusparse)
712758 afcu_collect_libs(nvrtc FULL_VERSION)
759+ if(WIN32)
760+ afcu_collect_libs(cufft)
761+ afcu_collect_libs(cublas)
762+ afcu_collect_libs(cublasLt)
763+ afcu_collect_libs(cusolver)
764+ afcu_collect_libs(cusparse)
765+ elseif(NOT ${use_static_cuda_lapack} )
766+ afcu_collect_libs(cusolver)
767+ endif()
713768
714769 if(APPLE)
715770 afcu_collect_libs(cudart)
0 commit comments