-
Notifications
You must be signed in to change notification settings - Fork 548
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
928 lines (848 loc) · 26.8 KB
/
CMakeLists.txt
File metadata and controls
928 lines (848 loc) · 26.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
# Copyright (c) 2017, ArrayFire
# All rights reserved.
#
# This file is distributed under 3-clause BSD license.
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
# Version resource description for the afcuda binary; the resulting file
# variable is consumed by add_library(afcuda ...) on Windows.
generate_product_version(af_cuda_ver_res_file
  FILE_NAME "afcuda"
  FILE_DESCRIPTION "CUDA Backend Dynamic-link library"
)

# Hard requirements of this backend.
dependency_check(CUDA_FOUND "CUDA not found.")
if(AF_WITH_CUDNN)
  dependency_check(cuDNN_FOUND "CUDNN not found.")
endif()

# Project helper modules used throughout this file.
include(AFcuda_helpers)
include(FileToString)
include(InternalUtils)
include(select_compute_arch)

# The cublas_device library is no longer shipped with the CUDA toolkit.
# When the variable is defined but evaluates to false, strip it from the
# cuBLAS library list; this fixes issues with older CMake versions.
if(DEFINED CUDA_cublas_device_LIBRARY AND NOT CUDA_cublas_device_LIBRARY)
  list(REMOVE_ITEM CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_device_LIBRARY})
endif()

if(NOT OPENGL_FOUND)
  # cuda_gl_interop.h requires a gl.h header. All OpenGL functionality is
  # provided by the glad third party code built along with ArrayFire, so an
  # empty placeholder header in the build tree is sufficient.
  if(APPLE)
    set(dummy_gl_root "${ArrayFire_BINARY_DIR}/include/OpenGL")
  else()
    set(dummy_gl_root "${ArrayFire_BINARY_DIR}/include/GL")
  endif()
  file(WRITE "${dummy_gl_root}/gl.h" "// Dummy file to satisy cuda_gl_interop")
endif()
# The static LAPACK library can only be used on UNIX with CUDA Toolkit 10
# or newer; every other configuration uses the regular shared library.
if(UNIX AND CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL 10)
  set(use_static_cuda_lapack ON)
else()
  set(use_static_cuda_lapack OFF)
endif()

# User-selectable list of compute architectures to build for.
set(CUDA_architecture_build_targets "Auto" CACHE
  STRING "The compute architectures targeted by this build. (Options: Auto;3.0;Maxwell;All;Common)")

# Locate the NVRTC libraries. nvrtc-builtins is only added to the link list
# on UNIX.
find_cuda_helper_libs(nvrtc)
find_cuda_helper_libs(nvrtc-builtins)

list(APPEND nvrtc_libs ${CUDA_nvrtc_LIBRARY})
if(UNIX)
  list(APPEND nvrtc_libs ${CUDA_nvrtc-builtins_LIBRARY})
endif()
if(UNIX AND AF_WITH_STATIC_CUDA_NUMERIC_LIBS)
  # Libraries that are either linked statically or loaded at runtime,
  # depending on the options selected below.
  set(AF_CUDA_optionally_static_libraries)

  af_multiple_option(NAME AF_cusparse_LINK_LOADING
    DEFAULT "Module"
    DESCRIPTION "The approach to load the cusparse library. Static linking(Static) or Dynamic runtime loading(Module) of the module"
    OPTIONS "Module" "Static")

  if(AF_cusparse_LINK_LOADING STREQUAL "Static")
    af_find_static_cuda_libs(cusparse_static PRUNE)
    list(APPEND AF_CUDA_optionally_static_libraries ${AF_CUDA_cusparse_static_LIBRARY})
  endif()

  # Static numeric libraries that are always required in this mode.
  af_find_static_cuda_libs(culibos)
  af_find_static_cuda_libs(cublas_static PRUNE)
  af_find_static_cuda_libs(cublasLt_static PRUNE)
  af_find_static_cuda_libs(cufft_static)

  # For CUDA versions greater than 11.4 the static NVRTC libraries replace
  # the nvrtc link list assembled earlier.
  if(CUDA_VERSION VERSION_GREATER 11.4)
    af_find_static_cuda_libs(nvrtc_static)
    af_find_static_cuda_libs(nvrtc-builtins_static)
    af_find_static_cuda_libs(nvptxcompiler_static)
    set(nvrtc_libs
      ${AF_CUDA_nvrtc_static_LIBRARY}
      ${AF_CUDA_nvrtc-builtins_static_LIBRARY}
      ${AF_CUDA_nvptxcompiler_static_LIBRARY})
  endif()

  # FIXME: revisit once NVCC resolves this particular issue.
  # NVCC doesn't like -l<full_path_static_lib>, hence the
  # ${CMAKE_*_LIBRARY} variables cannot be used in the following flags;
  # plain -l<name> entries are appended instead.
  set(af_cuda_static_flags "${af_cuda_static_flags};-lculibos;-lcublas_static")
  if(CUDA_VERSION VERSION_GREATER 10.0)
    set(af_cuda_static_flags "${af_cuda_static_flags};-lcublasLt_static")
  endif()
  set(af_cuda_static_flags "${af_cuda_static_flags};-lcufft_static")

  if(${use_static_cuda_lapack})
    af_find_static_cuda_libs(cusolver_static PRUNE)
    set(cusolver_static_lib "${AF_CUDA_cusolver_static_LIBRARY}")

    # NVIDIA's liblapack_static.a is a subset of LAPACK that only contains
    # the GPU accelerated stedc and bdsqr routines. libcusolver_static.a has
    # to be linked together with it in order to build successfully.
    # CUDA versions >= 12.0 renamed the library to
    # libcusolver_lapack_static.a.
    if(CUDA_VERSION VERSION_GREATER_EQUAL 12.0)
      af_find_static_cuda_libs(cusolver_lapack_static)
    else()
      af_find_static_cuda_libs(lapack_static)
    endif()

    set(af_cuda_static_flags "${af_cuda_static_flags};-lcusolver_static")
  else()
    # Shared cuSOLVER is used instead; OpenMP is linked alongside it.
    set(cusolver_lib "${CUDA_cusolver_LIBRARY}" OpenMP::OpenMP_CXX)
  endif()
endif()
# Cache the directory that contains the static CUDA runtime library; it is
# reused later as the search location for other CUDA libraries.
get_filename_component(CUDA_LIBRARIES_PATH ${CUDA_cudart_static_LIBRARY} DIRECTORY CACHE)

mark_as_advanced(CUDA_LIBRARIES_PATH CUDA_architecture_build_targets)

# For CUDA major versions below 11, CUB is looked up as a package and, when
# no CUB::CUB target results, fetched from its repository at a pinned tag.
if(CUDA_VERSION_MAJOR VERSION_LESS 11)
  find_package(CUB)
  if(NOT TARGET CUB::CUB)
    af_dep_check_and_populate(${cub_prefix}
      URI https://github.com/NVIDIA/cub.git
      REF 1.10.0
    )
    find_package(CUB REQUIRED
      PATHS ${${cub_prefix}_SOURCE_DIR})
  endif()
endif()
# --- JIT kernel source converted to a string header ------------------------
file(GLOB jit_src "kernel/jit.cuh")

file_to_string(
  SOURCES    ${jit_src}
  VARNAME    jit_files
  EXTENSION  "hpp"
  OUTPUT_DIR "kernel_headers"
  TARGETS    jit_kernel_targets
  NAMESPACE  "arrayfire cuda"
  WITH_EXTENSION
)

# --- Sources handed to file_to_string for the NVRTC kernel headers ---------
set(nvrtc_src
  # CUDA toolkit headers
  ${CUDA_INCLUDE_DIRS}/cuda_fp16.h
  ${CUDA_INCLUDE_DIRS}/cuda_fp16.hpp
  ${CUDA_TOOLKIT_ROOT_DIR}/include/cuComplex.h
  ${CUDA_TOOLKIT_ROOT_DIR}/include/math_constants.h
  ${CUDA_TOOLKIT_ROOT_DIR}/include/vector_types.h
  ${CUDA_TOOLKIT_ROOT_DIR}/include/vector_functions.h

  # ArrayFire API headers
  ${PROJECT_SOURCE_DIR}/src/api/c/optypes.hpp
  ${PROJECT_SOURCE_DIR}/include/af/defines.h
  ${PROJECT_SOURCE_DIR}/include/af/traits.hpp
  ${PROJECT_BINARY_DIR}/include/af/version.h

  # Backend headers
  ${CMAKE_CURRENT_SOURCE_DIR}/Param.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/assign_kernel_param.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/backend.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/dims_param.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/interp.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/shared.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/math.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/minmax_op.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/utility.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/types.hpp

  # Headers from the common directory
  ${CMAKE_CURRENT_SOURCE_DIR}/../common/Binary.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../common/Transform.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../common/half.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../common/internal_enums.hpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../common/kernel_type.hpp

  # Kernel sources
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/anisotropic_diffusion.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/approx1.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/approx2.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/assign.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/bilateral.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/canny.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/convolve1.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/convolve2.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/convolve3.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/convolve_separable.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/copy.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/diagonal.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/diff.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/exampleFunction.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/fftconvolve.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/flood_fill.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/gradient.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/histogram.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/hsv_rgb.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/identity.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/iir.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/index.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/iota.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/ireduce.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/lookup.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/lu_split.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/match_template.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/meanshift.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/medfilt.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/memcopy.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/moments.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/morph.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/pad_array_borders.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/range.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/resize.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/reorder.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/rotate.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/select.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/scan_dim.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/scan_dim_by_key.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/scan_first.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/scan_first_by_key.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/sobel.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/sparse.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/sparse_arith.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/susan.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/tile.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/transform.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/transpose.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/transpose_inplace.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/triangle.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/unwrap.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/where.cuh
  ${CMAKE_CURRENT_SOURCE_DIR}/kernel/wrap.cuh
)

file_to_string(
  SOURCES    ${nvrtc_src}
  VARNAME    nvrtc_files
  EXTENSION  "hpp"
  OUTPUT_DIR "nvrtc_kernel_headers"
  TARGETS    nvrtc_kernel_targets
  NAMESPACE  "arrayfire cuda"
  WITH_EXTENSION
  NULLTERM
)

# Sub-lists pulled in with include(), so the variables they set live in this
# scope; add_library(afcuda ...) reads scan_by_key_sources and
# thrust_sort_sources.
include(kernel/scan_by_key/CMakeLists.txt)
include(kernel/thrust_sort_by_key/CMakeLists.txt)
# The CUDA backend library. Entries are kept in their original order.
# NOTE(review): a few headers (blas.hpp, cufft.hpp, sparse.hpp,
# sparse_arith.hpp, sparse_blas.hpp, solve.hpp) are listed twice below.
add_library(afcuda
  # Windows-only version resource
  $<$<PLATFORM_ID:Windows>:${af_cuda_ver_res_file}>

  ${thrust_sort_sources}

  # BLAS / FFT / sparse / solve
  blas.cu
  blas.hpp
  cudaDataType.hpp
  cufft.cu
  cufft.hpp
  cusparse_descriptor_helpers.hpp
  fft.cu
  sparse.cu
  sparse.hpp
  sparse_arith.cu
  sparse_arith.hpp
  sparse_blas.cu
  sparse_blas.hpp
  solve.cu
  solve.hpp

  EnqueueArgs.hpp
  all.cu
  anisotropic_diffusion.cpp
  any.cu
  approx.cpp
  bilateral.cpp
  canny.cpp
  count.cu
  Event.cpp
  Event.hpp
  exampleFunction.cpp
  fast.cu
  harris.cu
  histogram.cpp
  homography.cu
  hsv_rgb.cpp
  match_template.cpp
  max.cu
  mean.cu
  meanshift.cpp
  medfilt.cpp
  min.cu
  moments.cpp
  nearest_neighbour.cu
  orb.cu
  pad_array_borders.cpp
  product.cu
  random_engine.cu
  regions.cu
  resize.cpp
  rotate.cpp
  set.cu
  sift.cu
  sobel.cpp
  sort.cu
  sort_by_key.cu
  sort_index.cu
  sum.cu
  topk.cu
  transform.cpp
  transpose.cpp
  transpose_inplace.cpp

  # kernel/ directory
  kernel/anisotropic_diffusion.hpp
  kernel/approx.hpp
  kernel/assign.hpp
  kernel/atomics.hpp
  kernel/bilateral.hpp
  kernel/canny.hpp
  kernel/config.hpp
  kernel/convolve.hpp
  kernel/convolve_separable.cpp
  kernel/diagonal.hpp
  kernel/diff.hpp
  kernel/exampleFunction.hpp
  kernel/fast.hpp
  kernel/fast_lut.hpp
  kernel/fftconvolve.hpp
  kernel/flood_fill.hpp
  kernel/gradient.hpp
  kernel/harris.hpp
  kernel/histogram.hpp
  kernel/homography.hpp
  kernel/hsv_rgb.hpp
  kernel/identity.hpp
  kernel/iir.hpp
  kernel/index.hpp
  kernel/interp.hpp
  kernel/iota.hpp
  kernel/ireduce.hpp
  kernel/lookup.hpp
  kernel/lu_split.hpp
  kernel/match_template.hpp
  kernel/mean.hpp
  kernel/meanshift.hpp
  kernel/medfilt.hpp
  kernel/memcopy.hpp
  kernel/moments.hpp
  kernel/morph.hpp
  kernel/nearest_neighbour.hpp
  kernel/orb.hpp
  kernel/orb_patch.hpp
  kernel/pad_array_borders.hpp
  kernel/random_engine.hpp
  kernel/random_engine_mersenne.hpp
  kernel/random_engine_philox.hpp
  kernel/random_engine_threefry.hpp
  kernel/range.hpp
  kernel/reduce.hpp
  kernel/reduce_by_key.hpp
  kernel/regions.hpp
  kernel/reorder.hpp
  kernel/resize.hpp
  kernel/rotate.hpp
  kernel/scan_dim.hpp
  kernel/scan_dim_by_key.hpp
  kernel/scan_dim_by_key_impl.hpp
  kernel/scan_first.hpp
  kernel/scan_first_by_key.hpp
  kernel/scan_first_by_key_impl.hpp
  kernel/select.hpp
  kernel/shared.hpp
  kernel/shfl_intrinsics.hpp
  kernel/sift.hpp
  kernel/sobel.hpp
  kernel/sort.hpp
  kernel/sort_by_key.hpp
  kernel/sparse.hpp
  kernel/sparse_arith.hpp
  kernel/susan.hpp
  kernel/thrust_sort_by_key.hpp
  kernel/thrust_sort_by_key_impl.hpp
  kernel/tile.hpp
  kernel/topk.hpp
  kernel/transform.hpp
  kernel/transpose.hpp
  kernel/transpose_inplace.hpp
  kernel/triangle.hpp
  kernel/unwrap.hpp
  kernel/where.hpp
  kernel/wrap.hpp

  # Backend directory
  Array.cpp
  Array.hpp
  Kernel.cpp
  Kernel.hpp
  LookupTable1D.hpp
  Module.hpp
  Param.hpp
  ThrustAllocator.cuh
  ThrustArrayFirePolicy.hpp
  anisotropic_diffusion.hpp
  approx.hpp
  arith.hpp
  assign.cpp
  assign.hpp
  backend.hpp
  bilateral.hpp
  binary.hpp
  blas.hpp
  canny.hpp
  cast.hpp
  cholesky.cpp
  cholesky.hpp
  complex.hpp
  compile_module.cpp
  convolve.cpp
  convolve.hpp
  convolveNN.cpp
  copy.cpp
  copy.hpp
  cublas.cpp
  cublas.hpp

  # cuDNN sources, only when AF_WITH_CUDNN is enabled. The generator
  # expression intentionally spans several arguments.
  $<$<BOOL:${AF_WITH_CUDNN}>: cudnn.cpp
                              cudnn.hpp
                              cudnnModule.cpp
                              cudnnModule.hpp>

  cufft.hpp
  cusolverDn.cpp
  cusolverDn.hpp
  cusparse.cpp
  cusparse.hpp
  cusparseModule.cpp
  cusparseModule.hpp
  device_manager.cpp
  device_manager.hpp
  debug_cuda.hpp
  thrust_utils.hpp
  diagonal.cpp
  diagonal.hpp
  diff.cpp
  diff.hpp
  driver.cpp
  err_cuda.hpp
  exampleFunction.hpp
  fast.hpp
  fast_pyramid.cpp
  fast_pyramid.hpp
  fft.hpp
  fftconvolve.cpp
  fftconvolve.hpp
  flood_fill.cpp
  flood_fill.hpp
  GraphicsResourceManager.cpp
  GraphicsResourceManager.hpp
  gradient.cpp
  gradient.hpp
  harris.hpp
  hist_graphics.cpp
  hist_graphics.hpp
  histogram.hpp
  homography.hpp
  hsv_rgb.hpp
  identity.cpp
  identity.hpp
  iir.cpp
  iir.hpp
  image.cpp
  image.hpp
  index.cpp
  index.hpp
  inverse.cpp
  inverse.hpp
  iota.cpp
  iota.hpp
  ireduce.cpp
  ireduce.hpp
  jit.cpp
  join.cpp
  join.hpp
  logic.hpp
  lookup.cpp
  lookup.hpp
  lu.cpp
  lu.hpp
  match_template.hpp
  math.hpp
  mean.hpp
  meanshift.hpp
  medfilt.hpp
  memory.cpp
  memory.hpp
  minmax_op.hpp
  moments.hpp
  morph.cpp
  morph.hpp
  nearest_neighbour.hpp
  orb.hpp
  platform.cpp
  platform.hpp
  plot.cpp
  plot.hpp
  print.hpp
  qr.cpp
  qr.hpp
  random_engine.hpp
  range.cpp
  range.hpp
  reduce.hpp
  reduce_impl.hpp
  regions.hpp
  reorder.cpp
  reorder.hpp
  resize.hpp
  reshape.cpp
  rotate.hpp
  scalar.hpp
  scan.cpp
  scan.hpp
  scan_by_key.cpp
  scan_by_key.hpp
  select.cpp
  select.hpp
  set.hpp
  shift.cpp
  shift.hpp
  sift.hpp
  sobel.hpp
  solve.hpp
  sort.hpp
  sort_by_key.hpp
  sort_index.hpp
  sparse.hpp
  sparse_arith.hpp
  sparse_blas.hpp
  surface.cpp
  surface.hpp
  susan.cpp
  susan.hpp
  svd.cpp
  svd.hpp
  tile.cpp
  tile.hpp
  threadsMgt.hpp
  topk.hpp
  traits.hpp
  transform.hpp
  transpose.hpp
  triangle.cpp
  triangle.hpp
  types.hpp
  unary.hpp
  unwrap.cpp
  unwrap.hpp
  utility.cpp
  utility.hpp
  vector_field.cpp
  vector_field.hpp
  where.cpp
  where.hpp
  wrap.cpp
  wrap.hpp

  # jit/ directory
  jit/BufferNode.hpp
  jit/ShiftNode.hpp
  jit/kernel_generators.hpp

  ${scan_by_key_sources}
)
# Link the CUDA numeric libraries into afcuda, either statically (UNIX with
# AF_WITH_STATIC_CUDA_NUMERIC_LIBS) or through the regular shared libraries.
if(UNIX AND AF_WITH_STATIC_CUDA_NUMERIC_LIBS)
  # Wrap the static archives in a linker group when the toolchain accepts
  # the flag. When it does not, START_GROUP/END_GROUP stay unset and expand
  # to nothing.
  check_cxx_compiler_flag("-Wl,--start-group -Werror" group_flags)
  if(group_flags)
    set(START_GROUP -Wl,--start-group)
    set(END_GROUP -Wl,--end-group)
  endif()

  target_link_libraries(afcuda
    PRIVATE
      ${cusolver_lib}
      ${START_GROUP}
      ${CUDA_culibos_LIBRARY} # also a static library
      ${AF_CUDA_cublas_static_LIBRARY}
      ${AF_CUDA_cublasLt_static_LIBRARY}
      ${AF_CUDA_cufft_static_LIBRARY}
      ${AF_CUDA_optionally_static_libraries}
      ${nvrtc_libs}
      ${cusolver_static_lib}
      ${END_GROUP})

  if(CUDA_VERSION VERSION_GREATER 10.0)
    target_link_libraries(afcuda
      PRIVATE
        ${AF_CUDA_cublasLt_static_LIBRARY})
  endif()

  if(CUDA_VERSION VERSION_GREATER 9.5)
    # CUDA >= 12.0 ships the LAPACK companion archive under the name
    # cusolver_lapack_static, and this file searches for that name instead
    # of lapack_static in that case. Link whichever one was located.
    # NOTE(review): assumes af_find_static_cuda_libs(<name>) publishes
    # CUDA_<name>_LIBRARY, as the existing lapack_static reference implies;
    # confirm against AFcuda_helpers. If the variable is not set the
    # previous behaviour is kept unchanged.
    if(CUDA_VERSION VERSION_GREATER_EQUAL 12.0 AND CUDA_cusolver_lapack_static_LIBRARY)
      target_link_libraries(afcuda
        PRIVATE
          ${CUDA_cusolver_lapack_static_LIBRARY})
    else()
      target_link_libraries(afcuda
        PRIVATE
          ${CUDA_lapack_static_LIBRARY})
    endif()
  endif()
else()
  target_link_libraries(afcuda
    PRIVATE
      ${CUDA_CUBLAS_LIBRARIES}
      ${CUDA_CUFFT_LIBRARIES}
      ${CUDA_cusolver_LIBRARY}
      ${nvrtc_libs}
  )
endif()

# CUB is linked as a separate target for CUDA major versions below 11.
if(CUDA_VERSION_MAJOR VERSION_LESS 11)
  target_link_libraries(afcuda
    PRIVATE
      CUB::CUB
  )
endif()
af_detect_and_set_cuda_architectures(afcuda)

# C++ standard for CUDA sources: 14 for CUDA versions below 11.0, 17
# otherwise.
if(CUDA_VERSION VERSION_LESS 11.0)
  set(af_cuda_cxx_standard 14)
else()
  set(af_cuda_cxx_standard 17)
endif()

# CMake 3.18 and newer set the standard through target properties; older
# versions pass the flag to the CUDA compiler directly.
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
  set_target_properties(afcuda
    PROPERTIES
      CUDA_STANDARD ${af_cuda_cxx_standard}
      CUDA_STANDARD_REQUIRED ON)
else()
  target_compile_options(afcuda
    PRIVATE
      $<$<COMPILE_LANGUAGE:CUDA>:--std=c++${af_cuda_cxx_standard}>)
endif()

# The helper variable is only needed for the selection above.
unset(af_cuda_cxx_standard)
# CUDA_NO_HALF prevents the inclusion of the half class in the global
# namespace, which conflicts with the half class in ArrayFire's common
# namespace. Prefer the __half class for CUDA instead.
target_compile_definitions(afcuda
  PRIVATE
    AF_CUDA
    CUDA_NO_HALF
    $<$<BOOL:${AF_WITH_CUDNN}>:WITH_CUDNN>
)

# The new cuSparse API was introduced in 10.1.168 for Linux; the older
# 10.1.105 fix version doesn't have it. Unconventionally, the new API arrived
# in a fix release of CUDA. As CMake's FindCUDA module doesn't provide the
# patch/fix version number, 10.2 is used as the minimum CUDA version that
# enables the new cuSparse API.
if(CUDA_VERSION_MAJOR VERSION_GREATER 10 OR
   (UNIX AND
    CUDA_VERSION_MAJOR VERSION_EQUAL 10 AND CUDA_VERSION_MINOR VERSION_GREATER 1))
  target_compile_definitions(afcuda
    PRIVATE
      AF_USE_NEW_CUSPARSE_API)
endif()

# CUDA-only compiler options. The multi-argument generator expressions are
# intentional and must keep their exact token sequence.
target_compile_options(afcuda
  PRIVATE
    $<$<BOOL:${AF_WITH_FAST_MATH}>:$<$<COMPILE_LANGUAGE:CUDA>:-use_fast_math>>
    $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
    $<$<COMPILE_LANGUAGE:CUDA>:-Xcudafe --diag_suppress=unrecognized_gcc_pragma>
    $<$<COMPILE_LANGUAGE:CUDA>: $<$<CXX_COMPILER_ID:MSVC>: -Xcompiler=/wd4251
                                                           -Xcompiler=/wd4068
                                                           -Xcompiler=/wd4275
                                                           -Xcompiler=/wd4668
                                                           -Xcompiler=/wd4710
                                                           -Xcompiler=/wd4505
                                                           -Xcompiler=/bigobj>>
)

# Tell the sources that cuSparse is linked statically rather than loaded at
# runtime.
if(UNIX AND AF_WITH_STATIC_CUDA_NUMERIC_LIBS AND AF_cusparse_LINK_LOADING STREQUAL "Static")
  target_compile_definitions(afcuda
    PRIVATE
      AF_cusparse_STATIC_LINKING)
endif()
# Namespaced alias for consumers inside the build tree.
add_library(ArrayFire::afcuda ALIAS afcuda)

# The generated kernel header targets must be built before afcuda.
add_dependencies(afcuda ${jit_kernel_targets} ${nvrtc_kernel_targets})
if(UNIX AND AF_WITH_PRUNE_STATIC_CUDA_NUMERIC_LIBS)
  add_dependencies(afcuda ${cuda_pruned_library_targets})
endif()

# Public include paths differ between the build tree and the install tree;
# the private ones are only needed to compile the backend itself.
target_include_directories(afcuda
  PUBLIC
    $<BUILD_INTERFACE:${ArrayFire_SOURCE_DIR}/include>
    $<BUILD_INTERFACE:${ArrayFire_BINARY_DIR}/include>
    $<INSTALL_INTERFACE:${AF_INSTALL_INC_DIR}>
  PRIVATE
    ${ArrayFire_SOURCE_DIR}/src/api/c
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/kernel
    ${CMAKE_CURRENT_SOURCE_DIR}/jit
    ${CMAKE_CURRENT_BINARY_DIR})

# Third party headers are added as SYSTEM include directories.
target_include_directories(afcuda
  SYSTEM PRIVATE
    $<$<BOOL:${AF_WITH_CUDNN}>:${cuDNN_INCLUDE_DIRS}>
    ${CUDA_INCLUDE_DIRS}
)

target_link_libraries(afcuda
  PRIVATE
    c_api_interface
    cpp_api_interface
    afcommon_interface
    ${CMAKE_DL_LIBS}
)
# The CUDA driver API has to be linked. When the driver library itself is
# not found, fall back to the libcuda.so stub located in the lib[64]/stubs
# directory.
if(NOT CUDA_CUDA_LIBRARY)
  message(STATUS "CUDA driver library missing. Looking for libcuda stub.")
  find_library(CUDA_CUDA_STUB
    NAMES cuda
    PATHS ${CUDA_LIBRARIES_PATH}/stubs
    NO_DEFAULT_PATH
  )
  if(CUDA_CUDA_STUB)
    message(STATUS "CUDA driver stub FOUND: ${CUDA_CUDA_STUB}")
  endif()

  # NOTE: Only link against the stub library when building.
  target_link_libraries(afcuda
    PUBLIC
      $<BUILD_INTERFACE:${CUDA_CUDA_STUB}>)
else()
  target_link_libraries(afcuda PRIVATE ${CUDA_CUDA_LIBRARY})
endif()

# TODO(umar): This is required for NVRTC to work correctly on OSX. It may
# not be necessary on other platforms.
if(APPLE)
  target_link_libraries(afcuda PUBLIC -Wl,-rpath,${CUDA_LIBRARIES_PATH})
endif()
af_split_debug_info(afcuda ${AF_INSTALL_LIB_DIR})

# Install rules for the backend library (component: cuda).
install(TARGETS afcuda
  EXPORT ArrayFireCUDATargets
  COMPONENT cuda
  PUBLIC_HEADER DESTINATION af
  RUNTIME DESTINATION ${AF_INSTALL_BIN_DIR}
  LIBRARY DESTINATION ${AF_INSTALL_LIB_DIR}
  ARCHIVE DESTINATION ${AF_INSTALL_LIB_DIR}
  FRAMEWORK DESTINATION framework
  INCLUDES DESTINATION ${AF_INSTALL_INC_DIR}
)

set(cuda_deps "")

# Shorthands for the platform's shared library prefix and suffix, used when
# composing CUDA library file names.
set(PX ${CMAKE_SHARED_LIBRARY_PREFIX})
set(SX ${CMAKE_SHARED_LIBRARY_SUFFIX})

# Directory searched for the CUDA shared libraries: the toolkit's bin
# directory on Windows, the CUDA library directory everywhere else.
set(dlib_path_prefix ${CUDA_LIBRARIES_PATH})
if(WIN32)
  set(dlib_path_prefix "${CUDA_TOOLKIT_ROOT_DIR}/bin")
endif()
# afcu_collect_libs(<libname> [FULL_VERSION]
#                   [LIB_MAJOR <major> LIB_MINOR <minor>])
#
# Adds an install rule (component: cuda_dependencies) that copies the CUDA
# shared library <libname> next to the ArrayFire binaries.
#
#   libname       Base name of the library, e.g. cublas or nvrtc-builtins.
#   FULL_VERSION  UNIX (non-Apple) only: install as
#                 <prefix><libname><suffix>.<major>.<minor> instead of
#                 <prefix><libname><suffix>.<major>.
#   LIB_MAJOR,    Version of the library itself. Both must be given to take
#   LIB_MINOR     effect; otherwise CUDA_VERSION_MAJOR/CUDA_VERSION_MINOR
#                 are used.
#
# Reads PX, SX and dlib_path_prefix from the calling scope.
#
# Example:
#   afcu_collect_libs(cufft LIB_MAJOR 11 LIB_MINOR 3)
function(afcu_collect_libs libname)
  set(options "FULL_VERSION")
  set(single_args "LIB_MAJOR;LIB_MINOR")
  set(multi_args "")

  cmake_parse_arguments(cuda_args "${options}" "${single_args}" "${multi_args}" ${ARGN})

  # Compare against the empty string rather than testing truthiness so that
  # a component of 0 (e.g. LIB_MINOR 0) is honoured instead of silently
  # falling back to the toolkit version.
  if(NOT "${cuda_args_LIB_MAJOR}" STREQUAL "" AND
     NOT "${cuda_args_LIB_MINOR}" STREQUAL "")
    set(lib_major ${cuda_args_LIB_MAJOR})
    set(lib_minor ${cuda_args_LIB_MINOR})
  else()
    set(lib_major ${CUDA_VERSION_MAJOR})
    set(lib_minor ${CUDA_VERSION_MINOR})
  endif()
  set(lib_version "${lib_major}.${lib_minor}")

  if (WIN32)
    # Try the DLL naming variants listed below.
    find_file(CUDA_${libname}_LIBRARY_DLL
      NAMES
        "${PX}${libname}64_${lib_major}${SX}"
        "${PX}${libname}64_${lib_major}${lib_minor}${SX}"
        "${PX}${libname}64_${lib_major}0_0${SX}"
        "${PX}${libname}64_${lib_major}${lib_minor}_0${SX}"
        "${PX}${libname}_${lib_major}0_0${SX}"
      PATHS ${dlib_path_prefix}
    )
    mark_as_advanced(CUDA_${libname}_LIBRARY_DLL)
    install(FILES "${CUDA_${libname}_LIBRARY_DLL}"
      DESTINATION ${AF_INSTALL_BIN_DIR}
      COMPONENT cuda_dependencies)
  elseif (APPLE)
    # Resolve symlinks so the real file is installed, then give it the
    # versioned name.
    get_filename_component(outpath "${dlib_path_prefix}/${PX}${libname}.${lib_major}.${lib_minor}${SX}" REALPATH)
    install(FILES "${outpath}"
      DESTINATION ${AF_INSTALL_BIN_DIR}
      RENAME "${PX}${libname}.${lib_version}${SX}"
      COMPONENT cuda_dependencies)
  else () #UNIX
    find_library(CUDA_${libname}_LIBRARY
      NAMES ${libname}
      PATHS
        ${dlib_path_prefix})

    # Resolve symlinks so the real file is installed under the chosen name.
    get_filename_component(outpath "${CUDA_${libname}_LIBRARY}" REALPATH)
    if(cuda_args_FULL_VERSION)
      set(library_install_name "${PX}${libname}${SX}.${lib_version}")
    else()
      set(library_install_name "${PX}${libname}${SX}.${lib_major}")
    endif()
    install(FILES ${outpath}
      DESTINATION ${AF_INSTALL_LIB_DIR}
      RENAME ${library_install_name}
      COMPONENT cuda_dependencies)
  endif ()
endfunction()
# afcu_collect_cudnn_libs(<cudnn_infix>)
#
# Adds an install rule (component: cuda_dependencies) for one cuDNN library.
# <cudnn_infix> selects the cuDNN sub-library; the empty string selects the
# main library. The file is taken from cuDNN_<INFIX>_DLL_LIBRARY on Windows
# and from the resolved path of cuDNN_<INFIX>_LINK_LIBRARY elsewhere, where
# <INFIX> is the upper-cased infix (the "<INFIX>_" part is dropped when the
# infix is empty).
function(afcu_collect_cudnn_libs cudnn_infix)
  if("${cudnn_infix}" STREQUAL "")
    set(internal_infix "_")
  else()
    string(TOUPPER "_${cudnn_infix}_" internal_infix)
  endif()

  if(WIN32)
    set(cudnn_lib "${cuDNN${internal_infix}DLL_LIBRARY}")
  else()
    get_filename_component(cudnn_lib "${cuDNN${internal_infix}LINK_LIBRARY}" REALPATH)
  endif()

  install(FILES ${cudnn_lib}
    DESTINATION ${AF_INSTALL_LIB_DIR}
    COMPONENT cuda_dependencies)
endfunction()
# For standalone installs, add install rules for the CUDA (and cuDNN)
# shared libraries that afcuda needs at runtime.
if(AF_INSTALL_STANDALONE)
  if(AF_WITH_CUDNN)
    afcu_collect_cudnn_libs("")
    if(cuDNN_VERSION_MAJOR VERSION_EQUAL 8)
      # cuDNN changed how its DLLs are shipped starting with major version
      # 8: apart from the main DLL, many of the others are loaded on demand.
      afcu_collect_cudnn_libs(cnn_infer)
      afcu_collect_cudnn_libs(cnn_train)
      afcu_collect_cudnn_libs(ops_infer)
      afcu_collect_cudnn_libs(ops_train)
    elseif(cuDNN_VERSION_MAJOR VERSION_GREATER_EQUAL 9)
      # The infer and train libraries are combined in version 9.
      afcu_collect_cudnn_libs(cnn)
      afcu_collect_cudnn_libs(ops)
    endif()
  endif()

  if(WIN32 OR NOT AF_WITH_STATIC_CUDA_NUMERIC_LIBS)
    # Numeric libraries are linked dynamically. Some of them carry their
    # own version number, which is passed explicitly.
    if(CUDA_VERSION_MAJOR VERSION_EQUAL 12)
      afcu_collect_libs(cufft LIB_MAJOR 11 LIB_MINOR 3)
    elseif(CUDA_VERSION_MAJOR VERSION_EQUAL 11)
      afcu_collect_libs(cufft LIB_MAJOR 10 LIB_MINOR 4)
    else()
      afcu_collect_libs(cufft)
    endif()

    afcu_collect_libs(cublas)
    if(CUDA_VERSION VERSION_GREATER 10.0)
      afcu_collect_libs(cublasLt)
    endif()

    if(CUDA_VERSION_MAJOR VERSION_EQUAL 12)
      afcu_collect_libs(cusolver LIB_MAJOR 11 LIB_MINOR 7)
    else()
      afcu_collect_libs(cusolver)
    endif()

    afcu_collect_libs(cusparse)

    if(CUDA_VERSION VERSION_GREATER 12.0)
      afcu_collect_libs(nvJitLink)
    endif()
  elseif(NOT ${use_static_cuda_lapack})
    # Static numeric libraries, but cuSOLVER is still the shared library.
    if(CUDA_VERSION_MAJOR VERSION_EQUAL 12)
      afcu_collect_libs(cusolver LIB_MAJOR 11 LIB_MINOR 7)
    else()
      afcu_collect_libs(cusolver)
    endif()
  endif()

  # The NVRTC shared libraries are collected unless the static numeric
  # libraries are used on a non-Windows platform with CUDA >= 11.5.
  if(WIN32 OR CUDA_VERSION VERSION_LESS 11.5 OR NOT AF_WITH_STATIC_CUDA_NUMERIC_LIBS)
    afcu_collect_libs(nvrtc)

    if(CUDA_VERSION VERSION_GREATER 10.0)
      afcu_collect_libs(nvrtc-builtins FULL_VERSION)
    elseif(APPLE)
      afcu_collect_libs(cudart)

      get_filename_component(nvrtc_outpath "${dlib_path_prefix}/${PX}nvrtc-builtins.${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}${SX}" REALPATH)
      install(FILES ${nvrtc_outpath}
        DESTINATION ${AF_INSTALL_BIN_DIR}
        RENAME "${PX}nvrtc-builtins${SX}"
        COMPONENT cuda_dependencies)
    elseif(UNIX)
      get_filename_component(nvrtc_outpath "${dlib_path_prefix}/${PX}nvrtc-builtins${SX}" REALPATH)
      install(FILES ${nvrtc_outpath}
        DESTINATION ${AF_INSTALL_LIB_DIR}
        RENAME "${PX}nvrtc-builtins${SX}"
        COMPONENT cuda_dependencies)
    else()
      afcu_collect_libs(nvrtc-builtins)
    endif()
  endif()
endif()
# Source groups that organise the files of this target in IDE project views.
source_group(include
  REGULAR_EXPRESSION ${ArrayFire_SOURCE_DIR}/include/*)
source_group(api\\cpp
  REGULAR_EXPRESSION ${ArrayFire_SOURCE_DIR}/src/api/cpp/*)
source_group(api\\c
  REGULAR_EXPRESSION ${ArrayFire_SOURCE_DIR}/src/api/c/*)
source_group(backend
  REGULAR_EXPRESSION ${ArrayFire_SOURCE_DIR}/src/backend/common/*|${CMAKE_CURRENT_SOURCE_DIR}/*)
source_group(backend\\kernel
  REGULAR_EXPRESSION ${CMAKE_CURRENT_SOURCE_DIR}/kernel/*|${CMAKE_CURRENT_SOURCE_DIR}/kernel/thrust_sort_by_key/*|${CMAKE_CURRENT_SOURCE_DIR}/kernel/scan_by_key/*)
source_group("generated files"
  FILES
    ${ArrayFire_BINARY_DIR}/src/backend/build_version.hpp
    ${ArrayFire_BINARY_DIR}/include/af/version.h
  REGULAR_EXPRESSION ${CMAKE_CURRENT_BINARY_DIR}/${kernel_headers_dir}/*)
source_group("" FILES CMakeLists.txt)

# Hide the FetchContent cache entries for CUB from the default cache view.
mark_as_advanced(
  FETCHCONTENT_SOURCE_DIR_NV_CUB
  FETCHCONTENT_UPDATES_DISCONNECTED_NV_CUB
)