diff --git a/ArrayFireConfig.cmake.in b/ArrayFireConfig.cmake.in
index 3ffd8e0c51..c34b5a22c4 100644
--- a/ArrayFireConfig.cmake.in
+++ b/ArrayFireConfig.cmake.in
@@ -9,12 +9,14 @@
#
# ----------------------------------------------------------------------------
#
-# ArrayFire_CPU_FOUND - True of the ArrayFire CPU library has been found.
-# ArrayFire_CPU_LIBRARIES - Location of ArrayFire's CPU library, if found
-# ArrayFire_CUDA_FOUND - True of the ArrayFire CUDA library has been found.
-# ArrayFire_CUDA_LIBRARIES - Location of ArrayFire's CUDA library, if found
-# ArrayFire_OpenCL_FOUND - True of the ArrayFire OpenCL library has been found.
-# ArrayFire_OpenCL_LIBRARIES - Location of ArrayFire's OpenCL library, if found
+# ArrayFire_CPU_FOUND - True if the ArrayFire CPU library has been found.
+# ArrayFire_CPU_LIBRARIES - Location of ArrayFire's CPU library, if found
+# ArrayFire_CUDA_FOUND - True if the ArrayFire CUDA library has been found.
+# ArrayFire_CUDA_LIBRARIES - Location of ArrayFire's CUDA library, if found
+# ArrayFire_OpenCL_FOUND - True if the ArrayFire OpenCL library has been found.
+# ArrayFire_OpenCL_LIBRARIES - Location of ArrayFire's OpenCL library, if found
+# ArrayFire_Unified_FOUND - True if the ArrayFire Unified library has been found.
+# ArrayFire_Unified_LIBRARIES - Location of ArrayFire's Unified library, if found
#
#=============================================================================
# Copyright (c) 2015, ArrayFire
@@ -48,17 +50,23 @@
get_filename_component(ArrayFire_INCLUDE_DIRS "@INCLUDE_DIR@" ABSOLUTE)
-# keep in the backends in the slowest to fastest order
-foreach(backend CPU OpenCL CUDA)
- string(TOLOWER "${backend}" lowerbackend)
+macro(find_backend backend libname)
set(targetFile ${CMAKE_CURRENT_LIST_DIR}/@BACKEND_DIR@/ArrayFire${backend}.cmake)
if(EXISTS ${targetFile})
include(${targetFile})
set(ArrayFire_${backend}_FOUND ON)
- set(ArrayFire_${backend}_LIBRARIES af${lowerbackend})
+ set(ArrayFire_${backend}_LIBRARIES af${libname})
# set the default backend
- set(ArrayFire_LIBRARIES af${lowerbackend})
+ set(ArrayFire_LIBRARIES af${libname})
else()
set(ArrayFire_${backend}_FOUND OFF)
endif()
+endmacro()
+
+# Keep the backends in slowest-to-fastest order; each backend found overwrites ArrayFire_LIBRARIES
+foreach(backend CPU OpenCL CUDA)
+ string(TOLOWER "${backend}" lowerbackend)
+ find_backend("${backend}" "${lowerbackend}")
endforeach()
+
+find_backend("Unified" "")
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aee379cf43..c76ef4b430 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,6 +35,8 @@ OPTION(BUILD_SIFT "Build ArrayFire nonfree algorithms" OFF)
MARK_AS_ADVANCED(BUILD_SIFT)
+OPTION(BUILD_UNIFIED "Build Backend-Independent ArrayFire API" ON)
+
# Set a default build type if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
@@ -163,6 +165,11 @@ IF(${BUILD_OPENCL})
ADD_SUBDIRECTORY(src/backend/opencl)
ENDIF()
+IF(${BUILD_UNIFIED})
+ ADD_DEFINITIONS(-DAF_UNIFIED)
+ ADD_SUBDIRECTORY(src/api/unified)
+ENDIF()
+
IF(${BUILD_DOCS})
ADD_SUBDIRECTORY(docs)
ENDIF()
diff --git a/CMakeModules/Version.cmake b/CMakeModules/Version.cmake
index 7dca44e778..3a474d1755 100644
--- a/CMakeModules/Version.cmake
+++ b/CMakeModules/Version.cmake
@@ -2,8 +2,8 @@
# Make a version file that includes the ArrayFire version and git revision
#
SET(AF_VERSION_MAJOR "3")
-SET(AF_VERSION_MINOR "1")
-SET(AF_VERSION_PATCH "3")
+SET(AF_VERSION_MINOR "2")
+SET(AF_VERSION_PATCH "0")
SET(AF_VERSION "${AF_VERSION_MAJOR}.${AF_VERSION_MINOR}.${AF_VERSION_PATCH}")
SET(AF_API_VERSION_CURRENT ${AF_VERSION_MAJOR}${AF_VERSION_MINOR})
diff --git a/CMakeModules/build_clBLAS.cmake b/CMakeModules/build_clBLAS.cmake
index faa415185e..d0a9e135bf 100644
--- a/CMakeModules/build_clBLAS.cmake
+++ b/CMakeModules/build_clBLAS.cmake
@@ -14,7 +14,7 @@ ENDIF()
ExternalProject_Add(
clBLAS-external
GIT_REPOSITORY https://github.com/arrayfire/clBLAS.git
- GIT_TAG 47662a6ac1186c756508109d7fef8827efab4504
+ GIT_TAG 102c832825e8e4d60ad73ca97e95668463294068
PREFIX "${prefix}"
INSTALL_DIR "${prefix}"
UPDATE_COMMAND ""
diff --git a/CMakeModules/build_forge.cmake b/CMakeModules/build_forge.cmake
index 5784b76f0f..21b8aac8ad 100644
--- a/CMakeModules/build_forge.cmake
+++ b/CMakeModules/build_forge.cmake
@@ -22,7 +22,7 @@ ENDIF()
ExternalProject_Add(
forge-ext
GIT_REPOSITORY https://github.com/arrayfire/forge.git
- GIT_TAG af3.1.2
+ GIT_TAG af3.2.0
PREFIX "${prefix}"
INSTALL_DIR "${prefix}"
UPDATE_COMMAND ""
diff --git a/CMakeModules/osx_install/OSXInstaller.cmake b/CMakeModules/osx_install/OSXInstaller.cmake
index 4a1fc97845..dc3a8b2491 100644
--- a/CMakeModules/osx_install/OSXInstaller.cmake
+++ b/CMakeModules/osx_install/OSXInstaller.cmake
@@ -76,7 +76,7 @@ PKG_BUILD( PKG_NAME ArrayFireCPU
SCRIPT_DIR ${OSX_INSTALL_DIR}/cpu_scripts
IDENTIFIER com.arrayfire.pkg.arrayfire.cpu.lib
PATH_TO_FILES package/lib
- FILTERS opencl cuda)
+ FILTERS opencl cuda unified)
PKG_BUILD( PKG_NAME ArrayFireCUDA
DEPENDS afcuda
@@ -85,7 +85,7 @@ PKG_BUILD( PKG_NAME ArrayFireCUDA
SCRIPT_DIR ${OSX_INSTALL_DIR}/cuda_scripts
IDENTIFIER com.arrayfire.pkg.arrayfire.cuda.lib
PATH_TO_FILES package/lib
- FILTERS cpu opencl)
+ FILTERS cpu opencl unified)
PKG_BUILD( PKG_NAME ArrayFireOPENCL
DEPENDS afopencl
@@ -93,7 +93,15 @@ PKG_BUILD( PKG_NAME ArrayFireOPENCL
INSTALL_LOCATION /usr/local/lib
IDENTIFIER com.arrayfire.pkg.arrayfire.opencl.lib
PATH_TO_FILES package/lib
- FILTERS cpu cuda)
+ FILTERS cpu cuda unified)
+
+PKG_BUILD( PKG_NAME ArrayFireUNIFIED
+ DEPENDS af
+ TARGETS unified_package
+ INSTALL_LOCATION /usr/local/lib
+ IDENTIFIER com.arrayfire.pkg.arrayfire.unified.lib
+ PATH_TO_FILES package/lib
+ FILTERS cpu cuda opencl)
PKG_BUILD( PKG_NAME ArrayFireHeaders
TARGETS header_package
@@ -107,5 +115,5 @@ PKG_BUILD( PKG_NAME ArrayFireExtra
IDENTIFIER com.arrayfire.pkg.arrayfire.extra
PATH_TO_FILES package/share)
-PRODUCT_BUILD(DEPENDS ${cpu_package} ${cuda_package} ${opencl_package} ${header_package} ${extra_package})
+PRODUCT_BUILD(DEPENDS ${cpu_package} ${cuda_package} ${opencl_package} ${unified_package} ${header_package} ${extra_package})
diff --git a/CMakeModules/osx_install/distribution.dist b/CMakeModules/osx_install/distribution.dist
index 6fe9ba09cb..3dc82379c9 100644
--- a/CMakeModules/osx_install/distribution.dist
+++ b/CMakeModules/osx_install/distribution.dist
@@ -4,32 +4,55 @@
+
ArrayFireCPU.pkg
ArrayFireCUDA.pkg
ArrayFireOPENCL.pkg
+ ArrayFireUNIFIED.pkg
ArrayFireHeaders.pkg
-
-
-
+
+
+
+
-
+
-
+
-
+
+
+
+
diff --git a/README.md b/README.md
index b5aa3b0eef..695adbed03 100644
--- a/README.md
+++ b/README.md
@@ -20,12 +20,10 @@ ArrayFire binary installers can be downloaded at the [ArrayFire Downloads](http:
* Email:
### Build Status
-| | Build | Tests |
-|-----------------|-----------------|-----------------|
-| Linux x86 | [](http://ci.arrayfire.org/job/arrayfire-linux/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-linux-test/branch/devel/) |
-| Linux Tegra | [](http://ci.arrayfire.org/job/arrayfire-tegra/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-tegra-test/branch/devel/) |
-| Windows | [](http://ci.arrayfire.org/job/arrayfire-windows/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-windows-test/branch/devel/) |
-| OSX | [](http://ci.arrayfire.org/job/arrayfire-osx/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-osx-test/branch/devel/) |
+| | Linux x86 | Linux armv7l | Linux aarch64 | Windows | OSX |
+|:-------:|:---------:|:------------:|:-------------:|:-------:|:---:|
+| Build | [](http://ci.arrayfire.org/job/arrayfire-linux/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-tegrak1/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-tegrax1/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-windows/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-osx/branch/devel/) |
+| Test | [](http://ci.arrayfire.org/job/arrayfire-linux-test/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-tegrak1-test/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-tegrax1-test/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-windows-test/branch/devel/) | [](http://ci.arrayfire.org/job/arrayfire-osx-test/branch/devel/) |
Test coverage: [](https://coveralls.io/r/arrayfire/arrayfire?branch=HEAD)
diff --git a/assets b/assets
index d5b0b7cd5d..7c2a12739a 160000
--- a/assets
+++ b/assets
@@ -1 +1 @@
-Subproject commit d5b0b7cd5d44299458696571df7fb1aa7d99701e
+Subproject commit 7c2a12739ac0f5830d26334731e9ac96ba01e2d7
diff --git a/docs/details/algorithm.dox b/docs/details/algorithm.dox
index d2d0d50bd7..a823572b59 100644
--- a/docs/details/algorithm.dox
+++ b/docs/details/algorithm.dox
@@ -15,6 +15,15 @@ This function performs the operation across all batches present in the input sim
Find the sum of values in the input
+This table defines the return value types for the corresponding input types
+
+Input Type | Output Type
+--------------------|---------------------
+f32, f64, c32, c64 | same as input
+s32, u32, s64, u64 | same as input
+s16 | s32
+u16, u8, b8 | u32
+
\copydoc batch_detail_algo
@@ -25,6 +34,15 @@ Find the sum of values in the input
Find the product of values in the input
+This table defines the return value types for the corresponding input types
+
+Input Type | Output Type
+--------------------|---------------------
+f32, f64, c32, c64 | same as input
+s32, u32, s64, u64 | same as input
+s16 | s32
+u16, u8, b8 | u32
+
\copydoc batch_detail_algo
@@ -55,6 +73,8 @@ Find the maximum values and their locations
Find if of all of the values in input are true
+Return type is b8 for all input types
+
\copydoc batch_detail_algo
@@ -65,6 +85,8 @@ Find if of all of the values in input are true
Find if of any of the values in input are true
+Return type is b8 for all input types
+
\copydoc batch_detail_algo
@@ -75,6 +97,8 @@ Find if of any of the values in input are true
Count the number of non-zero elements in the input
+Return type is u32 for all input types
+
\copydoc batch_detail_algo
@@ -85,6 +109,15 @@ Count the number of non-zero elements in the input
Perform exclusive sum along specified dimension
+This table defines the return value types for the corresponding input types
+
+Input Type | Output Type
+--------------------|---------------------
+f32, f64, c32, c64 | same as input
+s32, u32, s64, u64 | same as input
+s16 | s32
+u16, u8, b8 | u32
+
\copydoc batch_detail_algo
@@ -95,6 +128,8 @@ Perform exclusive sum along specified dimension
Locate the indices of non-zero elements
+Return type is u32 for all input types
+
The locations are provided by flattening the input into a linear array.
@@ -135,7 +170,8 @@ Sort an multi dimensional array
Sort input arrays get the sorted indices
-Sort a multi dimensional array and return sorted indices
+Sort a multi dimensional array and return sorted indices. Index array is of
+type u32.
diff --git a/docs/details/backend.dox b/docs/details/backend.dox
new file mode 100644
index 0000000000..fafa453e6f
--- /dev/null
+++ b/docs/details/backend.dox
@@ -0,0 +1,66 @@
+/**
+\addtogroup arrayfire_func
+@{
+
+\defgroup unified_func_setbackend setBackend
+
+\brief Set the current backend when using Unified backend
+
+This is a noop when using one of CPU, CUDA, or OpenCL backend.
+
+However, when using one of those 3, trying to set a different backend
+will result in an exception.
+
+\ingroup unified_func
+\ingroup arrayfire_func
+
+=======================================================================
+
+\defgroup unified_func_getbackendcount getBackendCount
+
+\brief Get the number of backends whose libraries were successfully loaded.
+
+This will be between 0 and 3: 0 means no backends were loaded and 3 means all
+backends loaded successfully.
+
+\ingroup unified_func
+\ingroup arrayfire_func
+
+=======================================================================
+
+\defgroup unified_func_getavailbackends getAvailableBackends
+
+\brief Returns an integer indicating the backends loaded successfully.
+
+The number returned denotes the backends available according to the table:
+
+Return Value | Backends Available
+-------------|-----------------------
+0 | None
+1 | CPU
+2 | CUDA
+3 | CPU and CUDA
+4 | OpenCL
+5 | CPU and OpenCL
+6 | CUDA and OpenCL
+7 | CPU, CUDA and OpenCL
+
+\ingroup unified_func
+\ingroup arrayfire_func
+
+=======================================================================
+
+\defgroup unified_func_getbackendid getBackendId
+
+\brief Gets the backend enum for an array
+
+This will return one of the values from the \ref af_backend enum.
+The return value specifies which backend the array was created on.
+
+\ingroup unified_func
+\ingroup arrayfire_func
+
+=======================================================================
+
+@}
+*/
diff --git a/docs/details/image.dox b/docs/details/image.dox
index 4e1b0a5cdc..234f4f72e9 100644
--- a/docs/details/image.dox
+++ b/docs/details/image.dox
@@ -329,6 +329,9 @@ distance as well as the color distance.
The bilateral filter requires the size of the filter (in pixels) and the upper
bound on color values, N, where pixel values range from 0–N inclusively.
+The return type of the array is f64 for f64 input, f32 for all other input
+types.
+
=======================================================================
\defgroup image_func_erode erode
diff --git a/docs/details/index.dox b/docs/details/index.dox
index 85386b25db..90b9924d5e 100644
--- a/docs/details/index.dox
+++ b/docs/details/index.dox
@@ -14,6 +14,12 @@
\brief Copy and write values in the locations specified by the sequences
+\ingroup index_mat
+
+\defgroup index_func_util util
+
+\brief Utility functions to create objects of type \ref af_index_t
+
\ingroup index_mat
@}
*/
diff --git a/docs/details/vision.dox b/docs/details/vision.dox
index af0f11437e..1d9d6b99ac 100644
--- a/docs/details/vision.dox
+++ b/docs/details/vision.dox
@@ -90,6 +90,29 @@ before using it, make sure you have the appropriate permission to do so.
=======================================================================
+\defgroup cv_func_gloh gloh
+\ingroup featdescriptor_mat
+
+\brief SIFT feature detector and GLOH descriptor extractor
+
+Detects features using the Scale Invariant Feature Transform (SIFT),
+by David Lowe. Descriptors are extracted using Gradient Location and
+Orientation Histogram (GLOH).
+
+Lowe, D. G., "Distinctive Image Features from Scale-Invariant Keypoints",
+International Journal of Computer Vision, 60, 2, pp. 91-110, 2004.
+
+Mikolajczyk, K., and Schmid, C., "A performance evaluation of local
+descriptors", IEEE Transactions on Pattern Analysis and Machine Intelligence,
+10, 27, pp. 1615-1630, 2005.
+
+WARNING: Although GLOH is free of patents, the SIFT algorithm, used to detect
+features that will later be used by GLOH descriptors, is patented by the
+University of British Columbia. Before using it, make sure you have the
+appropriate permission to do so.
+
+=======================================================================
+
\defgroup cv_func_hamming_matcher hammingMatcher
\ingroup featmatcher_mat
@@ -147,6 +170,30 @@ Template matching is an image processing technique to find small patches of an i
match a given template image. A more in depth discussion on the topic can be found
[here](http://en.wikipedia.org/wiki/Template_matching).
+=======================================================================
+
+\defgroup cv_func_homography homography
+\ingroup homography_mat
+
+\brief Homography Estimation
+
+Homography estimation finds a perspective transform between two sets of 2D points.
+Currently, two methods are supported for the estimation, RANSAC (RANdom SAmple Consensus)
+and LMedS (Least Median of Squares). Both methods work by randomly selecting a subset
+of 4 points of the set of source points, computing the eigenvectors of that set and
+finding the perspective transform. The process is repeated several times, a maximum of
+times given by the value passed to the iterations arguments for RANSAC (for the CPU
+backend, usually less than that, depending on the quality of the dataset, but for CUDA
+and OpenCL backends the transformation will be computed exactly the amount of times
+passed via the iterations parameter), the returned value is the one that matches the
+best number of inliers, which are all of the points that fall within a maximum L2
+distance from the value passed to the inlier_thr argument. For the LMedS case, the
+number of iterations is currently hardcoded to meet the following equation:
+
+\f$ m = \frac{log(1 - P)}{log[1 - {(1 - \epsilon)}^{p}]}\f$,
+
+where \f$ P = 0.99\f$, \f$ \epsilon = 40\%\f$ and \f$ p = 4\f$.
+
@}
diff --git a/docs/layout.xml b/docs/layout.xml
index d637c7f55a..3a66b563e4 100644
--- a/docs/layout.xml
+++ b/docs/layout.xml
@@ -3,10 +3,12 @@
+
+
diff --git a/docs/pages/INSTALL.md b/docs/pages/INSTALL.md
index 3d9983aff9..dabb10b318 100644
--- a/docs/pages/INSTALL.md
+++ b/docs/pages/INSTALL.md
@@ -2,12 +2,21 @@ ArrayFire binary installation instructions {#installing}
=====
Installing ArrayFire couldn't be easier. We ship installers for Windows,
-OSX, and several variants of Linux. In general the installation procedure
-proceeds like this:
+OSX, and Linux. Although you could
+[build ArrayFire from source](https://github.com/arrayfire/arrayfire), we
+suggest using our pre-compiled binaries as they include the Intel Math
+Kernel Library to accelerate linear algebra functions.
-1. [Download](http://arrayfire.com/download/) the ArrayFire installer for your
+Please note that although our download page requires a valid login, registration
+is free and downloading ArrayFire is also free. We request your contact
+information so that we may notify you of software updates and occasionally
+collect user feedback about our library.
+
+In general, the installation process for ArrayFire looks like this:
+
+1. Install prerequisites
+2. [Download](http://arrayfire.com/download/) the ArrayFire installer for your
operating system
-2. Install prerequisites
3. Install ArrayFire
4. Test the installation
5. [Where to go for help?](#GettingHelp)
@@ -16,107 +25,138 @@ Below you will find instructions for
* [Windows](#Windows)
* Linux including
- * [Debian (.deb) 8](#Debian)
- * [Ubuntu (.deb) 14.10 and later](#Ubuntu)
- * [Fedora (.rpm) 21](#Fedora)
+ * [Debian 8](#Debian)
+ * [Ubuntu 14.04 and later](#Ubuntu)
+ * [RedHat, Fedora, and CentOS](#RPM-distros)
* [Mac OSX (.sh and brew)](#OSX)
# Windows
-Simply [download](http://arrayfire.com/download/) and run the installer.
If you wish to use CUDA or OpenCL please ensure that you have also installed
support for these technologies from your video card vendor's website.
+Next [download](http://arrayfire.com/download/) and run the ArrayFire installer.
+After it has completed, you need to add ArrayFire to the path for all users.
+
+1. Open Advanced System Settings:
+ * Windows 8: Move the Mouse pointer to the bottom right corner of the
+ screen, Right click, choose System. Then click "Advanced System Settings"
+ * Windows 7: Open the Start Menu and Right Click on "Computer". Then choose
+ Properties and click "Advanced System Settings"
+2. In Advanced System Settings window, click on Advanced tab
+3. Click on Environment Variables, then under System Variables, find PATH, and
+ click on it.
+4. In edit mode, append %AF_PATH%/lib. NOTE: Ensure that there is a semi-colon
+ separating %AF_PATH%/lib from any existing content (e.g.
+ EXISTING_PATHS;%AF_PATH%/lib;) otherwise other software may not function
+ correctly.
+
+Finally, verify that the path addition worked correctly. You can do this by:
+
+1. Open Visual Studio 2013. Open the HelloWorld solution which is located at
+ `%AF_PATH%/examples/helloworld/helloworld.exe`.
+2. Build and run the helloworld example. Be sure to select the
+ platform/configuration of your choice using the platform drop-down (the
+ options are CPU, CUDA, and OpenCL) and Solution Configuration drop-down
+ (options of Release and Debug) menus.
+
# Linux
## Debian 8
-First [download](http://arrayfire.com/download/) ArrayFire. Then, using the
-`gdebi` package manager, you can install ArrayFire and all dependencies as
-follows:
-
- gdebi arrayfire*.deb
-
-If you prefer to use the `.sh` installer, it and all prerequisite packages
-may be installed as follows:
+First install the prerequisite packages:
# Prerequisite packages:
- apt-get install libfreeimage-dev libatlas3gf-base libfftw3-dev cmake
+ apt-get install libfreeimage-dev libatlas3gf-base libfftw3-dev libglew-dev libglewmx-dev libglfw3-dev cmake
# Enable GPU support (OpenCL):
apt-get install ocl-icd-libopencl1
- # Run Installer
- ./arrayfire_3.0.0_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
+If you wish to use CUDA, please
+[download the latest version of CUDA](https://developer.nvidia.com/cuda-zone)
+and install it on your system.
-To enable CUDA support, edit `/etc/apt/sources.list` and append `non-free`
-to the line containing `deb http://.../debian jessie main`. Then, as root, run
+Next [download](http://arrayfire.com/download/) ArrayFire. After you have the
+file, run the installer.
- apt-get update
- apt-get install nvidia-cuda-dev
+ ./arrayfire_*_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
-## Fedora 21
+## RedHat, Fedora, and CentOS
-First [download](http://arrayfire.com/download/) ArrayFire. Then, using the
-`yum` package manager, you can install ArrayFire and all dependencies as
-follows:
-
- yum --nogpgcheck localinstall arrayfire*.rpm
-
-Or with the self-extracting installer
+First install the prerequisite packages:
# Install prerequiste packages
- yum install freeimage atlas fftw cmake
+ yum install freeimage atlas fftw libGLEW libGLEWmx glfw cmake
- # Run Installer
- ./arrayfire_3.0.0_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
+On CentOS and RedHat the `glfw` package is outdated and you will need to compile
+it from source. Please follow
+[these instructions](https://github.com/arrayfire/arrayfire/wiki/GLFW-for-ArrayFire).
-## Ubuntu 14.10 and later
+If you wish to use CUDA, please
+[download the latest version of CUDA](https://developer.nvidia.com/cuda-downloads)
+and install it on your system.
-First [download](http://arrayfire.com/download/) ArrayFire. Then, using the
-`gdebi` package manager, you can install ArrayFire and all dependencies as
-follows:
+Next [download](http://arrayfire.com/download/) ArrayFire. After you have the
+file, run the installer.
- sudo apt-get install gdebi
- gdebi arrayfire*.deb
+ ./arrayfire_*_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
-If you prefer to use the `.sh` installer, it and all prerequisite packages
-may be installed as follows:
+## Ubuntu 14.04 and later
+
+First install the prerequisite packages:
# Prerequisite packages:
sudo apt-get install libfreeimage-dev libatlas3gf-base libfftw3-dev cmake
- # Enable GPU support (OpenCL and/or CUDA):
- sudo apt-get install ocl-icd-libopencl1
- sudo apt-get install nvidia-cuda-dev
+Ubuntu 14.04 will not have the libglfw3-dev package in its repositories. You can either build the library from source (following the instructions listed) or install the library from a PPA as follows:
- # Run Installer
- sudo ./arrayfire_3.0.0_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
+```
+sudo apt-add-repository ppa:keithw/glfw3
+sudo apt-get update
+sudo apt-get install glfw3
+```
-# Mac OSX
+After this point, the installation should proceed identically to Ubuntu 14.10 or newer.
-## Self-extracting zip from ArrayFire website
+If your system has a CUDA GPU, we suggest downloading the latest drivers
+from NVIDIA in the form of a Debian package and installing using the
+package manager. At present, CUDA downloads can be found on the
+[NVIDIA CUDA download page](https://developer.nvidia.com/cuda-downloads)
+Follow NVIDIA's instructions for getting CUDA set up.
-On OSX there are several dependencies that are not integrated into the
-operating system. It is easiest to install these using [Homebrew](http://brew.sh/),
-but you can also build them yourself if you prefer.
+If you wish to use OpenCL, simply install the OpenCL ICD loader along
+with any drivers required for your hardware.
+
+ # Enable GPU support (OpenCL):
+ apt-get install ocl-icd-libopencl1
+
+### Special instructions for Tegra K1
+If you are using ArrayFire on the Tegra K1 also install these packages:
-First [download](http://arrayfire.com/download/) ArrayFire. You may install
-ArrayFire to `/usr/local` from XTerm using the following commands:
+ sudo apt-get install libatlas3gf-base libatlas-dev libfftw3-dev liblapacke-dev
- brew install boost fftw cmake freeimage
+In addition to these packages, you will need to compile GLFW3 from source
+using the instructions above.
- sudo ./arrayfire_3.0.0_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
+Finally, [download](http://arrayfire.com/download/) ArrayFire. After you have
+the file, run the installer using:
-## Brew installation
+ ./arrayfire_*_Linux_x86_64.sh --exclude-subdir --prefix=/usr/local
+
+# Mac OSX
+
+On OSX there are several dependencies that are not integrated into the
+operating system. The ArrayFire installer automatically satisfies these
+dependencies using [Homebrew](http://brew.sh/).
+If you don't have Homebrew installed on your system, the ArrayFire installer
+will ask you to do so.
-GitHub user [sutoiku](https://github.com/sutoiku) has been kind enough to
-write a brew installation script for ArrayFire. This installation method will
-download and compile ArrayFire and all prerequisites. Please remember to
-register on the ArrayFire website so we can keep you up to date about new
-versions of our software!
+Simply [download](http://arrayfire.com/download) the ArrayFire installer
+and double-click it to carry out the installation.
- brew install arrayfire
+ArrayFire can also be installed through Homebrew directly using
+`brew install arrayfire`; however, it will
+not include MKL acceleration of linear algebra functions.
## Testing installation
diff --git a/docs/pages/getting_started.md b/docs/pages/getting_started.md
index 451f994f60..6d1c7cdd3d 100644
--- a/docs/pages/getting_started.md
+++ b/docs/pages/getting_started.md
@@ -17,6 +17,8 @@ underlying data may be one of various [basic types](\ref af::af_dtype):
* [c64](\ref c64) complex double-precision (`cdouble`)
* [s64](\ref s64) 64-bit signed integer (`intl`)
* [u64](\ref u64) 64-bit unsigned integer (`uintl`)
+* [s16](\ref s16) 16-bit signed integer (`short`)
+* [u16](\ref u16) 16-bit unsigned integer (`unsigned short`)
Older devices may not support double precision operations.
diff --git a/docs/pages/gfor.md b/docs/pages/gfor.md
index 28410a7f18..a7ed9a195d 100644
--- a/docs/pages/gfor.md
+++ b/docs/pages/gfor.md
@@ -74,14 +74,6 @@ gfor (seq k, 0, n-1) {
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
-array A = constant(1,n,n,m);
-array B = constant(1,n,n);
-gfor (seq k, 0,m-1) {
- A(span,span,k) = A(span,span,k) * B; // matrix-matrix multiply
-}
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
array A = randu(n,m);
array B = constant(0,n,m);
@@ -122,30 +114,6 @@ gfor (seq ii, n)
H(span,ii) = compute(A(span,ii), B(span,ii), ep);
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Multiplications {#gfor_mul}
----------------
-
-ArrayFire supports bulk multiplications of vector-vector, matrix-vector, and
-matrix-matrix types using GFOR. This is especially useful with many small
-matrices.
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.cpp}
-array A = constant(1,n,n);
-array B = constant(1,n,1);
-array C = constant(0,n,m);
-gfor (seq k, n)
- B(k) = A(k,span) * A(span,k); // vector-vector multiply
-
-A = constant(1,n,n,m);
-gfor (seq k, m)
- C(span,k) = A(span,span,k) * B; // matrix-vector multiply
-
-A = constant(1,n,n,m);
-B = constant(1,n,n);
-gfor (seq k, m)
- A(span,span,k) = A(span,span,k) * B; // matrix-matrix multiply
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
The Iterator {#gfor_iterator}
------------
diff --git a/docs/pages/matrix_manipulation.md b/docs/pages/matrix_manipulation.md
index 8fd7b35355..35b2b9a61f 100644
--- a/docs/pages/matrix_manipulation.md
+++ b/docs/pages/matrix_manipulation.md
@@ -2,30 +2,272 @@ Matrix Manipulation {#matrixmanipulation}
===================
Many different kinds of [matrix manipulation routines](\ref manip_mat) are available:
-* tile() to repeat a matrix along dimensions
-* join() to concatenate two matrices along a dimension
+* flat() - flatten an array to one dimension
+* flip() - flip an array along a dimension
+* join() - join up to 4 arrays
+* moddims() - change the dimensions of an array without changing the data
+* reorder() - changes the dimension order within the array
+* shift() - shifts data along a dimension
+* tile() - repeats an array along a dimension
+* transpose() - performs a matrix transpose
* [array()](\ref af::array) to adjust the dimensions of an array
-* [transpose](\ref af::array::T) a matrix or vector
+* [transpose](\ref af::array::T) a matrix or vector with shorthand notation
-tile() allows you to repeat a matrix along specified
-dimensions, effectively 'tiling' the matrix. Please note that the
-dimensions passed in indicate the number of times to replicate the
-matrix in each dimension, not the final dimensions of the matrix.
+### flat()
+The __flat()__ function flattens an array to one dimension.
+```
+a [3 3 1 1]
+ 1.0000 4.0000 7.0000
+ 2.0000 5.0000 8.0000
+ 3.0000 6.0000 9.0000
-\snippet test/matrix_manipulation.cpp ex_matrix_manipulation_tile
+flat(a) [9 1 1 1]
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+ 6.0000
+ 7.0000
+ 8.0000
+ 9.0000
-join() allows you to joining two matrices together. Matrix
-dimensions must match along every dimension except the dimension
-of joining (dimensions are 0-indexed). For example, a 2x3 matrix
-can be joined with a 2x4 matrix along dimension 1, but not along
-dimension 0 since {3,4} don`t match up.
+```
+The flat function has the following overloads:
+* __array af::flat(const array& in)__ -- flatten an array
+* __af_err af_flat(af_array* out, const af_array in)__ -- C interface for flat() function
-\snippet test/matrix_manipulation.cpp ex_matrix_manipulation_join
-Construct a regular mesh grid from vectors `x` and `y`. For example, a
-mesh grid of the vectors {1,2,3,4} and {5,6} would result in two matrices:
+### flip()
+The __flip()__ function flips the contents of an array along a chosen dimension.
+```
+a [5 2 1 1]
+ 1.0000 6.0000
+ 2.0000 7.0000
+ 3.0000 8.0000
+ 4.0000 9.0000
+ 5.0000 10.0000
-\snippet test/matrix_manipulation.cpp ex_matrix_manipulation_mesh
+flip(a, 0) [5 2 1 1]
+ 5.0000 10.0000
+ 4.0000 9.0000
+ 3.0000 8.0000
+ 2.0000 7.0000
+ 1.0000 6.0000
+
+flip(a, 1) [5 2 1 1]
+ 6.0000 1.0000
+ 7.0000 2.0000
+ 8.0000 3.0000
+ 9.0000 4.0000
+ 10.0000 5.0000
+```
+The flip function has the following overloads:
+* __array af::flip(const array &in, const unsigned dim)__ -- flips an array along a dimension
+* __af_err af_flip(af_array *out, const af_array in, const unsigned dim)__ -- C interface for flip()
+
+### join()
+The __join()__ function can join up to 4 arrays together.
+```
+a [5 1 1 1]
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+
+join(0, a, a) [10 1 1 1]
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+
+join(1, a, a) [5 2 1 1]
+ 1.0000 1.0000
+ 2.0000 2.0000
+ 3.0000 3.0000
+ 4.0000 4.0000
+ 5.0000 5.0000
+```
+The join function has several overloads:
+* __array af::join(const int dim, const array &first, const array &second)__ -- Joins 2 arrays along a dimension
+
+* __array af::join(const int dim, const array &first, const array &second, const array &third)__ -- Joins 3 arrays along a dimension.
+
+* __array af::join(const int dim, const array &first, const array &second, const array &third, const array &fourth)__ -- Joins 4 arrays along a dimension
+
+* __af_err af_join(af_array *out, const int dim, const af_array first, const af_array second)__ -- C interface function to join 2 arrays along a dimension
+
+* __af_err af_join_many(af_array *out, const int dim, const unsigned n_arrays, const af_array *inputs)__ -- C interface function to join up to 10 arrays along a dimension
+
+### moddims()
+The __moddims()__ function changes the dimensions of an array without changing its data or order. It is important to remember that the function only modifies the _metadata_ associated with the array and does not actually modify the content of the array.
+```
+a [8 1 1 1]
+ 1.0000
+ 2.0000
+ 1.0000
+ 2.0000
+ 1.0000
+ 2.0000
+ 1.0000
+ 2.0000
+
+af::dim4 new_dims(2, 4);
+moddims(a, new_dims) [2 4 1 1]
+ 1.0000 1.0000 1.0000 1.0000
+ 2.0000 2.0000 2.0000 2.0000
+
+moddims(a, a.elements(), 1, 1, 1) [8 1 1 1]
+ 1.0000
+ 2.0000
+ 1.0000
+ 2.0000
+ 1.0000
+ 2.0000
+ 1.0000
+ 2.0000
+```
+The moddims function has several overloads:
+* __array af::moddims(const array &in, const unsigned ndims, const dim_t *const dims)__ -- mods number of dimensions to match _ndims_ as specified in the array _dims_
+* __array af::moddims(const array &in, const dim4 &dims)__ -- mods dimensions as specified by _dims_
+* __array af::moddims(const array &in, const dim_t d0, const dim_t d1=1, const dim_t d2=1, const dim_t d3=1)__ -- mods dimensions of an array
+* __af_err af_moddims(af_array *out, const af_array in, const unsigned ndims, const dim_t *const dims)__ -- C interface to mod dimensions of an array
+
+### reorder()
+The __reorder()__ function changes the order of the dimensions within the array. This actually alters the underlying data of the array.
+```
+a [2 2 3 1]
+ 1.0000 3.0000
+ 2.0000 4.0000
+
+ 1.0000 3.0000
+ 2.0000 4.0000
+
+ 1.0000 3.0000
+ 2.0000 4.0000
+
+
+reorder(a, 1, 0, 2) [2 2 3 1] //equivalent to a transpose
+ 1.0000 2.0000
+ 3.0000 4.0000
+
+ 1.0000 2.0000
+ 3.0000 4.0000
+
+ 1.0000 2.0000
+ 3.0000 4.0000
+
+
+reorder(a, 2, 0, 1) [3 2 2 1]
+ 1.0000 2.0000
+ 1.0000 2.0000
+ 1.0000 2.0000
+
+ 3.0000 4.0000
+ 3.0000 4.0000
+ 3.0000 4.0000
+```
+The reorder function has the following overloads:
+* __array af::reorder(const array &in, const unsigned x, const unsigned y=1, const unsigned z=2, const unsigned w=3)__ -- Reorders dimensions of an array
+
+* __af_err af_reorder(af_array *out, const af_array in, const unsigned x, const unsigned y, const unsigned z, const unsigned w)__ -- C interface for reordering function
+
+### shift()
+The __shift()__ function shifts data in a circular buffer fashion along a chosen dimension.
+```
+a [3 5 1 1]
+ 0.0000 0.0000 0.0000 0.0000 0.0000
+ 3.0000 4.0000 5.0000 1.0000 2.0000
+ 3.0000 4.0000 5.0000 1.0000 2.0000
+
+shift(a, 0, 2 ) [3 5 1 1]
+ 0.0000 0.0000 0.0000 0.0000 0.0000
+ 1.0000 2.0000 3.0000 4.0000 5.0000
+ 1.0000 2.0000 3.0000 4.0000 5.0000
+
+shift(a, -1, 2 ) [3 5 1 1]
+ 1.0000 2.0000 3.0000 4.0000 5.0000
+ 1.0000 2.0000 3.0000 4.0000 5.0000
+ 0.0000 0.0000 0.0000 0.0000 0.0000
+```
+The shift function has the following overloads:
+* __array af::shift(const array &in, const int x, const int y=0, const int z=0, const int w=0)__ -- Shifts array along specified dimensions
+
+* __af_err af_shift(af_array *out, const af_array in, const int x, const int y, const int z, const int w)__ -- C interface for shifting an array
+
+### tile()
+The __tile()__ function repeats an array along a dimension.
+```
+a [3 1 1 1]
+ 1.0000
+ 2.0000
+ 3.0000
+
+tile(a, 2) [6 1 1 1]
+ 1.0000
+ 2.0000
+ 3.0000
+ 1.0000
+ 2.0000
+ 3.0000
+
+tile(a, 2, 2) [6 2 1 1]
+ 1.0000 1.0000
+ 2.0000 2.0000
+ 3.0000 3.0000
+ 1.0000 1.0000
+ 2.0000 2.0000
+ 3.0000 3.0000
+
+af::dim4 tile_dims(1, 2, 3);
+tile(a, tile_dims) [3 2 3 1]
+ 1.0000 1.0000
+ 2.0000 2.0000
+ 3.0000 3.0000
+
+ 1.0000 1.0000
+ 2.0000 2.0000
+ 3.0000 3.0000
+
+ 1.0000 1.0000
+ 2.0000 2.0000
+ 3.0000 3.0000
+
+```
+The tile function has several overloads:
+* __array af::tile(const array &in, const unsigned x, const unsigned y=1, const unsigned z=1, const unsigned w=1)__ -- Tiles array along specified dimensions
+* __array af::tile(const array &in, const dim4 &dims)__ -- Tile an array according to a dim4 object
+* __af_err af_tile(af_array *out, const af_array in, const unsigned x, const unsigned y, const unsigned z, const unsigned w)__ -- C interface for tiling an array
+
+### transpose()
+The __transpose()__ function performs a standard matrix transpose. The input array must have the dimensions of a 2D-matrix.
+```
+a [3 3 1 1]
+ 1.0000 3.0000 3.0000
+ 2.0000 1.0000 3.0000
+ 2.0000 2.0000 1.0000
+
+transpose(a) [3 3 1 1]
+ 1.0000 2.0000 2.0000
+ 3.0000 1.0000 2.0000
+ 3.0000 3.0000 1.0000
+
+```
+The transpose function has several overloads:
+* __array af::transpose(const array &in, const bool conjugate=false)__ -- Transposes a matrix.
+
+* __void af::transposeInPlace(array &in, const bool conjugate=false)__ -- Transposes a matrix in-place.
+
+* __af_err af_transpose(af_array *out, af_array in, const bool conjugate)__ -- C interface to transpose a matrix.
+
+* __af_err af_transpose_inplace(af_array in, const bool conjugate)__ -- C interface to transpose a matrix in-place.
[array()](\ref af::array) can be used to create a (shallow) copy of a matrix
with different dimensions. The number of elements must remain the same as
@@ -37,3 +279,25 @@ The [T()](\ref af::array::T) and [H()](\ref af::array::H) methods can be
used to form the [matrix or vector transpose](\ref af::array::T) .
\snippet test/matrix_manipulation.cpp ex_matrix_manipulation_transpose
+
+### Combining re-ordering functions to enumerate grid coordinates
+By using a combination of the array restructuring functions, we can quickly code complex manipulation patterns with a few lines of code. For example, consider generating _(x,y)_ coordinates for a grid where each axis goes from *1 to n*. Instead of using several loops to populate our arrays we can just use a small combination of the above functions.
+```
+unsigned n=3;
+af::array xy = join(1,
+ tile(seq(1, n), n),
+ flat( transpose(tile(seq(1, n), 1, n)) )
+ );
+xy [9 2 1 1]
+ 1.0000 1.0000
+ 2.0000 1.0000
+ 3.0000 1.0000
+ 1.0000 2.0000
+ 2.0000 2.0000
+ 3.0000 2.0000
+ 1.0000 3.0000
+ 2.0000 3.0000
+ 3.0000 3.0000
+```
+### Conclusion
+Functions provided by ArrayFire offer ease and flexibility for efficiently manipulating the structure of arrays. The provided functions can be used as building blocks to generate, shift, or prepare data to any form imaginable!
diff --git a/docs/pages/release_notes.md b/docs/pages/release_notes.md
index 6bc53622ca..f1b195b184 100644
--- a/docs/pages/release_notes.md
+++ b/docs/pages/release_notes.md
@@ -1,6 +1,112 @@
Release Notes {#releasenotes}
==============
+v3.2.0
+=================
+
+Major Updates
+-------------
+
+* Added Unified backend
+ * Allows switching backends at runtime
+ * Read [Unified Backend](\ref unifiedbackend) for more.
+* Support for 16-bit integers (\ref s16 and \ref u16)
+ * All functions that support 32-bit integer types (\ref s32, \ref u32),
+ now also support 16-bit integer types
+
+Function Additions
+------------------
+* Unified Backend
+ * \ref setBackend() - Sets a backend as active
+ * \ref getBackendCount() - Gets the number of backends available for use
+ * \ref getAvailableBackends() - Returns information about available backends
+ * \ref getBackendId() - Gets the backend enum for an array
+
+* Vision
+ * \ref homography() - Homography estimation
+ * \ref gloh() - GLOH Descriptor for SIFT
+
+* Image Processing
+ * \ref loadImageNative() - Load an image as native data without modification
+ * \ref saveImageNative() - Save an image without modifying data or type
+
+* Graphics
+ * \ref af::Window::plot3() - 3-dimensional line plot
+ * \ref af::Window::surface() - 3-dimensional curve plot
+
+* Indexing
+ * \ref af_create_indexers()
+ * \ref af_set_array_indexer()
+ * \ref af_set_seq_indexer()
+ * \ref af_set_seq_param_indexer()
+ * \ref af_release_indexers()
+
+* CUDA Backend Specific
+ * \ref setNativeId() - Set the CUDA device with given native id as active
+ * ArrayFire uses a modified order for devices. The native id for a
+ device can be retrieved using `nvidia-smi`
+
+* OpenCL Backend Specific
+ * \ref setDeviceId() - Set the OpenCL device using the `clDeviceId`
+
+Other Improvements
+------------------------
+* Added \ref c32 and \ref c64 support for \ref isNaN(), \ref isInf() and \ref iszero()
+* Added CPU information for `x86` and `x86_64` architectures in CPU backend's \ref info()
+* Batch support for \ref approx1() and \ref approx2()
+ * Now can be used with gfor as well
+* Added \ref s64 and \ref u64 support to:
+ * \ref sort() (along with sort index and sort by key)
+ * \ref setUnique(), \ref setUnion(), \ref setIntersect()
+ * \ref convolve() and \ref fftConvolve()
+ * \ref histogram() and \ref histEqual()
+ * \ref lookup()
+ * \ref mean()
+* Added \ref AF_MSG macro
+
+Build Improvements
+------------------
+* Submodules update is now automatically called if not cloned recursively
+* [Fixes for compilation](https://github.com/arrayfire/arrayfire/issues/766) on Visual Studio 2015
+* Option to use [fallback to CPU LAPACK](https://github.com/arrayfire/arrayfire/pull/1053)
+ for linear algebra functions in case of CUDA 6.5 or older versions.
+
+Bug Fixes
+--------------
+* Fixed [memory leak](https://github.com/arrayfire/arrayfire/pull/1096) in \ref susan()
+* Fixed [failing test](https://github.com/arrayfire/arrayfire/commit/144a2db)
+ in \ref lower() and \ref upper() for CUDA compute 53
+* Fixed [bug](https://github.com/arrayfire/arrayfire/issues/1092) in CUDA for indexing out of bounds
+* Fixed [dims check](https://github.com/arrayfire/arrayfire/commit/6975da8) in \ref iota()
+* Fixed [out-of-bounds access](https://github.com/arrayfire/arrayfire/commit/7fc3856) in \ref sift()
+* Fixed [memory allocation](https://github.com/arrayfire/arrayfire/commit/5e88e4a) in \ref fast() OpenCL
+* Fixed [memory leak](https://github.com/arrayfire/arrayfire/pull/994) in image I/O functions
+* \ref dog() now returns floating-point type arrays
+
+Documentation Updates
+---------------------
+* Improved tutorials documentation
+ * More detailed Using on [Linux](\ref using_on_linux), [OSX](\ref using_on_osx),
+ [Windows](\ref using_on_windows) pages.
+* Added return type information for functions that return different type
+ arrays
+
+New Examples
+------------
+* Graphics
+ * [Plot3](\ref plot3.cpp)
+ * [Surface](\ref surface.cpp)
+* [Shallow Water Equation](\ref swe.cpp)
+* [Basic](\ref basic.cpp) as a Unified backend example
+
+Installers
+-----------
+* All installers now include the Unified backend and corresponding CMake files
+* Visual Studio projects include Unified in the Platform Configurations
+* Added installer for Jetson TX1
+* SIFT and GLOH do not ship with the installers as SIFT is protected by
+ patents that do not allow commercial distribution without licensing.
+
v3.1.3
==============
diff --git a/docs/pages/unified_backend.md b/docs/pages/unified_backend.md
new file mode 100644
index 0000000000..96bf94d0a3
--- /dev/null
+++ b/docs/pages/unified_backend.md
@@ -0,0 +1,212 @@
+Unified Backend {#unifiedbackend}
+==========
+
+[TOC]
+
+# Introduction
+
+The Unified backend was introduced in ArrayFire with version 3.2.
+While this is not an independent backend, it allows the user to switch between
+the different ArrayFire backends (CPU, CUDA and OpenCL) at runtime.
+
+# Compiling with Unified
+
+The steps to compile with the unified backend are the same as compiling with
+any of the other backends.
+The only change is that the executable needs to be linked with the __af__
+library (`libaf.so` (Linux), `libaf.dylib` (OSX), `af.lib` (Windows)).
+
+Check the Using on [Linux](\ref using_on_linux), [OSX](\ref using_on_osx),
+[Windows](\ref using_on_windows) pages for more details.
+
+To use with CMake, use the __ArrayFire_Unified_LIBRARIES__ variable.
+
+# Using the Unified Backend
+
+The Unified backend will try to dynamically load the backend libraries. The
+priority of backends is __CUDA -> OpenCL -> CPU__
+
+The most important aspect to note here is that all the libraries the ArrayFire
+libs depend on need to be in the environment paths
+
+* `LD_LIBRARY_PATH` -> Linux, Unix, OSX
+* `DYLD_LIBRARY_PATH` -> OSX
+* `PATH` -> Windows
+
+If any of the libs are missing, then the library will fail to load and the
+backend will be marked as unavailable.
+
+Optionally, the ArrayFire libs may be present in `AF_PATH` or `AF_BUILD_PATH`
+environment variables if the path is not in the system paths. These are
+treated as fallback paths in case the files are not found in the system paths.
+However, all the other upstream libraries for ArrayFire libs must be present
+in the system path variables shown above.
+
+### Special Mention: CUDA NVVM
+For the CUDA backend, ensure that the CUDA NVVM libs/dlls are in the path.
+These can be easily missed since CUDA installation does not add the paths by default.
+
+On Linux and OSX, add `/usr/local/cuda/nvvm/(lib or lib64)` to LD_LIBRARY_PATH or
+DYLD_LIBRARY_PATH.
+
+On Windows, you can set up a post build event that copies the NVVM dlls to
+the executable directory by using the following commands:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.c}
+echo copy "$(CUDA_PATH)\nvvm\bin\nvvm64*.dll" "$(OutDir)"
+copy "$(CUDA_PATH)\nvvm\bin\nvvm64*.dll" "$(OutDir)"
+if errorlevel 1 (
+ echo "CUDA NVVM DLLs copy failed due to missing files."
+ exit /B 0
+)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This ensures that the NVVM DLLs are copied if present, but does not fail the
+build if the copy fails. This is how ArrayFire ships its examples.
+
+The other option is to set `%%CUDA_PATH%/nvvm/bin` in the PATH environment
+variable.
+
+# Switching Backends
+
+The af_backend enum stores the possible backends.
+To select a backend, call the af::setBackend function as shown below.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.c}
+af::setBackend(AF_BACKEND_OPENCL); // Sets OpenCL as current backend
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To get the count of the number of backends available (the number of `libaf*`
+backend libraries loaded successfully), call the af::getBackendCount function.
+
+# Example
+
+This example is a shortened form of [basic.cpp](\ref basic.cpp).
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.c}
+#include <arrayfire.h>
+
+void testBackend()
+{
+ af::info();
+ af_print(af::randu(5, 4));
+}
+
+int main()
+{
+ try {
+ printf("Trying CPU Backend\n");
+ af::setBackend(AF_BACKEND_CPU);
+ testBackend();
+ } catch (af::exception& e) {
+ printf("Caught exception when trying CPU backend\n");
+ fprintf(stderr, "%s\n", e.what());
+ }
+
+ try {
+ printf("Trying CUDA Backend\n");
+ af::setBackend(AF_BACKEND_CUDA);
+ testBackend();
+ } catch (af::exception& e) {
+ printf("Caught exception when trying CUDA backend\n");
+ fprintf(stderr, "%s\n", e.what());
+ }
+
+ try {
+ printf("Trying OpenCL Backend\n");
+ af::setBackend(AF_BACKEND_OPENCL);
+ testBackend();
+ } catch (af::exception& e) {
+ printf("Caught exception when trying OpenCL backend\n");
+ fprintf(stderr, "%s\n", e.what());
+ }
+
+ return 0;
+}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The output would be:
+
+ Trying CPU Backend
+ ArrayFire v3.2.0 (CPU, 64-bit Linux, build fc7630f)
+ [0] Intel: Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz Max threads(8)
+ af::randu(5, 4)
+ [5 4 1 1]
+ 0.0000 0.2190 0.3835 0.5297
+ 0.1315 0.0470 0.5194 0.6711
+ 0.7556 0.6789 0.8310 0.0077
+ 0.4587 0.6793 0.0346 0.3834
+ 0.5328 0.9347 0.0535 0.0668
+
+ Trying CUDA Backend
+ ArrayFire v3.2.0 (CUDA, 64-bit Linux, build fc7630f)
+ Platform: CUDA Toolkit 7.5, Driver: 355.11
+ [0] Quadro K5000, 4093 MB, CUDA Compute 3.0
+ af::randu(5, 4)
+ [5 4 1 1]
+ 0.7402 0.4464 0.7762 0.2920
+ 0.9210 0.6673 0.2948 0.3194
+ 0.0390 0.1099 0.7140 0.8109
+ 0.9690 0.4702 0.3585 0.1541
+ 0.9251 0.5132 0.6814 0.4452
+
+ Trying OpenCL Backend
+ ArrayFire v3.2.0 (OpenCL, 64-bit Linux, build fc7630f)
+ [0] NVIDIA : Quadro K5000
+ -1- INTEL : Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz
+ af::randu(5, 4)
+ [5 4 1 1]
+ 0.4107 0.0081 0.6600 0.1046
+ 0.8224 0.3775 0.0764 0.8827
+ 0.9518 0.3027 0.0901 0.1647
+ 0.1794 0.6456 0.5933 0.8060
+ 0.4198 0.5591 0.1098 0.5938
+
+# Dos and Don'ts
+
+It is very easy to run into exceptions if you are not careful with the
+switching of backends.
+
+### Don't: Do not use arrays between different backends
+
+ArrayFire checks the input arrays to functions for mismatches with the active
+backend. If an array is created on one backend, but used when another backend
+is set to active, an exception with code 503 (`AF_ERR_ARR_BKND_MISMATCH`) is
+thrown.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.c}
+#include <arrayfire.h>
+
+int main()
+{
+ try {
+ af::setBackend(AF_BACKEND_CUDA);
+ af::array A = af::randu(5, 5);
+
+ af::setBackend(AF_BACKEND_OPENCL);
+ af::array B = af::constant(10, 5, 5);
+ af::array C = af::matmul(A, B); // This will throw an exception
+
+ } catch (af::exception& e) {
+ fprintf(stderr, "%s\n", e.what());
+ }
+
+ return 0;
+}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+### Do: Use a naming scheme to track arrays and backends
+
+We recommend that you use a technique to track the arrays on the backends. One
+suggested technique would be to use a suffix of `_cpu`, `_cuda`, `_opencl`
+with the array names. So an array created on the CUDA backend would be named
+`myarray_cuda`.
+
+If you have not used the af::setBackend function anywhere in your code, then
+you do not have to worry about this as all the arrays will be created on the
+same default backend.
+
+### Don't: Do not use custom kernels (CUDA/OpenCL) with the Unified backend
+
+This is another area that is a no go when using the Unified backend. It is not
+recommended that you use custom kernels with the Unified backend. This is mainly
+because the Unified backend is meant to be ultra portable and should use only
+ArrayFire and native CPU code.
diff --git a/docs/pages/using_on_linux.md b/docs/pages/using_on_linux.md
index 1f8f95e8ca..493080f447 100644
--- a/docs/pages/using_on_linux.md
+++ b/docs/pages/using_on_linux.md
@@ -1,23 +1,33 @@
Using ArrayFire on Linux {#using_on_linux}
=====
-
+Once you have [installed](\ref installing) ArrayFire on your system, the next thing to do is
+set up your build system. On Linux, you can create ArrayFire projects using
+almost any editor, compiler, or build system. The only requirements are
+that you include the ArrayFire header directories and link with the ArrayFire
+library you intend to use.
+
+## The big picture
+
+On Linux, we suggest you install ArrayFire to the `/usr/local` directory
+so that all of the include files and libraries are part of your standard path.
+The installer will populate files in the following sub-directories:
+
+ include/arrayfire.h - Primary ArrayFire include file
+ include/af/*.h - Additional include files
+ lib/libaf* - CPU, CUDA, and OpenCL libraries (.a, .so)
+ lib/libforge* - Visualization library
+ share/ArrayFire/cmake/* - CMake config (find) scripts
+ share/ArrayFire/examples/* - All ArrayFire examples
+
+Because ArrayFire follows standard installation practices, you can use basically
+any build system to create and compile projects that use ArrayFire.
Among the many possible build systems on Linux we suggest using ArrayFire with
-either CMake or Makefiles with CMake being the preferred build system.
-
-## Pre-requisites
-
-Before you get started, make sure you have the necessary pre-requisites.
+either CMake or Makefiles with CMake being our preferred build system.
-- If you are using CUDA, please make sure you have [CUDA 7](https://developer.nvidia.com/cuda-downloads) installed on your system.
- - [Contact us](support@arrayfire.com) for custom builds (eg. different toolkits)
+## Prerequisite software
-- If you are using OpenCL, please make sure you have one of the following SDKs.
- - [AMD OpenCL SDK](http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/)
- - [Intel OpenCL SDK](https://software.intel.com/en-us/articles/download-the-latest-intel-amt-software-development-kit-sdk)
- - [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
-
-You will also need the following dependencies to use ArrayFire.
+To build ArrayFire projects you will need a compiler.
#### Fedora, Centos and Redhat
@@ -28,107 +38,131 @@ yum install epel-release
yum update
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Install the common dependencies
+Install build dependencies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
yum install gcc gcc-c++ cmake make
-yum install freeimage
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Install glfw (not required for no-gl installers)
-
-Fedora:
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-yum install glfw
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-For Centos and Redhat, please follow [these instructions](https://github.com/arrayfire/arrayfire/wiki/GLFW-for-ArrayFire)
-
#### Debian and Ubuntu
Install common dependencies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
apt-get install build-essential cmake cmake-curses-gui
-apt-get install libfreeimage3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Install glfw (not required for no-gl installers)
-
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-apt-get install libglfw3
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-For Debian 7 and Ubuntu 14.04, please follow [these instructions](https://github.com/arrayfire/arrayfire/wiki/GLFW-for-ArrayFire)
+## CMake
-**Special instructions for Tegra-K1**
+We recommend that the CMake build system be used to create ArrayFire projects.
+If you are writing a new ArrayFire project in C/C++ from scratch, we suggest
+you grab a copy of our
+[CMake Project Example](https://github.com/arrayfire/arrayfire-project-templates);
+however, it is useful to read the documentation below in case you need to add
+ArrayFire to an existing project.
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sudo apt-get install libatlas3gf-base libatlas-dev libfftw3-dev liblapacke-dev
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As [discussed above](#big-picture), ArrayFire ships with a series of CMake
+scripts to make finding and using our library easy.
+The scripts will automatically find all versions of the ArrayFire library
+and pick the most powerful of the installed backends (typically CUDA).
-## CMake
+First create a file called `CMakeLists.txt` in your project directory:
-This is the suggested method of using ArrayFire on Linux.
-ArrayFire ships with support for CMake by default, including a series of
-`Find` scripts installed in the `/usr/local/share/ArrayFire/cmake` (or similar)
-directory.
-These scripts will automatically find the CUDA, OpenCL, and CPU versions
-of ArrayFire and automatically choose the most powerful installed backend
-(typically CUDA).
+ cd your-project-directory
+ touch CMakeLists.txt
-To use ArrayFire, simply insert the `FIND_PACKAGE` command inside of your
-`CMakeLists.txt` file as follows:
+and populate it with the following code:
FIND_PACKAGE(ArrayFire)
INCLUDE_DIRECTORIES(${ArrayFire_INCLUDE_DIRS})
- ...
- ADD_EXECUTABLE(some_executable ...)
- TARGET_LINK_LIBRARIES(some_executable ${ArrayFire_LIBRARIES} )
+ ... [gather source files, etc.]
-The find script will automatically define several variables including:
+ # If you intend to use OpenCL, you need to find it
+ FIND_PACKAGE(OpenCL)
+ SET(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${OpenCL_LIBRARIES})
- ArrayFire_INCLUDE_DIRS - Location of ArrayFire's include directory.
- ArrayFire_LIBRARIES - Location of ArrayFire's libraries. This will default
- to a GPU backend if one
- ArrayFire_FOUND - True if ArrayFire has been located
+ # Or if you intend to use CUDA, you need it as well as NVVM:
+ FIND_PACKAGE(CUDA)
+ FIND_PACKAGE(NVVM) # this FIND script can be found in the ArrayFire CMake example repository
+ SET(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${CUDA_LIBRARIES} ${NVVM_LIB})
-If you wish to use a specific backend, the find script also defines these variables:
+ ADD_EXECUTABLE(my_executable [list your source files here])
+ TARGET_LINK_LIBRARIES(my_executable ${ArrayFire_LIBRARIES} ${EXTRA_LIBS})
- ArrayFire_CPU_FOUND - True of the ArrayFire CPU library has been found.
- ArrayFire_CPU_LIBRARIES - Location of ArrayFire's CPU library, if found
- ArrayFire_CUDA_FOUND - True of the ArrayFire CUDA library has been found.
- ArrayFire_CUDA_LIBRARIES - Location of ArrayFire's CUDA library, if found
- ArrayFire_OpenCL_FOUND - True of the ArrayFire OpenCL library has been found.
- ArrayFire_OpenCL_LIBRARIES - Location of ArrayFire's OpenCL library, if found
+where `my_executable` is the name of the executable you wish to create.
+See the [CMake documentation](https://cmake.org/documentation/) for more
+information on how to use CMake.
+Clearly the above code snippet precludes the use of both CUDA and OpenCL; see
+the
+[ArrayFire CMake Example](https://github.com/arrayfire/arrayfire-project-templates/tree/master/CMake)
+for an example of how to build executables for both backends from the same
+CMake script.
-Therefore, if you wish to target a specific specific backend, switch
-`${ArrayFire_LIBRARIES}` to `${ArrayFire_CPU}` `${ArrayFire_OPENCL}` or
-`${ArrayFire_CUDA}` in the `TARGET_LINK_LIBRARIES` command above.
+In the above code listing, the `FIND_PACKAGE` will find the ArrayFire include
+files, libraries, and define several variables including:
-Finally, if you have installed ArrayFire to a non-standard location, CMake can still help
-you out. When you execute CMake specify the path to the `ArrayFireConfig*` files that
-are found in the `share/ArrayFire/cmake` subdirectory of the installation folder.
-For example, if ArrayFire were installed locally to `/opt/ArrayFire` then you would
-modify the `cmake` command above to contain the following definition:
+ ArrayFire_INCLUDE_DIRS - Location of ArrayFire's include directory.
+ ArrayFire_LIBRARIES - Location of ArrayFire's libraries.
+ This will default to a GPU backend if one
+ is found
+ ArrayFire_FOUND - True if ArrayFire has been located
+
+If you wish to use a specific backend, the find script also defines these variables:
-```
-cmake -DArrayFire_DIR=/opt/ArrayFire/share/ArrayFire/cmake ...
-```
+ ArrayFire_CPU_FOUND - True if the ArrayFire CPU library has been found.
+ ArrayFire_CPU_LIBRARIES - Location of ArrayFire's CPU library, if found
+ ArrayFire_CUDA_FOUND - True if the ArrayFire CUDA library has been found.
+ ArrayFire_CUDA_LIBRARIES - Location of ArrayFire's CUDA library, if found
+ ArrayFire_OpenCL_FOUND - True if the ArrayFire OpenCL library has been found.
+ ArrayFire_OpenCL_LIBRARIES - Location of ArrayFire's OpenCL library, if found
+ ArrayFire_Unified_FOUND - True if the ArrayFire Unified library has been found.
+ ArrayFire_Unified_LIBRARIES - Location of ArrayFire's Unified library, if found
+
+Therefore, if you wish to target a specific backend, simply replace
+`${ArrayFire_LIBRARIES}` with `${ArrayFire_CPU_LIBRARIES}`,
+`${ArrayFire_OpenCL_LIBRARIES}`, `${ArrayFire_CUDA_LIBRARIES}`, or
+`${ArrayFire_Unified_LIBRARIES}` in the `TARGET_LINK_LIBRARIES` command above.
+If you intend on building your software to link with all of these backends,
+please see the
+[CMake Project Example](https://github.com/arrayfire/arrayfire-project-templates)
+which makes use of some fairly fun CMake tricks to avoid re-compiling code
+whenever possible.
+
+Next we need to instruct CMake to create build instructions and then compile.
+We suggest using CMake's out-of-source build functionality to keep your build
+and source files cleanly separated. To do this:
+
+ cd your-project-directory
+ mkdir build
+ cd build
+ cmake ..
+ make
+
+*NOTE:* If you have installed ArrayFire to a non-standard location, CMake can
+still help you out. When you execute CMake specify the path to the
+`ArrayFireConfig*` files that are found in the `share/ArrayFire/cmake`
+subdirectory of the installation folder.
+For example, if ArrayFire were installed locally to `/opt/ArrayFire` then you
+would modify the `cmake` command above to contain the following definition:
+
+ cmake -DArrayFire_DIR=/opt/ArrayFire/share/ArrayFire/cmake ..
+
+You can also specify this information in the ccmake command-line interface.
## MakeFiles
-Using ArrayFire with Makefiles is almost as easy as CMake, but you will
-need to specify paths manually. In your makefile specify the include path to
-the directory containing `arrayfire.h`. Typically this will be `-I /usr/include`
-or `-I /usr/local/include` if you installed ArrayFire using our installation
+Building ArrayFire projects with Makefiles is fairly similar to CMake except
+you must specify all paths and libraries manually.
+As with any make project, you need to specify the include path to the
+directory containing the `arrayfire.h` file.
+This should be `-I /usr/local/include` if you followed our installation
instructions.
-Then, in your linker line specify the path to ArrayFire using the `-L` option
-(typically `-L/usr/lib` or `-L/usr/local/lib` and the specific ArrayFire backend
-you wish to use with the `-l` option (i.e. `-lafcpu`, `-lafopencl` or `-lafcuda`
-for the CPU, OpenCL and CUDA backends repsectively).
+Similarly, you will need to specify the path to the ArrayFire library using
+the `-L` option (e.g. `-L/usr/local/lib`) followed by the specific ArrayFire
+library you wish to use with the `-l` option (for example `-lafcpu`,
+`-lafopencl`, `-lafcuda`, or `-laf` for the CPU, OpenCL, CUDA, and unified
+backends respectively).
Here is a minimial example MakeFile which uses ArrayFire's CPU backend:
diff --git a/docs/pages/using_on_osx.md b/docs/pages/using_on_osx.md
index 0baa8c94e1..ccb0fb523a 100644
--- a/docs/pages/using_on_osx.md
+++ b/docs/pages/using_on_osx.md
@@ -1,82 +1,211 @@
Using ArrayFire on OSX {#using_on_osx}
=====
+Once you have [installed](\ref installing) ArrayFire on your system, the next
+thing to do is set up your build system.
+On OSX, you may create an ArrayFire project using almost any editor, compiler,
+or build system.
+The only requirement is that you can include the ArrayFire header directory,
+and link with the ArrayFire library you intend to use.
-Among the many possible build systems on OSX we suggest using ArrayFire with
-either CMake or Makefiles.
+## The big picture
-## Pre-requisites
+By default, the ArrayFire OSX installer will place several files in your
+computer's `/usr/local` directory.
+The installer will populate this directory with files in the following
+sub-directories:
-Before you get started, make sure you have the necessary pre-requisites.
+ include/arrayfire.h - Primary ArrayFire include file
+ include/af/*.h - Additional include files
+ lib/libaf* - CPU, CUDA, and OpenCL libraries (.a, .so)
+ lib/libforge* - Visualization library
+ share/ArrayFire/cmake/* - CMake config (find) scripts
+ share/ArrayFire/examples/* - All ArrayFire examples
-- If you want to use ArrayFire with CUDA, please make sure you have [CUDA 7](https://developer.nvidia.com/cuda-downloads) installed on your system.
- - [Contact us](support@arrayfire.com) for custom builds (eg. different toolkits)
+Because ArrayFire follows standard installation practices, you can use basically
+any build system to create and compile projects that use ArrayFire.
+Among the many possible build systems on OSX we suggest using ArrayFire with
+either CMake or Makefiles with CMake being our preferred build system.
-- Install the latest Xcode from the App Store
+## XCode
-- Install [brew](http://brew.sh/)
+Although we recommend using CMake to build ArrayFire projects on OSX, you can
+use XCode if this is your preferred development platform.
+To save some time, we have created a sample XCode project in our
+[ArrayFire Project Templates repository](https://github.com/arrayfire/arrayfire-project-templates).
+
+To set up a basic C/C++ project in XCode do the following:
+
+1. Start up XCode. Choose OSX -> Application, Command Line Tool for the project:
+
+
+2. Fill in the details for your project and choose either C or C++ for the project:
+
+
+3. Next we need to configure the build settings. In the left-hand pane, click
+ on the project. In the center pane, click on "Build Settings" followed by
+ the "All" button:
+
+
+4. Now search for "Header Search Paths" and add `/usr/local/include` to the list:
+
+
+5. Then search for "Library Search Paths" and add `/usr/local/lib` to the list:
+
+
+6. Next, we need to make sure the executable is linked with an ArrayFire library:
+   To do this, click the "Build Phases" tab and expand the "Link Binary With Libraries"
+ menu:
+
+
+7. In the search dialog that pops up, choose the "Add Other" button from the
+ lower right. Specify the `/usr/local/lib` folder:
+
+
+8. Lastly, select the ArrayFire library with which you wish to link your program.
+ Your options will be:
+
+~~~~~
+libafcuda.*.dylib - CUDA backend
+libafopencl.*.dylib - OpenCL backend
+libafcpu.*.dylib - CPU backend
+libaf.*.dylib - Unified backend
+~~~~~
+
+In the picture below, we have elected to link with the OpenCL backend:
+
+
+
+9. Lastly, let's test ArrayFire's functionality. In the left hand pane open
+   the `main.cpp` file and insert the following code:
+
+~~~~~
+// Include the ArrayFire header file
+#include <arrayfire.h>
+
+int main(int argc, const char * argv[]) {
+ // Gather some information about the ArrayFire device
+ af::info();
+ return 0;
+}
+~~~~~
+
+Finally, click the build button and you should see some information about your
+graphics card in the lower-section of your screen:
+
+
## CMake
-This is the suggested method of using ArrayFire on OSX.
-ArrayFire ships with support for CMake by default, including a series of
-`Find` scripts installed in the `/usr/local/share/ArrayFire/cmake` (or similar)
-directory.
-These scripts will automatically find the CUDA, OpenCL, and CPU versions
-of ArrayFire and automatically choose the most powerful installed backend
-(typically CUDA).
+We recommend that the CMake build system be used to create ArrayFire projects.
+If you are writing a new ArrayFire project in C/C++ from scratch, we suggest
+you grab a copy of our
+[CMake Project Example](https://github.com/arrayfire/arrayfire-project-templates);
+however, it is useful to read the documentation below in case you need to add
+ArrayFire to an existing project.
+
+As [discussed above](#big-picture), ArrayFire ships with a series of CMake
+scripts to make finding and using our library easy.
+The scripts will automatically find all versions of the ArrayFire library
+and pick the most powerful of the installed backends (typically CUDA).
-To use ArrayFire, simply insert the `FIND_PACKAGE` command inside of your
-`CMakeLists.txt` file as follows:
+First create a file called `CMakeLists.txt` in your project directory:
+
+ cd your-project-directory
+ touch CMakeLists.txt
+
+and populate it with the following code:
FIND_PACKAGE(ArrayFire)
INCLUDE_DIRECTORIES(${ArrayFire_INCLUDE_DIRS})
- ...
- ADD_EXECUTABLE(some_executable ...)
- TARGET_LINK_LIBRARIES(some_executable ${ArrayFire_LIBRARIES} )
+ ... [gather source files, etc.]
-The find script will automatically define several variables including:
+ # If you intend to use OpenCL, you need to find it
+ FIND_PACKAGE(OpenCL)
+ SET(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${OpenCL_LIBRARIES})
- ArrayFire_INCLUDE_DIRS - Location of ArrayFire's include directory.
- ArrayFire_LIBRARIES - Location of ArrayFire's libraries. This will default
- to a GPU backend if one
- ArrayFire_FOUND - True if ArrayFire has been located
+ # Or if you intend to use CUDA, you need it as well as NVVM:
+ FIND_PACKAGE(CUDA)
+ FIND_PACKAGE(NVVM) # this FIND script can be found in the ArrayFire CMake example repository
+ SET(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${CUDA_LIBRARIES} ${NVVM_LIB})
-If you wish to use a specific backend, the find script also defines these variables:
+ ADD_EXECUTABLE(my_executable [list your source files here])
+ TARGET_LINK_LIBRARIES(my_executable ${ArrayFire_LIBRARIES} ${EXTRA_LIBS})
+
+where `my_executable` is the name of the executable you wish to create.
+See the [CMake documentation](https://cmake.org/documentation/) for more
+information on how to use CMake.
+Clearly the above code snippet precludes the use of both CUDA and OpenCL, see
+the
+[ArrayFire CMake Example](https://github.com/bkloppenborg/arrayfire-cmake-example)
+for an example of how to build executables for both backends from the same
+CMake script.
- ArrayFire_CPU_FOUND - True of the ArrayFire CPU library has been found.
- ArrayFire_CPU_LIBRARIES - Location of ArrayFire's CPU library, if found
- ArrayFire_CUDA_FOUND - True of the ArrayFire CUDA library has been found.
- ArrayFire_CUDA_LIBRARIES - Location of ArrayFire's CUDA library, if found
- ArrayFire_OpenCL_FOUND - True of the ArrayFire OpenCL library has been found.
- ArrayFire_OpenCL_LIBRARIES - Location of ArrayFire's OpenCL library, if found
+In the above code listing, the `FIND_PACKAGE` will find the ArrayFire include
+files, libraries, and define several variables including:
-Therefore, if you wish to target a specific specific backend, switch
-`${ArrayFire_LIBRARIES}` to `${ArrayFire_CPU}` `${ArrayFire_OPENCL}` or
-`${ArrayFire_CUDA}` in the `TARGET_LINK_LIBRARIES` command above.
+ ArrayFire_INCLUDE_DIRS - Location of ArrayFire's include directory.
+ ArrayFire_LIBRARIES - Location of ArrayFire's libraries.
+ This will default to a GPU backend if one
+ is found
+ ArrayFire_FOUND - True if ArrayFire has been located
-Finally, if you have installed ArrayFire to a non-standard location, CMake can still help
-you out. When you execute CMake specify the path to the `ArrayFireConfig*` files that
-are found in the `share/ArrayFire/cmake` subdirectory of the installation folder.
-For example, if ArrayFire were installed locally to `/opt/ArrayFire` then you would
-modify the `cmake` command above to contain the following definition:
+If you wish to use a specific backend, the find script also defines these variables:
-```
-cmake -DArrayFire_DIR=/opt/ArrayFire/share/ArrayFire/cmake ...
-```
+    ArrayFire_CPU_FOUND         - True if the ArrayFire CPU library has been found.
+    ArrayFire_CPU_LIBRARIES     - Location of ArrayFire's CPU library, if found
+    ArrayFire_CUDA_FOUND        - True if the ArrayFire CUDA library has been found.
+    ArrayFire_CUDA_LIBRARIES    - Location of ArrayFire's CUDA library, if found
+    ArrayFire_OpenCL_FOUND      - True if the ArrayFire OpenCL library has been found.
+    ArrayFire_OpenCL_LIBRARIES  - Location of ArrayFire's OpenCL library, if found
+    ArrayFire_Unified_FOUND     - True if the ArrayFire Unified library has been found.
+    ArrayFire_Unified_LIBRARIES - Location of ArrayFire's Unified library, if found
+
+Therefore, if you wish to target a specific backend, simply replace
+`${ArrayFire_LIBRARIES}` with `${ArrayFire_CPU_LIBRARIES}`,
+`${ArrayFire_OpenCL_LIBRARIES}`, `${ArrayFire_CUDA_LIBRARIES}`, or
+`${ArrayFire_Unified_LIBRARIES}` in the `TARGET_LINK_LIBRARIES` command above.
+If you intend on building your software to link with all of these backends,
+please see the
+[CMake Project Example](https://github.com/arrayfire/arrayfire-project-templates)
+which makes use of some fairly fun CMake tricks to avoid re-compiling code
+whenever possible.
+
+Next we need to instruct CMake to create build instructions and then compile.
+We suggest using CMake's out-of-source build functionality to keep your build
+and source files cleanly separated. To do this:
+
+ cd your-project-directory
+ mkdir build
+ cd build
+ cmake ..
+ make
+
+*NOTE:* If you have installed ArrayFire to a non-standard location, CMake can
+still help you out. When you execute CMake specify the path to the
+`ArrayFireConfig*` files that are found in the `share/ArrayFire/cmake`
+subdirectory of the installation folder.
+For example, if ArrayFire were installed locally to `/opt/ArrayFire` then you
+would modify the `cmake` command above to contain the following definition:
+
+ cmake -DArrayFire_DIR=/opt/ArrayFire/share/ArrayFire/cmake ..
+
+You can also specify this information in the ccmake command-line interface.
## MakeFiles
-Using ArrayFire with Makefiles is almost as easy as CMake, but you will
-need to specify paths manually. In your makefile specify the include path to
-the directory containing `arrayfire.h`. Typically this will be `-I /usr/include`
-or `-I /usr/local/include` if you installed ArrayFire using our installation
+Building ArrayFire projects with Makefiles is fairly similar to CMake except
+you must specify all paths and libraries manually.
+As with any make project, you need to specify the include path to the
+directory containing `arrayfire.h` file.
+This should be `-I /usr/local/include` if you followed our installation
instructions.
-Then, in your linker line specify the path to ArrayFire using the `-L` option
-(typically `-L/usr/lib` or `-L/usr/local/lib` and the specific ArrayFire backend
-you wish to use with the `-l` option (i.e. `-lafcpu`, `-lafopencl` or `-lafcuda`
-for the CPU, OpenCL and CUDA backends repsectively).
+Similarly, you will need to specify the path to the ArrayFire library using
+the `-L` option (e.g. `-L/usr/local/lib`) followed by the specific ArrayFire
+library you wish to use using the `-l` option (for example `-lafcpu`,
+`-lafopencl`, `-lafcuda`, or `-laf` for the CPU, OpenCL, CUDA, and unified
+backends respectively).
Here is a minimial example MakeFile which uses ArrayFire's CPU backend:
diff --git a/docs/pages/using_on_windows.md b/docs/pages/using_on_windows.md
index aa4aeff2d0..92c7c2db92 100644
--- a/docs/pages/using_on_windows.md
+++ b/docs/pages/using_on_windows.md
@@ -1,108 +1,224 @@
Using ArrayFire with Microsoft Windows and Visual Studio {#using_on_windows}
=====
-## Pre-requisites
-
-Before you get started, make sure you have the necessary pre-requisites.
-
-- If you are using CUDA, please make sure you have [CUDA 7](https://developer.nvidia.com/cuda-downloads) installed on your system.
- - [Contact us](support@arrayfire.com) for custom builds (eg. different toolkits)
-
-- If you are using OpenCL, please make sure you have one of the following SDKs.
- - [AMD OpenCL SDK](http://developer.amd.com/tools-and-sdks/opencl-zone/amd-accelerated-parallel-processing-app-sdk/)
- - [Intel OpenCL SDK](https://software.intel.com/en-us/articles/download-the-latest-intel-amt-software-development-kit-sdk)
- - [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads)
-
-## Step 0: Running pre-built executables
+If you have not already done so, please make sure you have installed,
+configured, and tested ArrayFire following the
+[installation instructions](\ref installing).
+
+## The big picture
+The ArrayFire Windows installer creates the following:
+1. `AF_PATH` environment variable to point to the installation location. The
+ default install location is `C:\Program Files\ArrayFire\v3`
+2. `AF_PATH/include` : Header files for ArrayFire (include directory)
+3. `AF_PATH/lib` : All ArrayFire backends libraries, dlls and dependency dlls (library directory)
+4. `AF_PATH/examples` : Examples to get started. Some examples also have pre-built executables
+5. `AF_PATH/cmake` : CMake config files for automatic configuration by external projects
+6. `AF_PATH/uninstall.exe` : Uninstaller
+7. `AF_PATH/*` : Other miscellaneous files including licenses, logos, copyrights
+
+The installer also appends `%%AF_PATH%/lib` to the User PATH variable.
+
+To add `%%AF_PATH%/lib` to PATH for all users see the windows section in
+[installation instructions](\ref installing).
+
+### Dealing with CUDA NVVM DLLs
+When using CUDA with ArrayFire you may encounter a linker error indicating the
+NVVM DLLs are missing. This is because the NVVM DLLs are not part of the
+standard `CUDA_PATH\bin` installation directory that is added to your `PATH`
+when the CUDA installer runs. Thus, NVVM will not be found during runtime. There
+are a few ways to deal with this issue:
+
+1. Copy the DLLs to the executable location. This is, by far, the cleanest
+   solution and we recommend doing this with ArrayFire projects. To do so,
+   create a post-build event to copy the NVVM DLL as discussed below in
+   [Step 3 - Part A](#s3partA).
+2. Copy `CUDA_PATH\nvvm\bin\nvvm64_30_0.dll` to `CUDA_PATH\bin`. This is a one time
+ copy such that the NVVM DLL is now with all the other CUDA dlls and in a
+ directory that is a part of PATH and hence the DLL can be detected automatically.
+3. Add `%%CUDA_PATH%\nvvm\bin` to the system PATH environment variable.
+   This will allow automatic detection by the system and no further copying will
+ be required. ArrayFire does not add this to PATH since the CUDA installer
+ doesn't add it to PATH.
+
+## Step 1: Running pre-built executables
The ArrayFire installer ships with a few pre-built executables with the examples.
-These should run out of the box.
-
-Note: For the CUDA executables, you will need to copy CUDA_PATH\nvvm\bin\nvvm64_30_0.dll
-to the location of the executables.
-
-## Step 1: Adding ArrayFire to PATH for all users
-
-The ArrayFire installer for Windows creates a user `PATH` variable containing
-`%%AF_PATH%/lib`. This is required so that Windows knows where to find the
-ArrayFire DLLs. This variable fixes the DLL finding only for the user that
-installs ArrayFire.
-
-To allow DLL detection for all users, it needs to be added to the system
-`PATH` variable. For this, follow the steps:
-
-1. Open Advanced System Settings:
- * Windows 8: Move the Mouse pointer to the bottom right corner of the screen,
- Right click, choose System. Then click "Advanced System Settings"
- * Windows 7: Open the Start Menu and Right Click on "Computer". Then choose
- Properties and click "Advanced System Settings"
-
-2. In _Advanced System Settings_ window, click on _Advanced_ tab
+These should run out of the box when double clicked.
-3. Click on _Environment Variables_, then under **System Variables**, find
- `PATH`, and click on it.
+Some prebuilt examples are:
+* Helloworld (examples/helloworld)
+* BLAS (examples/benchmarks)
+* FFT (examples/benchmarks)
+* Pi Estimation (examples/benchmarks)
+* Conway (Graphics) (examples/graphics)
-4. In edit mode, append `%%AF_PATH%/lib`. NOTE: Ensure that there is a semi-colon
- separating `%%AF_PATH%/lib` from any existing content (e.g.
- `EXISTING_PATHS;%%AF_PATH%/lib;`) otherwise other software may not function
- correctly.
+Note: For the CUDA executables, you will need to copy `CUDA_PATH\nvvm\bin\nvvm64_30_0.dll`
+to the location of the executables.
-## Step 2: Verify the path addition functions correctly
+## Step 2: Build and Run a Project
-1. Open Visual Studio 2013. Open the HelloWorld solution which is located at
+1. Open Visual Studio 2013. Load the HelloWorld solution which is located at
`AF_PATH/examples/helloworld/helloworld.sln`.
-2. Build and run the `helloworld` example. Be sure to, select the
- platform/configuration of your choice using the platform drop-down
- (the options are CPU, CUDA, and OpenCL) and Solution Configuration drop down
- (options of Release and Debug) menus.
-3. Run the `helloworld` example
-
-## Step 3: Creating your own Visual Studio Project
-
-### A new project from scratch
-
-If you are creating a new project which is intended to be platform-independent,
-the best option is to simply copy the existing `helloworld` solution files
-and modify them to suit your needs. This will retain all the platform based
-settings that have been configured in the examples.
-
-### Adding ArrayFire CPU/OpenCL to a new/existing project
-
-If you are adding ArrayFire to a new or existing project that will contain
-custom CPU or OpenCL kernels, you only need to make a few modifications to
-your project soultion:
+2. Build the `helloworld` example. Be sure to select the platform/configuration
+ of your choice using the platform drop-down (the options are CPU, CUDA,
+ OpenCL, and Unified) and Solution Configuration drop down (options of Release
+ and Debug) menus.
+3. Run the `helloworld` example.
+
+## Step 3: Using ArrayFire within Visual Studio
+This is divided into 4 parts:
+* [Part A: Adding ArrayFire to an existing solution (Single Backend)](#s3partA)
+* [Part B: Adding ArrayFire CUDA to a new/existing CUDA project](#s3partB)
+* [Part C: Project with all ArrayFire backends](#s3partC)
+* [Part D: ArrayFire with CMake](#s3partD)
+
+### Part A: Adding ArrayFire to an existing solution (Single Backend)
+Note: If you plan on using Native CUDA code in the project, use the steps
+under [Part B](#s3partB).
+
+Adding a single backend to an existing project is quite simple.
+
+1. Add `"$(AF_PATH)/include;"` to
+ _Project Properties -> C/C++ -> General -> Additional Include Directories_.
+2. Add `"$(AF_PATH)/lib;"` to
+ _Project Properties -> Linker -> General -> Additional Library Directories_.
+3. Add `afcpu.lib` or `afcuda.lib` or `afopencl.lib` to
+ _Project Properties -> Linker -> Input -> Additional Dependencies_.
+ based on your preferred backend.
+4. (Optional) You may choose to define `NOMINMAX`, `AF_<CPU/CUDA/OPENCL>`
+   and/or `AF_<DEBUG/RELEASE>` in your projects. This can be added to
+   _Project Properties -> C/C++ -> General -> Preprocessor-> Preprocessor Definitions_.
+
+If you are using the CUDA backend, it is important to ensure that the CUDA NVVM
+DLLs are copied to the executable directory. This can be done by adding a post
+build event.
+
+Open the _Project Properties -> Build Events -> Post Build Events_ dialog and
+add the following lines to it.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~{.c}
+echo copy "$(CUDA_PATH)\nvvm\bin\nvvm64*.dll" "$(OutDir)"
+copy "$(CUDA_PATH)\nvvm\bin\nvvm64*.dll" "$(OutDir)"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+### Part B: Adding ArrayFire CUDA to a new/existing CUDA project
+Lastly, if your project contains custom CUDA code, the instructions are slightly
+different as it requires using a CUDA NVCC Project:
-1. Open an existing project or create a new "Empty C/C++ project in Visual Studio"
-2. Add `$(AF_PATH)/include;` to
- _Project Properties -> C/C++ -> General -> Additional Include Directories_
-3. Add `$(AF_PATH)/lib;` to
- _Project Properties -> Linker -> General -> Additional Library Directories_
+1. Create a custom "CUDA NVCC project" in Visual Studio
+2. Add `"$(AF_PATH)/include;"` to
+ _Project Properties -> CUDA C/C++ -> General -> Additional Include Directories_.
+3. Add `"$(AF_PATH)/lib;"` to
+ _Project Properties -> Linker -> General -> Additional Library Directories_.
4. Add `afcpu.lib` or `afcuda.lib` or `afopencl.lib` to
- _Project Properties -> Linker -> Input -> Additional Dependencies_
- based on your preferred backend.
-5. (Optional) You make choose to define `NOMINMAX`, `AF_`
- and/or `AF_` in your projects. This can be added to
- _Project Properties -> C/C++ -> General -> Preprocessor-> Preprocessory definitions_.
+ _Project Properties -> Linker -> Input -> Additional Dependencies_.
+ based on your preferred backend.
+5. (Optional) You may choose to define `NOMINMAX`, `AF_CUDA`
+   and/or `AF_<DEBUG/RELEASE>` in your projects. This can be added to
+   _Project Properties -> C/C++ -> General -> Preprocessor-> Preprocessor Definitions_.
+6. Pick a solution to handle the NVVM DLLs. We recommend the post build event
+ method used in [Part A](#s3partA).
+
+### Part C: Project with all ArrayFire backends
+If you wish to create a project that allows you to use all the ArrayFire
+backends with ease, the best way to go is to copy the *HelloWorld sln/vcxproj/cpp*
+file trio and rename them to suit your project.
+
+All the ArrayFire examples are pre-configured for all ArrayFire backends as well
+as the Unified API. These can be chosen from the Solution/Platform configuration
+drop down boxes.
+
+You can alternately download the template project from
+[ArrayFire Template Projects](https://github.com/arrayfire/arrayfire-project-templates)
+
+### Part D: ArrayFire with CMake
+*NOTE:* The ArrayFire installer sets up CMake file and registry so that it can be found
+by CMake by simply using the `FIND_PACKAGE(ArrayFire)` command.
+
+If you are writing a new ArrayFire project in C/C++ from scratch, we suggest
+you grab a copy of our
+[CMake Project Example](https://github.com/arrayfire/arrayfire-project-templates);
+however, it is useful to read the documentation below in case you need to add
+ArrayFire to an existing project.
+
+As [discussed above](#big-picture), ArrayFire ships with a series of CMake
+scripts to make finding and using our library easy.
+The scripts will automatically find all versions of the ArrayFire library
+and pick the most powerful of the installed backends (typically CUDA).
+
+First create a file called `CMakeLists.txt` in your project directory:
+
+ cd your-project-directory
+ touch CMakeLists.txt
+
+and populate it with the following code:
+
+ FIND_PACKAGE(ArrayFire)
+ INCLUDE_DIRECTORIES(${ArrayFire_INCLUDE_DIRS})
+
+ ... [gather source files, etc.]
+
+ # If you intend to use OpenCL, you need to find it
+ FIND_PACKAGE(OpenCL)
+ SET(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${OpenCL_LIBRARIES})
+
+ # Or if you intend to use CUDA, you need it as well as NVVM:
+ FIND_PACKAGE(CUDA)
+ FIND_PACKAGE(NVVM) # this FIND script can be found in the ArrayFire CMake example repository
+ SET(EXTRA_LIBS ${CMAKE_THREAD_LIBS_INIT} ${CUDA_LIBRARIES} ${NVVM_LIB})
+
+ ADD_EXECUTABLE(my_executable [list your source files here])
+ TARGET_LINK_LIBRARIES(my_executable ${ArrayFire_LIBRARIES} ${EXTRA_LIBS})
+
+where `my_executable` is the name of the executable you wish to create.
+See the [CMake documentation](https://cmake.org/documentation/) for more
+information on how to use CMake.
+Clearly the above code snippet precludes the use of both CUDA and OpenCL, see
+the
+[ArrayFire CMake Example](https://github.com/arrayfire/arrayfire-project-templates/tree/master/CMake)
+for an example of how to build executables for both backends from the same
+CMake script.
+
+In the above code listing, the `FIND_PACKAGE` will find the ArrayFire include
+files, libraries, and define several variables including:
+
+ ArrayFire_INCLUDE_DIRS - Location of ArrayFire's include directory.
+ ArrayFire_LIBRARIES - Location of ArrayFire's libraries.
+ This will default to a GPU backend if one
+ is found
+ ArrayFire_FOUND - True if ArrayFire has been located
+
+If you wish to use a specific backend, the find script also defines these variables:
+
+    ArrayFire_CPU_FOUND         - True if the ArrayFire CPU library has been found.
+    ArrayFire_CPU_LIBRARIES     - Location of ArrayFire's CPU library, if found
+    ArrayFire_CUDA_FOUND        - True if the ArrayFire CUDA library has been found.
+    ArrayFire_CUDA_LIBRARIES    - Location of ArrayFire's CUDA library, if found
+    ArrayFire_OpenCL_FOUND      - True if the ArrayFire OpenCL library has been found.
+    ArrayFire_OpenCL_LIBRARIES  - Location of ArrayFire's OpenCL library, if found
+    ArrayFire_Unified_FOUND     - True if the ArrayFire Unified library has been found.
+    ArrayFire_Unified_LIBRARIES - Location of ArrayFire's Unified library, if found
+
+Therefore, if you wish to target a specific backend, simply replace
+`${ArrayFire_LIBRARIES}` with `${ArrayFire_CPU_LIBRARIES}`,
+`${ArrayFire_OpenCL_LIBRARIES}`, `${ArrayFire_CUDA_LIBRARIES}`, or
+`${ArrayFire_Unified_LIBRARIES}` in the `TARGET_LINK_LIBRARIES` command above.
+
+Next we need to instruct CMake to create build instructions and then compile.
+We suggest using CMake's out-of-source build functionality to keep your build
+and source files cleanly separated. To do this open the CMake GUI.
+
+* Under source directory, add the path to your project
+* Under build directory, add the path to your project and append /build
+* Click configure and choose Visual Studio 2013 Win 64 as the generator.
+* If configuration was successful, click generate. This will create a
+ my-project.sln file under build. You can open this in Visual Studio and
+ compile the ALL_BUILD project.
+
+
+The [ArrayFire CMake Example](https://github.com/arrayfire/arrayfire-project-templates/tree/master/CMake)
+is a CMake project used to demo how ArrayFire can be used with a CMake project.
-### Adding ArrayFire CUDA to a new/existing project
+Note: The CMake project does not add the post build event to copy the NVVM DLLs
+in case of CUDA backend. You will need to either copy it manually to the executable
+directory, or pick another solution for it.
-Lastly, if your project contains custom CUDA code, the instructions are slightly
-different:
-
-1. Create a custom "CUDA NVCC project" in Visual Studio
-2. Follow steps 2-5 from the _Adding ArrayFire CPU/OpenCL to a new/existing project_
- instructions above
-3. Add the following lines to the
- _Project Properties -> Build Events -> Post Build Events_
- dialog:
-
- ```
- echo copy "$(CUDA_PATH)\nvvm\bin\nvvm64*.dll" "$(OutDir)"
- copy "$(CUDA_PATH)\nvvm\bin\nvvm64*.dll" "$(OutDir)"
- ```
-
-4. Ensure that you use x64 based configurations.
-
-Please note that this method will not work with the ArrayFire examples as
-our implementations are built with the Visual Studio CL compiler rather than
-NVCC to ensure they are supported across various platforms.
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 0c66486080..a795916eb3 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -14,12 +14,12 @@ if(TARGET afcpu OR TARGET afcuda OR TARGET afopencl)
SET(ArrayFire_OpenCL_FOUND False)
SET(ASSETS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../assets")
IF(NOT EXISTS "${ASSETS_DIR}/LICENSE")
- MESSAGE(WARNING "Arrayfire assets are not available. Assets will not be installed.")
- MESSAGE("Did you miss the --recursive option when cloning?")
- MESSAGE("Run the following commands to correct this:")
- MESSAGE("git submodule init")
- MESSAGE("git submodule update")
- MESSAGE("git submodule foreach git pull origin master")
+        # The assets live in a git submodule; fetch it automatically when it is missing.
+        MESSAGE(STATUS "Assets submodule unavailable. Updating submodules.")
+        EXECUTE_PROCESS(COMMAND git submodule update --init --recursive
+            WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+            OUTPUT_QUIET
+        )
ENDIF()
else()
FIND_PACKAGE(ArrayFire REQUIRED)
@@ -81,6 +81,21 @@ else()
MESSAGE(STATUS "EXAMPLES: CPU backend is OFF. afcpu was not found.")
endif()
+# Next we build each example against the unified backend, using the installed
+# package when it was found and the in-tree `af` target otherwise.
+if(ArrayFire_Unified_FOUND) # variable defined by FIND(ArrayFire ...)
+    MESSAGE(STATUS "EXAMPLES: UNIFIED backend is ON.")
+    BUILD_ALL("${FILES}" unified ${ArrayFire_Unified_LIBRARIES} "")
+elseif(TARGET af) # target defined by the ArrayFire build tree
+    MESSAGE(STATUS "EXAMPLES: UNIFIED backend is ON.")
+    # CMAKE_DL_LIBS names the platform's dynamic-loading library: "dl" on
+    # typical Unix systems and empty where none is needed (e.g. Windows), so
+    # no explicit WIN32 branch is required.
+    BUILD_ALL("${FILES}" unified af "${CMAKE_DL_LIBS}")
+else()
+    MESSAGE(STATUS "EXAMPLES: UNIFIED backend is OFF. af was not found.")
+endif()
+
if (${CUDA_FOUND})
if(${ArrayFire_CUDA_FOUND}) # variable defined by FIND(ArrayFire ...)
FIND_LIBRARY( CUDA_NVVM_LIBRARY
diff --git a/examples/common/progress.h b/examples/common/progress.h
index debb511e1a..6452aa2a5b 100644
--- a/examples/common/progress.h
+++ b/examples/common/progress.h
@@ -36,7 +36,7 @@ static bool progress(unsigned iter_curr, af::timer t, double time_total)
if (time_curr < time_total) return true;
- printf(" ### vortex %f iterations per second (max)\n", max_rate);
+ printf(" ### %f iterations per second (max)\n", max_rate);
return false;
}
diff --git a/examples/graphics/plot3.cpp b/examples/graphics/plot3.cpp
new file mode 100644
index 0000000000..ea2ca8d53d
--- /dev/null
+++ b/examples/graphics/plot3.cpp
@@ -0,0 +1,58 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include
+#include
+#include
+
+using namespace af;
+
+static const int ITERATIONS = 200;
+static const float PRECISION = 1.0f/ITERATIONS; // step between the samples passed to seq()
+
+int main(int argc, char *argv[])
+{
+    // Animates a 3D line plot until the window is closed.
+    try {
+        af::info();
+        af::Window myWindow(800, 800, "3D Line Plot example: ArrayFire");
+
+        // Z samples of the curve, and the limit that clamps X and Y to [-1, 1]
+        array Z = seq( 0.1f, 10.f, PRECISION);
+        array bounds = constant(1, Z.dims());
+        float t = 0.1;  // animation parameter, advanced once per frame
+
+        for (;;) {
+            array phase = (Z*t) + t;
+            array Y = sin(phase) / Z;
+            array X = cos(phase) / Z;
+            X = max(min(X, bounds), -bounds);
+            Y = max(min(Y, bounds), -bounds);
+            // The points may be an n x 3 or 3 x n matrix, or a flattened
+            // xyz-triplet array of size 3n x 1.
+            myWindow.plot3(join(1, X, Y, Z));
+
+            t += 0.01;
+            if (myWindow.close()) break;
+        }
+    } catch (af::exception& e) {
+        fprintf(stderr, "%s\n", e.what());
+        throw;
+    }
+
+#ifdef WIN32 // pause in Windows
+    if (argc != 2 || argv[1][0] != '-') {
+        printf("hit [enter]...");
+        fflush(stdout);
+        getchar();
+    }
+#endif
+    return 0;
+}
+
diff --git a/examples/graphics/surface.cpp b/examples/graphics/surface.cpp
new file mode 100644
index 0000000000..92d5185d16
--- /dev/null
+++ b/examples/graphics/surface.cpp
@@ -0,0 +1,55 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include
+#include
+#include
+
+using namespace af;
+
+static const int ITERATIONS = 30;
+static const float PRECISION = 1.0f/ITERATIONS; // step between the samples passed to seq()
+
+int main(int argc, char *argv[])
+{
+    // Animates a 3D surface plot until the window is closed.
+    try {
+        af::info();
+        af::Window myWindow(800, 800, "3D Surface example: ArrayFire");
+
+        array X = seq(-1, 1, PRECISION);
+        array Y = seq(-1, 1, PRECISION);
+        array Z = randn(X.dims(0), Y.dims(0));
+
+        // The coordinate grids depend only on X and Y, so build them once
+        // instead of re-tiling on every frame.
+        array x = tile(X,1, Y.dims(0));
+        array y = transpose(tile(Y, 1, X.dims(0)));
+
+        float t = 0;  // animation time, advanced before each frame
+        while(!myWindow.close()) {
+            t+=0.07;
+            Z = 10*x*-abs(y) * cos(x*x*(y+t))+sin(y*(x+t))-1.5;
+            myWindow.surface(X, Y, Z, NULL);
+        }
+    } catch (af::exception& e) {
+        fprintf(stderr, "%s\n", e.what());
+        throw;
+    }
+
+#ifdef WIN32 // pause in Windows
+    if (!(argc == 2 && argv[1][0] == '-')) {
+        printf("hit [enter]...");
+        fflush(stdout);
+        getchar();
+    }
+#endif
+    return 0;
+}
+
diff --git a/examples/pde/swe.cpp b/examples/pde/swe.cpp
new file mode 100644
index 0000000000..84ce1ff4de
--- /dev/null
+++ b/examples/pde/swe.cpp
@@ -0,0 +1,86 @@
+#include <arrayfire.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "../common/progress.h"
+
+using namespace af;
+
+// Render window; swe() creates it only when not running in console mode.
+Window *win;
+
+array normalize(array a, float max)   // maps [-max/2, max/2] linearly onto [0, 1]
+{
+    const float lower = -max * 0.5;
+    const float upper = max * 0.5;
+    return (a - lower) / (upper - lower);
+}
+
+// Runs the shallow-water-equation demo on a 513 x 513 grid.
+// console == false: every step is drawn into a 512 x 512 window.
+// console == true : no window is created; the step results are only evaluated.
+// The loop runs for as long as progress() (from ../common/progress.h) returns true.
+static void swe(bool console)
+{
+    double time_total = 20; // run for N seconds
+    // Grid length, number and spacing
+    const unsigned Lx = 512, nx = Lx + 1;
+    const unsigned Ly = 512, ny = Ly + 1;
+    const float dx = Lx / (nx - 1);
+    const float dy = Ly / (ny - 1);
+
+    array ZERO = constant(0, nx, ny);
+    array um = ZERO, vm = ZERO;
+    unsigned io = (unsigned)floor(Lx / 5.0f),
+             jo = (unsigned)floor(Ly / 5.0f),
+             k = 20;
+    array x = tile(moddims(seq(nx),nx,1), 1,ny);
+    array y = tile(moddims(seq(ny),1,ny), nx,1);
+
+    // Initial condition: Gaussian bump of height 0.01 centred at (io, jo)
+    array etam = 0.01f * exp((-((x - io) * (x - io) + (y - jo) * (y - jo))) / (k * k));
+    // max<float> reduces to a host scalar; the untemplated max() yields an array
+    float m_eta = max<float>(etam);
+    array eta = etam;
+    float dt = 0.5;
+
+    // conv kernels
+    float h_diff_kernel[] = {9.81f * (dt / dx), 0, -9.81f * (dt / dx)};
+    float h_lap_kernel[] = {0, 1, 0, 1, -4, 1, 0, 1, 0};
+
+    array h_diff_kernel_arr(3, h_diff_kernel);
+    array h_lap_kernel_arr(3, 3, h_lap_kernel);
+
+    if(!console) {
+        win = new Window(512, 512,"Shallow Water Equations");
+        win->setColorMap(AF_COLORMAP_MOOD);
+    }
+
+    timer t = timer::start();
+    unsigned iter = 0;
+    while (progress(iter, t, time_total)) {
+        // compute
+        array up = um + convolve(eta, h_diff_kernel_arr);
+        // the v component starts from vm, mirroring up/um above
+        array vp = vm + convolve(eta, h_diff_kernel_arr.T());
+        array e = convolve(eta, h_lap_kernel_arr);
+        array etap = 2 * eta - etam + (2 * dt * dt) / (dx * dy) * e;
+
+        etam = eta;
+        eta = etap;
+        if (!console) {
+            // viz
+            win->image(normalize(eta, m_eta));
+        } else eval(eta, up, vp);
+        iter++;
+    }
+
+    // Release the window allocated above so it is not leaked
+    if (!console) {
+        delete win;
+        win = NULL;
+    }
+}
+// argv[1] (optional) selects the device, default 0.
+// argv[2] (optional) starting with '-' runs the simulation without a window.
+int main(int argc, char* argv[])
+{
+    int device = 0;
+    if (argc > 1) {
+        device = atoi(argv[1]);
+    }
+
+    bool console = false;
+    if (argc > 2) {
+        console = (argv[2][0] == '-');
+    }
+
+    try {
+        af::setDevice(device);
+        af::info();
+        printf("Simulation of shallow water equations\n");
+        swe(console);
+    } catch (af::exception& e) {
+        fprintf(stderr, "%s\n", e.what());
+        throw;
+    }
+    return 0;
+}
diff --git a/examples/unified/basic.cpp b/examples/unified/basic.cpp
new file mode 100644
index 0000000000..31d1eacfca
--- /dev/null
+++ b/examples/unified/basic.cpp
@@ -0,0 +1,78 @@
+/*******************************************************
+ * Copyright (c) 2015, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <arrayfire.h>
+#include <vector>
+#include <algorithm>
+
+using namespace af;
+
+std::vector<float> input(100);   // host buffer for the 10 x 10 test array; filled in main()
+
+// Draws one value from rand() and scales it by RAND_MAX,
+// giving a pseudo-random double in [0,1].
+double unifRand()
+{
+    const int sample = rand();
+    return sample / double(RAND_MAX);
+}
+
+// Calls af::info(), then builds and prints two 10 x 10 arrays on the active
+// backend: A from the host buffer `input`, B filled with 0.5 (f32).
+// A and B keep their names because af_print is applied to them directly.
+void testBackend()
+{
+    af::info();
+
+    const af::dim4 shape(10, 10, 1, 1);
+
+    af::array A(shape, &input[0]);
+    af_print(A);
+
+    af::array B(af::constant(0.5, shape, f32));
+    af_print(B);
+}
+
+// Prints `banner`, switches to `backend` and runs testBackend().
+// An af::exception is reported (`failure`, then e.what() on stderr) and
+// swallowed so that the remaining backends are still tried.
+static void tryBackend(af::Backend backend, const char *banner, const char *failure)
+{
+    try {
+        printf("%s", banner);
+        af::setBackend(backend);
+        testBackend();
+    } catch (af::exception& e) {
+        printf("%s", failure);
+        fprintf(stderr, "%s\n", e.what());
+    }
+}
+
+// Fills the host buffer with random values and exercises the CPU, CUDA and
+// OpenCL backends in that order.
+int main(int argc, char *argv[])
+{
+    std::generate(input.begin(), input.end(), unifRand);
+
+    tryBackend(AF_BACKEND_CPU,
+               "Trying CPU Backend\n",
+               "Caught exception when trying CPU backend\n");
+
+    tryBackend(AF_BACKEND_CUDA,
+               "Trying CUDA Backend\n",
+               "Caught exception when trying CUDA backend\n");
+
+    tryBackend(AF_BACKEND_OPENCL,
+               "Trying OpenCL Backend\n",
+               "Caught exception when trying OpenCL backend\n");
+
+    #ifdef WIN32 // pause in Windows
+    if (!(argc == 2 && argv[1][0] == '-')) {
+        printf("hit [enter]...");
+        fflush(stdout);
+        getchar();
+    }
+    #endif
+
+    return 0;
+}
diff --git a/include/af/arith.h b/include/af/arith.h
index fc2cdc2a82..b5f6f17ba9 100644
--- a/include/af/arith.h
+++ b/include/af/arith.h
@@ -578,7 +578,7 @@ extern "C" {
/**
C Interface for dividing an array by another
- \param[out] out will contain result of \p lhs / \p rhs
+ \param[out] out will contain result of \p lhs / \p rhs
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -591,7 +591,7 @@ extern "C" {
/**
C Interface for checking if an array is less than another
- \param[out] out will contain result of \p lhs < \p rhs
+ \param[out] out will contain result of \p lhs < \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -604,7 +604,7 @@ extern "C" {
/**
C Interface for checking if an array is greater than another
- \param[out] out will contain result of \p lhs > \p rhs
+ \param[out] out will contain result of \p lhs > \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -617,7 +617,7 @@ extern "C" {
/**
C Interface for checking if an array is less or equal to another
- \param[out] out will contain result of \p lhs <= \p rhs
+ \param[out] out will contain result of \p lhs <= \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -630,7 +630,7 @@ extern "C" {
/**
C Interface for checking if an array is greater or equal to another
- \param[out] out will contain result of \p lhs >= \p rhs
+ \param[out] out will contain result of \p lhs >= \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -643,7 +643,7 @@ extern "C" {
/**
C Interface for checking if an array is equal to another
- \param[out] out will contain result of \p lhs == \p rhs
+ \param[out] out will contain result of \p lhs == \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -656,7 +656,7 @@ extern "C" {
/**
C Interface for checking if an array is not equal to another
- \param[out] out will contain result of \p lhs != \p rhs
+ \param[out] out will contain result of \p lhs != \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -669,7 +669,7 @@ extern "C" {
/**
C Interface for performing logical and on two arrays
- \param[out] out will contain result of \p lhs && \p rhs
+ \param[out] out will contain result of \p lhs && \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -682,7 +682,7 @@ extern "C" {
/**
C Interface for performing logical or on two arrays
- \param[out] out will contain result of \p lhs || \p rhs
+ \param[out] out will contain result of \p lhs || \p rhs. out is of type b8
\param[in] lhs first input
\param[in] rhs second input
\param[in] batch specifies if operations need to be performed in batch mode
@@ -695,7 +695,7 @@ extern "C" {
/**
C Interface for performing logical not on input
- \param[out] out will contain result of logical not of \p in
+ \param[out] out will contain result of logical not of \p in. out is of type b8
\param[in] in is the input
\return \ref AF_SUCCESS if the execution completes properly
diff --git a/include/af/array.h b/include/af/array.h
index bdc6502208..a5f39e7793 100644
--- a/include/af/array.h
+++ b/include/af/array.h
@@ -84,6 +84,19 @@ namespace af
ASSIGN(/=)
#undef ASSIGN
+#if AF_API_VERSION >= 32
+#define ASSIGN(OP) \
+ array_proxy& operator OP(const short &a); \
+ array_proxy& operator OP(const unsigned short &a); \
+
+ ASSIGN(=)
+ ASSIGN(+=)
+ ASSIGN(-=)
+ ASSIGN(*=)
+ ASSIGN(/=)
+#undef ASSIGN
+#endif
+
// af::array member functions. same behavior as those below
af_array get();
af_array get() const;
@@ -627,7 +640,7 @@ namespace af
bool isfloating() const;
/**
- \brief Returns true if the array type is \ref u8, \ref b8, \ref s32 \ref u32, \ref s64, \ref u64
+ \brief Returns true if the array type is \ref u8, \ref b8, \ref s32 \ref u32, \ref s64, \ref u64, \ref s16, \ref u16
*/
bool isinteger() const;
@@ -813,7 +826,7 @@ namespace af
/// \ingroup method_mat
array H() const;
-#define ASSIGN(OP) \
+#define ASSIGN_(OP) \
array& OP(const array &val); \
array& OP(const double &val); /**< \copydoc OP (const array &) */ \
array& OP(const cdouble &val); /**< \copydoc OP (const array &) */ \
@@ -829,6 +842,17 @@ namespace af
array& OP(const long long &val); /**< \copydoc OP (const array &) */ \
array& OP(const unsigned long long &val); /**< \copydoc OP (const array &) */ \
+#if AF_API_VERSION >= 32
+#define ASSIGN(OP) \
+ ASSIGN_(OP) \
+ array& OP(const short &val); /**< \copydoc OP (const array &) */ \
+ array& OP(const unsigned short &val); /**< \copydoc OP (const array &) */ \
+
+#else
+#define ASSIGN(OP) ASSIGN_(OP)
+#endif
+
+
/// \ingroup array_mem_operator_eq
/// @{
/// \brief Assignes the value(s) of val to the elements of the array.
@@ -892,6 +916,7 @@ namespace af
#undef ASSIGN
+#undef ASSIGN_
///
/// \brief Negates the values of the array
@@ -930,7 +955,7 @@ namespace af
};
// end of class array
-#define BIN_OP(OP) \
+#define BIN_OP_(OP) \
AFAPI array OP (const array& lhs, const array& rhs); \
AFAPI array OP (const bool& lhs, const array& rhs); /**< \copydoc OP (const array&, const array&) */ \
AFAPI array OP (const int& lhs, const array& rhs); /**< \copydoc OP (const array&, const array&) */ \
@@ -959,6 +984,18 @@ namespace af
AFAPI array OP (const array& lhs, const cfloat& rhs); /**< \copydoc OP (const array&, const array&) */ \
AFAPI array OP (const array& lhs, const cdouble& rhs); /**< \copydoc OP (const array&, const array&) */ \
+#if AF_API_VERSION >= 32
+#define BIN_OP(OP) \
+ BIN_OP_(OP) \
+ AFAPI array OP (const short& lhs, const array& rhs); /**< \copydoc OP (const array&, const array&) */ \
+ AFAPI array OP (const unsigned short& lhs, const array& rhs); /**< \copydoc OP (const array&, const array&) */ \
+ AFAPI array OP (const array& lhs, const short& rhs); /**< \copydoc OP (const array&, const array&) */ \
+ AFAPI array OP (const array& lhs, const unsigned short& rhs); /**< \copydoc OP (const array&, const array&) */ \
+
+#else
+#define BIN_OP(OP) BIN_OP_(OP)
+#endif
+
/// \ingroup arith_func_add
/// @{
/// \brief Adds two arrays or an array and a value.
@@ -1010,7 +1047,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with the equality operation performed on each element
+ /// \returns an array of type b8 with the equality operation performed on each element
BIN_OP(operator==)
/// @}
@@ -1021,7 +1058,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with the != operation performed on each element
+ /// \returns an array of type b8 with the != operation performed on each element
/// of \p lhs and \p rhs
BIN_OP(operator!=)
/// @}
@@ -1033,7 +1070,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with the < operation performed on each element
+ /// \returns an array of type b8 with the < operation performed on each element
/// of \p lhs and \p rhs
BIN_OP(operator< )
/// @}
@@ -1045,7 +1082,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with the <= operation performed on each element
+ /// \returns an array of type b8 with the <= operation performed on each element
/// of \p lhs and \p rhs
BIN_OP(operator<=)
/// @}
@@ -1057,7 +1094,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with the > operation performed on each element
+ /// \returns an array of type b8 with the > operation performed on each element
/// of \p lhs and \p rhs
BIN_OP(operator> )
/// @}
@@ -1069,7 +1106,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with the >= operation performed on each element
+ /// \returns an array of type b8 with the >= operation performed on each element
/// of \p lhs and \p rhs
BIN_OP(operator>=)
/// @}
@@ -1082,7 +1119,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with a logical AND operation performed on each
+ /// \returns an array of type b8 with a logical AND operation performed on each
/// element of \p lhs and \p rhs
BIN_OP(operator&&)
/// @}
@@ -1095,7 +1132,7 @@ namespace af
/// \param[in] lhs the left hand side value of the operand
/// \param[in] rhs the right hand side value of the operand
///
- /// \returns an array with a logical OR operation performed on each
+ /// \returns an array of type b8 with a logical OR operation performed on each
/// element of \p lhs and \p rhs
BIN_OP(operator||)
/// @}
@@ -1178,6 +1215,7 @@ namespace af
/// @}
#undef BIN_OP
+#undef BIN_OP_
/// Evaluate an expression (nonblocking).
/**
diff --git a/include/af/backend.h b/include/af/backend.h
new file mode 100644
index 0000000000..93d8d8de58
--- /dev/null
+++ b/include/af/backend.h
@@ -0,0 +1,105 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <af/defines.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \param[in] bknd takes one of the values of enum \ref af_backend
+ \returns \ref af_err error code
+
+ \ingroup unified_func_setbackend
+ */
+AFAPI af_err af_set_backend(const af_backend bknd);
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \param[out] num_backends Number of available backends
+ \returns \ref af_err error code
+
+ \ingroup unified_func_getbackendcount
+ */
+AFAPI af_err af_get_backend_count(unsigned* num_backends);
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \param[out] backends is the OR sum of the backends available.
+ \returns \ref af_err error code
+
+ \ingroup unified_func_getavailbackends
+ */
+AFAPI af_err af_get_available_backends(int* backends);
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \param[out] backend takes one of the values of enum \ref af_backend
+ \param[in] in is the array whose backend is to be queried
+ \returns \ref af_err error code
+
+ \ingroup unified_func_getbackendid
+ */
+AFAPI af_err af_get_backend_id(af_backend *backend, const af_array in);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+namespace af
+{
+class array;
+
+#if AF_API_VERSION >= 32
+/**
+ \param[in] bknd takes one of the values of enum \ref af_backend
+
+ \ingroup unified_func_setbackend
+ */
+AFAPI void setBackend(const Backend bknd);
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \returns Number of available backends
+
+ \ingroup unified_func_getbackendcount
+ */
+AFAPI unsigned getBackendCount();
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \returns OR sum of the backends available
+
+ \ingroup unified_func_getavailbackends
+ */
+AFAPI int getAvailableBackends();
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ \param[in] in is the array whose backend is to be queried
+ \returns \ref af_backend which is the backend on which the array is created
+
+ \ingroup unified_func_getbackendid
+ */
+AFAPI af::Backend getBackendId(const array &in);
+#endif
+
+}
+#endif
diff --git a/include/af/cuda.h b/include/af/cuda.h
index 7cc3cd6501..5b5e25bb65 100644
--- a/include/af/cuda.h
+++ b/include/af/cuda.h
@@ -42,6 +42,18 @@ AFAPI af_err afcu_get_stream(cudaStream_t* stream, int id);
AFAPI af_err afcu_get_native_id(int* nativeid, int id);
#endif
+#if AF_API_VERSION >= 32
+/**
+ Set the CUDA device with given native id as the active device for ArrayFire
+
+ \param[in] nativeid native device id of the CUDA device
+ \returns \ref af_err error code
+
+ \ingroup cuda_mat
+ */
+AFAPI af_err afcu_set_native_id(int nativeid);
+#endif
+
#ifdef __cplusplus
}
#endif
@@ -89,5 +101,21 @@ static inline int getNativeId(int id)
}
#endif
+#if AF_API_VERSION >= 32
+/**
+ Set the CUDA device with given native id as the active device for ArrayFire
+
+ \param[in] nativeId native device id of the CUDA device
+
+ \ingroup cuda_mat
+ */
+static inline void setNativeId(int nativeId)
+{
+    // The C entry point reports failure through its return code;
+    // turn anything other than AF_SUCCESS into an exception.
+    if (afcu_set_native_id(nativeId) != AF_SUCCESS) {
+        throw af::exception("Failed to change active CUDA device to the device with given native id");
+    }
+}
+#endif
+
}
#endif
diff --git a/include/af/defines.h b/include/af/defines.h
index c2b8eadc37..a25d23996d 100644
--- a/include/af/defines.h
+++ b/include/af/defines.h
@@ -132,10 +132,12 @@ typedef enum {
///
AF_ERR_NOT_CONFIGURED = 302,
+#if AF_API_VERSION >= 32
///
/// This build of ArrayFire is not compiled with "nonfree" algorithms
///
- AFF_ERR_NONFREE = 303,
+ AF_ERR_NONFREE = 303,
+#endif
// 400-499 Errors for missing hardware features
@@ -149,6 +151,30 @@ typedef enum {
/// not support graphics
///
AF_ERR_NO_GFX = 402,
+
+ // 500-599 Errors specific to heterogeneous API
+
+#if AF_API_VERSION >= 32
+ ///
+ /// There was an error when loading the libraries
+ ///
+ AF_ERR_LOAD_LIB = 501,
+#endif
+
+#if AF_API_VERSION >= 32
+ ///
+ /// There was an error when loading the symbols
+ ///
+ AF_ERR_LOAD_SYM = 502,
+#endif
+
+#if AF_API_VERSION >= 32
+ ///
+ /// There was a mismatch between the input array and the active backend
+ ///
+ AF_ERR_ARR_BKND_MISMATCH = 503,
+#endif
+
// 900-999 Errors from upstream libraries and runtimes
///
@@ -168,12 +194,18 @@ typedef enum {
c32, ///< 32-bit complex floating point values
f64, ///< 64-bit complex floating point values
c64, ///< 64-bit complex floating point values
- b8, ///< 8-bit boolean values
+ b8 , ///< 8-bit boolean values
s32, ///< 32-bit signed integral values
u32, ///< 32-bit unsigned integral values
- u8, ///< 8-bit unsigned integral values
+ u8 , ///< 8-bit unsigned integral values
s64, ///< 64-bit signed integral values
- u64 ///< 64-bit unsigned integral values
+ u64, ///< 64-bit unsigned integral values
+#if AF_API_VERSION >= 32
+ s16, ///< 16-bit signed integral values
+#endif
+#if AF_API_VERSION >= 32
+ u16, ///< 16-bit unsigned integral values
+#endif
} af_dtype;
typedef enum {
@@ -249,17 +281,21 @@ typedef enum {
AF_SHD ///< Match based on Sum of Hamming Distances (SHD)
} af_match_type;
+#if AF_API_VERSION >= 31
typedef enum {
AF_YCC_601 = 601, ///< ITU-R BT.601 (formerly CCIR 601) standard
AF_YCC_709 = 709, ///< ITU-R BT.709 standard
AF_YCC_2020 = 2020 ///< ITU-R BT.2020 standard
} af_ycc_std;
+#endif
typedef enum {
AF_GRAY = 0, ///< Grayscale
AF_RGB, ///< 3-channel RGB
AF_HSV, ///< 3-channel HSV
+#if AF_API_VERSION >= 31
AF_YCbCr ///< 3-channel YCbCr
+#endif
} af_cspace_t;
typedef enum {
@@ -300,6 +336,7 @@ typedef enum {
AF_COLORMAP_BLUE = 6 ///< Blue hue map
} af_colormap;
+#if AF_API_VERSION >= 31
typedef enum {
AF_FIF_BMP = 0, ///< FreeImage Enum for Bitmap File
AF_FIF_ICO = 1, ///< FreeImage Enum for Windows Icon File
@@ -315,6 +352,24 @@ typedef enum {
AF_FIF_JP2 = 31, ///< FreeImage Enum for JPEG-2000 File
AF_FIF_RAW = 34 ///< FreeImage Enum for RAW Camera Image File
} af_image_format;
+#endif
+
+#if AF_API_VERSION >= 32
+typedef enum {
+ AF_HOMOGRAPHY_RANSAC = 0, ///< Computes homography using RANSAC
+ AF_HOMOGRAPHY_LMEDS = 1 ///< Computes homography using Least Median of Squares
+} af_homography_type;
+#endif
+
+#if AF_API_VERSION >= 32
+// Backend identifiers. Apart from AF_BACKEND_DEFAULT (0), each value must be a distinct power of two (2^x) so that several backends can be combined into one int bitmask; af_get_available_backends() documents its result as the "OR sum" of these values.
+typedef enum {
+ AF_BACKEND_DEFAULT = 0, ///< Default backend order: OpenCL -> CUDA -> CPU
+ AF_BACKEND_CPU = 1, ///< CPU a.k.a sequential algorithms
+ AF_BACKEND_CUDA = 2, ///< CUDA Compute Backend
+ AF_BACKEND_OPENCL = 4, ///< OpenCL Compute Backend
+} af_backend;
+#endif
// Below enum is purely added for example purposes
// it doesn't and shoudn't be used anywhere in the
@@ -340,8 +395,15 @@ namespace af
typedef af_mat_prop matProp;
typedef af_colormap ColorMap;
typedef af_norm_type normType;
+#if AF_API_VERSION >= 31
typedef af_ycc_std YCCStd;
+#endif
+#if AF_API_VERSION >= 31
typedef af_image_format imageFormat;
+#endif
+#if AF_API_VERSION >= 32
+ typedef af_backend Backend;
+#endif
}
#endif
diff --git a/include/af/graphics.h b/include/af/graphics.h
index 1fd9108d7a..5c143c721e 100644
--- a/include/af/graphics.h
+++ b/include/af/graphics.h
@@ -47,6 +47,8 @@ class AFAPI Window {
/**
Creates a window object with default width
and height with title set to "ArrayFire"
+
+ \ingroup gfx_func_window
*/
Window();
@@ -55,6 +57,8 @@ class AFAPI Window {
and height using the title provided by the user
\param[in] title is the window title
+
+ \ingroup gfx_func_window
*/
Window(const char* const title);
@@ -65,6 +69,8 @@ class AFAPI Window {
\param[in] width is the window width
\param[in] height is the window height
\param[in] title is the window title with default value as "ArrayFire"
+
+ \ingroup gfx_func_window
*/
Window(const int width, const int height, const char* const title="ArrayFire");
@@ -74,10 +80,14 @@ class AFAPI Window {
\param[in] wnd is an \ref af_window handle which can be retrieved by
doing a get call on any \ref Window object
+
+ \ingroup gfx_func_window
*/
Window(const af_window wnd);
/**
Destroys the window handle
+
+ \ingroup gfx_func_window
*/
~Window();
@@ -85,6 +95,8 @@ class AFAPI Window {
/**
\return Returns the \ref af_window window handle.
+
+ \ingroup gfx_func_window
*/
af_window get() const { return wnd; }
@@ -93,6 +105,8 @@ class AFAPI Window {
\param[in] x is horizontal coordinate
\param[in] y is vertical coordinate
+
+ \ingroup gfx_func_window
*/
void setPos(const unsigned x, const unsigned y);
@@ -100,6 +114,8 @@ class AFAPI Window {
Set the window title
\param[in] title is the window title
+
+ \ingroup gfx_func_window
*/
void setTitle(const char* const title);
@@ -109,6 +125,8 @@ class AFAPI Window {
\param[in] w is target width of the window
\param[in] h is target height of the window
+
+ \ingroup gfx_func_window
*/
void setSize(const unsigned w, const unsigned h);
#endif
@@ -117,6 +135,8 @@ class AFAPI Window {
Set the colormap to be used for subsequent rendering calls
\param[in] cmap should be one of the enum values from \ref ColorMap
+
+ \ingroup gfx_func_window
*/
void setColorMap(const ColorMap cmap);
@@ -127,9 +147,25 @@ class AFAPI Window {
\param[in] title parameter is used when this function is called in grid mode
\note \p in should be 2d array or 3d array with 3 channels.
+
+ \ingroup gfx_func_draw
*/
void image(const array& in, const char* title=NULL);
+#if AF_API_VERSION >= 32
+ /**
+ Renders the input array as a 3D line plot to the window
+
+ \param[in] in is an \ref array
+ \param[in] title parameter is used when this function is called in grid mode
+
+ \note \p in should be 1d array of size 3n or 2d array with (3 x n) or (n x 3) channels.
+
+ \ingroup gfx_func_draw
+ */
+ void plot3(const array& in, const char* title=NULL);
+#endif
+
/**
Renders the input arrays as a 2D plot to the window
@@ -138,7 +174,10 @@ class AFAPI Window {
\param[in] title parameter is used when this function is called in grid mode
\note \p X and \p Y should be vectors.
+
+ \ingroup gfx_func_draw
*/
+
void plot(const array& X, const array& Y, const char* const title=NULL);
/**
@@ -150,20 +189,56 @@ class AFAPI Window {
\param[in] title parameter is used when this function is called in grid mode
\note \p X should be a vector.
+
+ \ingroup gfx_func_draw
*/
void hist(const array& X, const double minval, const double maxval, const char* const title=NULL);
+#if AF_API_VERSION >= 32
+ /**
+ Renders the input arrays as a 3D surface plot to the window
+
+ \param[in] S is an \ref array with the z-axis data points
+ \param[in] title parameter is used when this function is called in grid mode
+
+ \note \p S should be a 2D array
+
+ \ingroup gfx_func_draw
+ */
+ void surface(const array& S, const char* const title=NULL); // title is optional, as for image/plot/hist
+#endif
+
+#if AF_API_VERSION >= 32
+ /**
+ Renders the input arrays as a 3D surface plot to the window
+
+ \param[in] xVals is an \ref array with the x-axis data points
+ \param[in] yVals is an \ref array with the y-axis data points
+ \param[in] S is an \ref array with the z-axis data points
+ \param[in] title parameter is used when this function is called in grid mode
+
+ \note \p xVals and \p yVals should be vectors or 2D arrays. \p S should be a 2D array
+
+ \ingroup gfx_func_draw
+ */
+ void surface(const array& xVals, const array& yVals, const array& S, const char* const title=NULL); // title is optional, as for image/plot/hist
+#endif
+
/**
Setup grid layout for multiview mode in a window
\param[in] rows is number of rows you want to show in a window
\param[in] cols is number of coloumns you want to show in a window
+
+ \ingroup gfx_func_window
*/
void grid(const int rows, const int cols);
/**
This function swaps the background buffer to current view
and polls for any key strokes while the window was in focus
+
+ \ingroup gfx_func_window
*/
void show();
@@ -173,6 +248,8 @@ class AFAPI Window {
\return \ref AF_SUCCESS if window show is successful, otherwise an appropriate error code
is returned.
+
+ \ingroup gfx_func_window
*/
bool close();
@@ -185,6 +262,8 @@ class AFAPI Window {
\return a reference to the object pointed by this
to enable cascading this call with rendering functions.
+
+ \ingroup gfx_func_window
*/
inline Window& operator()(const int r, const int c) {
_r = r; _c = c;
@@ -210,7 +289,7 @@ extern "C" {
\return \ref AF_SUCCESS if window creation is successful, otherwise an appropriate error code
is returned.
- \ingroup gfx_window_func
+ \ingroup gfx_func_window
*/
AFAPI af_err af_create_window(af_window *out, const int width, const int height, const char* const title);
@@ -292,6 +371,25 @@ AFAPI af_err af_draw_image(const af_window wind, const af_array in, const af_cel
*/
AFAPI af_err af_draw_plot(const af_window wind, const af_array X, const af_array Y, const af_cell* const props);
+#if AF_API_VERSION >= 32
+/**
+ C Interface wrapper for drawing an array as a plot
+
+ \param[in] wind is the window handle
+ \param[in] P is an \ref af_array or matrix with the xyz-values of the points
+ \param[in] props is structure \ref af_cell that has the properties that are used
+ for the current rendering.
+
+ \return \ref AF_SUCCESS if rendering is successful, otherwise an appropriate error code
+ is returned.
+
+ \note \p P should be a 3n x 1 vector or one of a 3xn or nx3 matrices.
+
+ \ingroup gfx_func_draw
+*/
+AFAPI af_err af_draw_plot3(const af_window wind, const af_array P, const af_cell* const props);
+#endif
+
/**
C Interface wrapper for drawing an array as a histogram
@@ -311,6 +409,27 @@ AFAPI af_err af_draw_plot(const af_window wind, const af_array X, const af_array
*/
AFAPI af_err af_draw_hist(const af_window wind, const af_array X, const double minval, const double maxval, const af_cell* const props);
+#if AF_API_VERSION >= 32
+/**
+ C Interface wrapper for drawing arrays as a surface
+
+ \param[in] wind is the window handle
+ \param[in] xVals is an \ref af_array with the x-axis data points
+ \param[in] yVals is an \ref af_array with the y-axis data points
+ \param[in] S is an \ref af_array with the z-axis data points
+ \param[in] props is structure \ref af_cell that has the properties that are used
+ for the current rendering.
+
+ \return \ref AF_SUCCESS if rendering is successful, otherwise an appropriate error code
+ is returned.
+
+ \note \p xVals and \p yVals should be vectors. \p S should be a 2D array
+
+ \ingroup gfx_func_draw
+*/
+AFAPI af_err af_draw_surface(const af_window wind, const af_array xVals, const af_array yVals, const af_array S, const af_cell* const props);
+#endif
+
/**
C Interface wrapper for grid setup in a window
diff --git a/include/af/image.h b/include/af/image.h
index 1c16280c12..f38bb41694 100644
--- a/include/af/image.h
+++ b/include/af/image.h
@@ -96,6 +96,57 @@ AFAPI void* saveImageMem(const array& in, const imageFormat format = AF_FIF_PNG)
AFAPI void deleteImageMem(void *ptr);
#endif
+#if AF_API_VERSION >= 32
+/**
+ C++ Interface for loading an image as its original type
+
+ This load image function allows you to load images as u8, u16 or f32
+ depending on the type of input image as shown by the table below.
+
+ Bits per Color (Gray/RGB/RGBA Bits Per Pixel) | Array Type | Range
+ -----------------------------------------------|-------------|---------------
+ 8 ( 8/24/32 BPP) | u8 | 0 - 255
+ 16 (16/48/64 BPP) | u16 | 0 - 65535
+ 32 (32/96/128 BPP) | f32 | 0 - 1
+
+ \param[in] filename is name of file to be loaded
+ \return image loaded as \ref af::array()
+
+ \ingroup imageio_func_load
+*/
+AFAPI array loadImageNative(const char* filename);
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ C++ Interface for saving an image without modifications
+
+ This function only accepts u8, u16, f32 arrays. These arrays are saved to
+ images without any modifications.
+
+ Note that not all image types support 16 or 32 bit images.
+
+ The best options for 16 bit images are PNG, PPM and TIFF.
+ The best option for 32 bit images is TIFF.
+ These allow lossless storage.
+
+ The images stored have the following properties:
+
+ Array Type | Bits per Color (Gray/RGB/RGBA Bits Per Pixel) | Range
+ -------------|-----------------------------------------------|---------------
+ u8 | 8 ( 8/24/32 BPP) | 0 - 255
+ u16 | 16 (16/48/64 BPP) | 0 - 65535
+ f32 | 32 (32/96/128 BPP) | 0 - 1
+
+ \param[in] filename is name of file to be saved
+ \param[in] in is the array to be saved. Should be u8 for saving 8-bit image,
+ u16 for 16-bit image, and f32 for 32-bit image.
+
+ \ingroup imageio_func_save
+*/
+AFAPI void saveImageNative(const char* filename, const array& in);
+#endif
+
/**
C++ Interface for resizing an image to specified dimensions
@@ -230,7 +281,7 @@ AFAPI array bilateral(const array &in, const float spatial_sigma, const float ch
\param[in] nbins Number of bins to populate between min and max
\param[in] minval minimum bin value (accumulates -inf to min)
\param[in] maxval minimum bin value (accumulates max to +inf)
- \return histogram array
+ \return histogram array of type u32
\ingroup image_func_histogram
*/
@@ -243,7 +294,7 @@ AFAPI array histogram(const array &in, const unsigned nbins, const double minval
\param[in] in is the input array
\param[in] nbins Number of bins to populate between min and max
- \return histogram array
+ \return histogram array of type u32
\ingroup image_func_histogram
*/
@@ -689,6 +740,60 @@ extern "C" {
AFAPI af_err af_delete_image_memory(void* ptr);
#endif
+#if AF_API_VERSION >= 32
+ /**
+ C Interface for loading an image as its original type
+
+ This load image function allows you to load images as u8, u16 or f32
+ depending on the type of input image as shown by the table below.
+
+ Bits per Color (Gray/RGB/RGBA Bits Per Pixel) | Array Type | Range
+ -----------------------------------------------|-------------|---------------
+ 8 ( 8/24/32 BPP) | u8 | 0 - 255
+ 16 (16/48/64 BPP) | u16 | 0 - 65535
+ 32 (32/96/128 BPP) | f32 | 0 - 1
+
+ \param[out] out contains the image
+ \param[in] filename is name of file to be loaded
+ \return \ref AF_SUCCESS if successful
+
+ \ingroup imageio_func_load
+ */
+ AFAPI af_err af_load_image_native(af_array *out, const char* filename);
+#endif
+
+#if AF_API_VERSION >= 32
+ /**
+ C Interface for saving an image without modifications
+
+ This function only accepts u8, u16, f32 arrays. These arrays are saved to
+ images without any modifications.
+
+ You must also note that not all image types support 16 or 32 bit images.
+
+ The best options for 16 bit images are PNG, PPM and TIFF.
+ The best option for 32 bit images is TIFF.
+ These allow lossless storage.
+
+ The images stored have the following properties:
+
+ Array Type | Bits per Color (Gray/RGB/RGBA Bits Per Pixel) | Range
+ -------------|-----------------------------------------------|---------------
+ u8 | 8 ( 8/24/32 BPP) | 0 - 255
+ u16 | 16 (16/48/64 BPP) | 0 - 65535
+ f32 | 32 (32/96/128 BPP) | 0 - 1
+
+ \param[in] filename is name of file to be saved
+ \param[in] in is the array to be saved. Should be u8 for saving 8-bit image,
+ u16 for 16-bit image, and f32 for 32-bit image.
+
+ \return \ref AF_SUCCESS if successful
+
+ \ingroup imageio_func_save
+ */
+ AFAPI af_err af_save_image_native(const char* filename, const af_array in);
+#endif
+
/**
C Interface for resizing an image to specified dimensions
@@ -796,7 +901,7 @@ extern "C" {
/**
C Interface for histogram
- \param[out] out is the histogram for input array in
+ \param[out] out (type u32) is the histogram for input array in
\param[in] in is the input array
\param[in] nbins Number of bins to populate between min and max
\param[in] minval minimum bin value (accumulates -inf to min)
diff --git a/include/af/index.h b/include/af/index.h
index e3bb77b0fd..79bf1229a5 100644
--- a/include/af/index.h
+++ b/include/af/index.h
@@ -289,6 +289,78 @@ extern "C" {
const dim_t ndims, const af_index_t* indices,
const af_array rhs);
+#if AF_API_VERSION >= 32
+ ///
+ /// \brief Create a quadruple of af_index_t array
+ ///
+ /// \param[out] indexers pointer to location where quadruple af_index_t array is created
+ /// \returns \ref af_err error code
+ ///
+ /// \ingroup index_func_util
+ ///
+ AFAPI af_err af_create_indexers(af_index_t** indexers);
+#endif
+
+#if AF_API_VERSION >= 32
+ ///
+ /// \brief set \p dim to given indexer af_array \p idx
+ ///
+ /// \param[in] indexer pointer to location where quadruple af_index_t array was created
+ /// \param[in] idx is the af_array indexer for given dimension \p dim
+ /// \param[in] dim is the dimension to be indexed
+ /// \returns \ref af_err error code
+ ///
+ /// \ingroup index_func_util
+ ///
+ AFAPI af_err af_set_array_indexer(af_index_t* indexer, const af_array idx, const dim_t dim);
+#endif
+
+#if AF_API_VERSION >= 32
+ ///
+ /// \brief set \p dim to given indexer af_seq \p idx
+ ///
+ /// \param[in] indexer pointer to location where quadruple af_index_t array was created
+ /// \param[in] idx is the af_seq indexer for given dimension \p dim
+ /// \param[in] dim is the dimension to be indexed
+ /// \param[in] is_batch indicates if the sequence based indexing is inside a batch operation
+ ///
+ /// \ingroup index_func_util
+ ///
+ AFAPI af_err af_set_seq_indexer(af_index_t* indexer, const af_seq* idx,
+ const dim_t dim, const bool is_batch);
+#endif
+
+#if AF_API_VERSION >= 32
+ ///
+ /// \brief set \p dim to the sequence indexer defined by \p begin, \p end and \p step
+ ///
+ /// \param[in] indexer pointer to location where quadruple af_index_t array was created
+ /// \param[in] begin is the beginning index along dimension \p dim
+ /// \param[in] end is the ending index along dimension \p dim
+ /// \param[in] step size along dimension \p dim
+ /// \param[in] dim is the dimension to be indexed
+ /// \param[in] is_batch indicates if the sequence based indexing is inside a batch operation
+ /// \returns \ref af_err error code
+ ///
+ /// \ingroup index_func_util
+ ///
+ AFAPI af_err af_set_seq_param_indexer(af_index_t* indexer,
+ const double begin, const double end, const double step,
+ const dim_t dim, const bool is_batch);
+#endif
+
+#if AF_API_VERSION >= 32
+ ///
+ /// \brief Releases the memory resource used by the quadruple af_index_t array
+ ///
+ /// \param[in] indexers is pointer to location where quadruple af_index_t array is created
+ /// \returns \ref af_err error code
+ ///
+ /// \ingroup index_func_util
+ ///
+ AFAPI af_err af_release_indexers(af_index_t* indexers);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/include/af/macros.h b/include/af/macros.h
new file mode 100644
index 0000000000..42a4219ac8
--- /dev/null
+++ b/include/af/macros.h
@@ -0,0 +1,24 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <stdio.h>
+
+///
+/// Print a line on screen using printf syntax.
+/// Usage: Uses same syntax and semantics as printf; terminate the call with a semicolon.
+/// Output: \<filename\>:\<line number\>: \<message\>
+///
+#ifndef AF_MSG
+#define AF_MSG(fmt,...) do { \
+ printf("%s:%d: " fmt "\n", \
+ __FILE__, __LINE__, ##__VA_ARGS__); \
+ } while (0) /* no trailing ';' so the macro is safe inside if/else */
+#endif
+
diff --git a/include/af/opencl.h b/include/af/opencl.h
index c9f245e30a..271879fdc9 100644
--- a/include/af/opencl.h
+++ b/include/af/opencl.h
@@ -19,43 +19,54 @@
extern "C" {
#endif
- /**
- \ingroup opencl_mat
- @{
- */
- /**
- Get a handle to ArrayFire's OpenCL context
-
- \param[out] ctx the current context being used by ArrayFire
- \param[in] retain if true calls clRetainContext prior to returning the context
- \returns \ref af_err error code
-
- \note Set \p retain to true if this value will be passed to a cl::Context constructor
- */
- AFAPI af_err afcl_get_context(cl_context *ctx, const bool retain);
-
- /**
- Get a handle to ArrayFire's OpenCL command queue
-
- \param[out] queue the current command queue being used by ArrayFire
- \param[in] retain if true calls clRetainCommandQueue prior to returning the context
- \returns \ref af_err error code
-
- \note Set \p retain to true if this value will be passed to a cl::CommandQueue constructor
- */
- AFAPI af_err afcl_get_queue(cl_command_queue *queue, const bool retain);
-
- /**
- Get the device ID for ArrayFire's current active device
-
- \param[out] id the cl_device_id of the current device
- \returns \ref af_err error code
- */
- AFAPI af_err afcl_get_device_id(cl_device_id *id);
-
- /**
- @}
- */
+/**
+ \ingroup opencl_mat
+ @{
+*/
+/**
+ Get a handle to ArrayFire's OpenCL context
+
+ \param[out] ctx the current context being used by ArrayFire
+ \param[in] retain if true calls clRetainContext prior to returning the context
+ \returns \ref af_err error code
+
+ \note Set \p retain to true if this value will be passed to a cl::Context constructor
+*/
+AFAPI af_err afcl_get_context(cl_context *ctx, const bool retain);
+
+/**
+ Get a handle to ArrayFire's OpenCL command queue
+
+ \param[out] queue the current command queue being used by ArrayFire
+ \param[in] retain if true calls clRetainCommandQueue prior to returning the command queue
+ \returns \ref af_err error code
+
+ \note Set \p retain to true if this value will be passed to a cl::CommandQueue constructor
+*/
+AFAPI af_err afcl_get_queue(cl_command_queue *queue, const bool retain);
+
+/**
+ Get the device ID for ArrayFire's current active device
+
+ \param[out] id the cl_device_id of the current device
+ \returns \ref af_err error code
+*/
+AFAPI af_err afcl_get_device_id(cl_device_id *id);
+
+#if AF_API_VERSION >= 32
+/**
+ Set ArrayFire's active device based on \p id of type cl_device_id
+
+ \param[in] id the cl_device_id of the device to be set as active device
+ \returns \ref af_err error code
+*/
+AFAPI af_err afcl_set_device_id(cl_device_id id);
+#endif
+
+/**
+ @}
+*/
+
#ifdef __cplusplus
}
#endif
@@ -70,187 +81,205 @@ extern "C" {
namespace afcl
{
- /**
-
- */
- /**
- \ingroup opencl_mat
- @{
- */
- /**
- Get a handle to ArrayFire's OpenCL context
-
- \param[in] retain if true calls clRetainContext prior to returning the context
- \returns the current context being used by ArrayFire
-
- \note Set \p retain to true if this value will be passed to a cl::Context constructor
- */
- static inline cl_context getContext(bool retain = false)
- {
- cl_context ctx;
- af_err err = afcl_get_context(&ctx, retain);
- if (err != AF_SUCCESS) throw af::exception("Failed to get OpenCL context from arrayfire");
- return ctx;
- }
-
- /**
- Get a handle to ArrayFire's OpenCL command queue
-
- \param[in] retain if true calls clRetainCommandQueue prior to returning the context
- \returns the current command queue being used by ArrayFire
-
- \note Set \p retain to true if this value will be passed to a cl::CommandQueue constructor
- */
- static inline cl_command_queue getQueue(bool retain = false)
- {
- cl_command_queue queue;
- af_err err = afcl_get_queue(&queue, retain);
- if (err != AF_SUCCESS) throw af::exception("Failed to get OpenCL command queue from arrayfire");
- return queue;
- }
-
- /**
- Get the device ID for ArrayFire's current active device
- \returns the cl_device_id of the current device
- */
- static inline cl_device_id getDeviceId()
- {
- cl_device_id id;
- af_err err = afcl_get_device_id(&id);
- if (err != AF_SUCCESS) throw af::exception("Failed to get OpenCL device ID");
-
- return id;
- }
-
- /**
- Create an af::array object from an OpenCL cl_mem buffer
-
- \param[in] idims the dimensions of the buffer
- \param[in] buf the OpenCL memory object
- \param[in] type the data type contained in the buffer
- \param[in] retain if true, instructs ArrayFire to retain the memory object
- \returns an array object created from the OpenCL buffer
-
- \note Set \p retain to true if the memory originates from a cl::Buffer object
- */
- static inline af::array array(af::dim4 idims, cl_mem buf, af::dtype type, bool retain=false)
- {
- const unsigned ndims = (unsigned)idims.ndims();
- const dim_t *dims = idims.get();
-
- cl_context context;
- cl_int clerr = clGetMemObjectInfo(buf, CL_MEM_CONTEXT, sizeof(cl_context), &context, NULL);
- if (clerr != CL_SUCCESS) {
- throw af::exception("Failed to get context from cl_mem object \"buf\" ");
- }
-
- if (context != getContext()) {
- throw(af::exception("Context mismatch between input \"buf\" and arrayfire"));
- }
-
-
- if (retain) clerr = clRetainMemObject(buf);
-
- af_array out;
- af_err err = af_device_array(&out, buf, ndims, dims, type);
-
- if (err != AF_SUCCESS || clerr != CL_SUCCESS) {
- if (retain && clerr == CL_SUCCESS) clReleaseMemObject(buf);
- throw af::exception("Failed to create device array");
- }
-
- return af::array(out);
- }
-
- /**
- Create an af::array object from an OpenCL cl_mem buffer
-
- \param[in] dim0 the length of the first dimension of the buffer
- \param[in] buf the OpenCL memory object
- \param[in] type the data type contained in the buffer
- \param[in] retain if true, instructs ArrayFire to retain the memory object
- \returns an array object created from the OpenCL buffer
-
- \note Set \p retain to true if the memory originates from a cl::Buffer object
- */
- static inline af::array array(dim_t dim0,
- cl_mem buf, af::dtype type, bool retain=false)
- {
- return afcl::array(af::dim4(dim0), buf, type, retain);
- }
-
- /**
- Create an af::array object from an OpenCL cl_mem buffer
-
- \param[in] dim0 the length of the first dimension of the buffer
- \param[in] dim1 the length of the second dimension of the buffer
- \param[in] buf the OpenCL memory object
- \param[in] type the data type contained in the buffer
- \param[in] retain if true, instructs ArrayFire to retain the memory object
- \returns an array object created from the OpenCL buffer
-
- \note Set \p retain to true if the memory originates from a cl::Buffer object
- */
- static inline af::array array(dim_t dim0, dim_t dim1,
- cl_mem buf, af::dtype type, bool retain=false)
- {
- return afcl::array(af::dim4(dim0, dim1), buf, type, retain);
- }
-
- /**
- Create an af::array object from an OpenCL cl_mem buffer
-
- \param[in] dim0 the length of the first dimension of the buffer
- \param[in] dim1 the length of the second dimension of the buffer
- \param[in] dim2 the length of the third dimension of the buffer
- \param[in] buf the OpenCL memory object
- \param[in] type the data type contained in the buffer
- \param[in] retain if true, instructs ArrayFire to retain the memory object
- \returns an array object created from the OpenCL buffer
-
- \note Set \p retain to true if the memory originates from a cl::Buffer object
- */
- static inline af::array array(dim_t dim0, dim_t dim1,
- dim_t dim2,
- cl_mem buf, af::dtype type, bool retain=false)
- {
- return afcl::array(af::dim4(dim0, dim1, dim2), buf, type, retain);
- }
-
- /**
- Create an af::array object from an OpenCL cl_mem buffer
-
- \param[in] dim0 the length of the first dimension of the buffer
- \param[in] dim1 the length of the second dimension of the buffer
- \param[in] dim2 the length of the third dimension of the buffer
- \param[in] dim3 the length of the fourth dimension of the buffer
- \param[in] buf the OpenCL memory object
- \param[in] type the data type contained in the buffer
- \param[in] retain if true, instructs ArrayFire to retain the memory object
- \returns an array object created from the OpenCL buffer
-
- \note Set \p retain to true if the memory originates from a cl::Buffer object
- */
- static inline af::array array(dim_t dim0, dim_t dim1,
- dim_t dim2, dim_t dim3,
- cl_mem buf, af::dtype type, bool retain=false)
- {
- return afcl::array(af::dim4(dim0, dim1, dim2, dim3), buf, type, retain);
- }
-
- /**
- @}
- */
+
+/**
+
+ */
+ /**
+ \ingroup opencl_mat
+ @{
+ */
+ /**
+ Get a handle to ArrayFire's OpenCL context
+
+ \param[in] retain if true calls clRetainContext prior to returning the context
+ \returns the current context being used by ArrayFire
+
+ \note Set \p retain to true if this value will be passed to a cl::Context constructor
+ */
+ static inline cl_context getContext(bool retain = false)
+ {
+ cl_context ctx;
+ af_err err = afcl_get_context(&ctx, retain);
+ if (err != AF_SUCCESS) throw af::exception("Failed to get OpenCL context from arrayfire");
+ return ctx;
+ }
+
+ /**
+ Get a handle to ArrayFire's OpenCL command queue
+
+ \param[in] retain if true calls clRetainCommandQueue prior to returning the command queue
+ \returns the current command queue being used by ArrayFire
+
+ \note Set \p retain to true if this value will be passed to a cl::CommandQueue constructor
+ */
+ static inline cl_command_queue getQueue(bool retain = false)
+ {
+ cl_command_queue queue;
+ af_err err = afcl_get_queue(&queue, retain);
+ if (err != AF_SUCCESS) throw af::exception("Failed to get OpenCL command queue from arrayfire");
+ return queue;
+ }
+
+ /**
+ Get the device ID for ArrayFire's current active device
+ \returns the cl_device_id of the current device
+ */
+ static inline cl_device_id getDeviceId()
+ {
+ cl_device_id id;
+ af_err err = afcl_get_device_id(&id);
+ if (err != AF_SUCCESS) throw af::exception("Failed to get OpenCL device ID");
+
+ return id;
+ }
+
+#if AF_API_VERSION >= 32
+ /**
+ Set ArrayFire's active device based on \p id of type cl_device_id
+
+ \param[in] id the cl_device_id of the device to be set as active device
+ */
+ static inline void setDeviceId(cl_device_id id)
+ {
+ af_err err = afcl_set_device_id(id);
+ if (err != AF_SUCCESS) throw af::exception("Failed to set OpenCL device as active device");
+ }
+#endif
+
+ /**
+ Create an af::array object from an OpenCL cl_mem buffer
+
+ \param[in] idims the dimensions of the buffer
+ \param[in] buf the OpenCL memory object
+ \param[in] type the data type contained in the buffer
+ \param[in] retain if true, instructs ArrayFire to retain the memory object
+ \returns an array object created from the OpenCL buffer
+
+ \note Set \p retain to true if the memory originates from a cl::Buffer object
+ */
+ static inline af::array array(af::dim4 idims, cl_mem buf, af::dtype type, bool retain=false)
+ {
+ const unsigned ndims = (unsigned)idims.ndims();
+ const dim_t *dims = idims.get();
+
+ cl_context context;
+ cl_int clerr = clGetMemObjectInfo(buf, CL_MEM_CONTEXT, sizeof(cl_context), &context, NULL);
+ if (clerr != CL_SUCCESS) {
+ throw af::exception("Failed to get context from cl_mem object \"buf\" ");
+ }
+
+ if (context != getContext()) {
+ throw(af::exception("Context mismatch between input \"buf\" and arrayfire"));
+ }
+
+
+ if (retain) clerr = clRetainMemObject(buf);
+
+ af_array out;
+ af_err err = af_device_array(&out, buf, ndims, dims, type);
+
+ if (err != AF_SUCCESS || clerr != CL_SUCCESS) {
+ if (retain && clerr == CL_SUCCESS) clReleaseMemObject(buf);
+ throw af::exception("Failed to create device array");
+ }
+
+ return af::array(out);
+ }
+
+ /**
+ Create an af::array object from an OpenCL cl_mem buffer
+
+ \param[in] dim0 the length of the first dimension of the buffer
+ \param[in] buf the OpenCL memory object
+ \param[in] type the data type contained in the buffer
+ \param[in] retain if true, instructs ArrayFire to retain the memory object
+ \returns an array object created from the OpenCL buffer
+
+ \note Set \p retain to true if the memory originates from a cl::Buffer object
+ */
+ static inline af::array array(dim_t dim0,
+ cl_mem buf, af::dtype type, bool retain=false)
+ {
+ return afcl::array(af::dim4(dim0), buf, type, retain);
+ }
+
+ /**
+ Create an af::array object from an OpenCL cl_mem buffer
+
+ \param[in] dim0 the length of the first dimension of the buffer
+ \param[in] dim1 the length of the second dimension of the buffer
+ \param[in] buf the OpenCL memory object
+ \param[in] type the data type contained in the buffer
+ \param[in] retain if true, instructs ArrayFire to retain the memory object
+ \returns an array object created from the OpenCL buffer
+
+ \note Set \p retain to true if the memory originates from a cl::Buffer object
+ */
+ static inline af::array array(dim_t dim0, dim_t dim1,
+ cl_mem buf, af::dtype type, bool retain=false)
+ {
+ return afcl::array(af::dim4(dim0, dim1), buf, type, retain);
+ }
+
+ /**
+ Create an af::array object from an OpenCL cl_mem buffer
+
+ \param[in] dim0 the length of the first dimension of the buffer
+ \param[in] dim1 the length of the second dimension of the buffer
+ \param[in] dim2 the length of the third dimension of the buffer
+ \param[in] buf the OpenCL memory object
+ \param[in] type the data type contained in the buffer
+ \param[in] retain if true, instructs ArrayFire to retain the memory object
+ \returns an array object created from the OpenCL buffer
+
+ \note Set \p retain to true if the memory originates from a cl::Buffer object
+ */
+ static inline af::array array(dim_t dim0, dim_t dim1,
+ dim_t dim2,
+ cl_mem buf, af::dtype type, bool retain=false)
+ {
+ return afcl::array(af::dim4(dim0, dim1, dim2), buf, type, retain);
+ }
+
+ /**
+ Create an af::array object from an OpenCL cl_mem buffer
+
+ \param[in] dim0 the length of the first dimension of the buffer
+ \param[in] dim1 the length of the second dimension of the buffer
+ \param[in] dim2 the length of the third dimension of the buffer
+ \param[in] dim3 the length of the fourth dimension of the buffer
+ \param[in] buf the OpenCL memory object
+ \param[in] type the data type contained in the buffer
+ \param[in] retain if true, instructs ArrayFire to retain the memory object
+ \returns an array object created from the OpenCL buffer
+
+ \note Set \p retain to true if the memory originates from a cl::Buffer object
+ */
+ static inline af::array array(dim_t dim0, dim_t dim1,
+ dim_t dim2, dim_t dim3,
+ cl_mem buf, af::dtype type, bool retain=false)
+ {
+ return afcl::array(af::dim4(dim0, dim1, dim2, dim3), buf, type, retain);
+ }
+
+ /**
+ @}
+ */
+
+}
+
+namespace af
+{
+
+template<> AFAPI cl_mem *array::device() const
+{
+ cl_mem *mem = new cl_mem; // heap-allocated handle; ownership passes to the caller on success
+ af_err err = af_get_device_ptr((void **)mem, get());
+ if (err != AF_SUCCESS) { delete mem; throw af::exception("Failed to get cl_mem from array object"); } // free the handle before throwing so it is not leaked
+ return mem;
}
-namespace af {
- template<> AFAPI cl_mem *array::device() const
- {
- cl_mem *mem = new cl_mem;
- af_err err = af_get_device_ptr((void **)mem, get());
- if (err != AF_SUCCESS) throw af::exception("Failed to get cl_mem from array object");
- return mem;
- }
}
#endif
diff --git a/include/af/statistics.h b/include/af/statistics.h
index fd35bc5a86..4d02d4aea0 100644
--- a/include/af/statistics.h
+++ b/include/af/statistics.h
@@ -205,7 +205,7 @@ extern "C" {
\param[out] out will contain the mean of the input array along dimension \p dim
\param[in] in is the input array
\param[in] dim the dimension along which the mean is extracted
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_mean
@@ -219,7 +219,7 @@ AFAPI af_err af_mean(af_array *out, const af_array in, const dim_t dim);
\param[in] in is the input array
\param[in] weights is used to scale input \p in before getting mean
\param[in] dim the dimension along which the mean is extracted
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_mean
@@ -233,7 +233,7 @@ AFAPI af_err af_mean_weighted(af_array *out, const af_array in, const af_array w
\param[in] in is the input array
\param[in] isbiased is boolean denoting Population variance (false) or Sample Variance (true)
\param[in] dim the dimension along which the variance is extracted
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_var
@@ -248,7 +248,7 @@ AFAPI af_err af_var(af_array *out, const af_array in, const bool isbiased, const
\param[in] in is the input array
\param[in] weights is used to scale input \p in before getting variance
\param[in] dim the dimension along which the variance is extracted
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_var
@@ -262,7 +262,7 @@ AFAPI af_err af_var_weighted(af_array *out, const af_array in, const af_array we
\param[out] out will contain the standard deviation of the input array along dimension \p dim
\param[in] in is the input array
\param[in] dim the dimension along which the standard deviation is extracted
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_stdev
@@ -277,7 +277,7 @@ AFAPI af_err af_stdev(af_array *out, const af_array in, const dim_t dim);
\param[in] X is the first input array
\param[in] Y is the second input array
\param[in] isbiased is boolean specifying if biased estimate should be taken (default: false)
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_cov
@@ -290,7 +290,7 @@ AFAPI af_err af_cov(af_array* out, const af_array X, const af_array Y, const boo
\param[out] out will contain the median of the input array along dimension \p dim
\param[in] in is the input array
\param[in] dim the dimension along which the median is extracted
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_median
@@ -303,7 +303,7 @@ AFAPI af_err af_median(af_array* out, const af_array in, const dim_t dim);
\param[out] real will contain the real part of mean of the entire input array
\param[out] imag will contain the imaginary part of mean of the entire input array
\param[in] in is the input array
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_mean
@@ -317,7 +317,7 @@ AFAPI af_err af_mean_all(double *real, double *imag, const af_array in);
\param[out] imag will contain the imaginary part of mean of the entire weighted input array
\param[in] in is the input array
\param[in] weights is used to scale input \p in before getting mean
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_mean
@@ -332,7 +332,7 @@ AFAPI af_err af_mean_all_weighted(double *real, double *imag, const af_array in,
\param[out] imagVal will contain the imaginary part of variance of the entire input array
\param[in] in is the input array
\param[in] isbiased is boolean denoting Population variance (false) or Sample Variance (true)
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_var
@@ -346,7 +346,7 @@ AFAPI af_err af_var_all(double *realVal, double *imagVal, const af_array in, con
\param[out] imagVal will contain the imaginary part of variance of the entire weighted input array
\param[in] in is the input array
\param[in] weights is used to scale input \p in before getting variance
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_var
@@ -359,7 +359,7 @@ AFAPI af_err af_var_all_weighted(double *realVal, double *imagVal, const af_arra
\param[out] real will contain the real part of standard deviation of the entire input array
\param[out] imag will contain the imaginary part of standard deviation of the entire input array
\param[in] in is the input array
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_stdev
@@ -372,7 +372,7 @@ AFAPI af_err af_stdev_all(double *real, double *imag, const af_array in);
\param[out] realVal will contain the real part of median of the entire input array
\param[out] imagVal will contain the imaginary part of median of the entire input array
\param[in] in is the input array
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\ingroup stat_func_median
@@ -386,7 +386,7 @@ AFAPI af_err af_median_all(double *realVal, double *imagVal, const af_array in);
\param[out] imagVal will contain the imaginary part of correlation coefficient of the inputs
\param[in] X is the first input array
\param[in] Y is the second input array
- \return \ref AF_SUCCESS if the color transformation is successful,
+ \return \ref AF_SUCCESS if the operation is successful,
otherwise an appropriate error code is returned.
\note There are many ways correlation coefficient is calculated. This algorithm returns Pearson product-moment correlation coefficient.
diff --git a/include/af/traits.hpp b/include/af/traits.hpp
index 5f7fed381c..29a1a58ea4 100644
--- a/include/af/traits.hpp
+++ b/include/af/traits.hpp
@@ -139,6 +139,30 @@ struct dtype_traits {
static const char* getName() { return "ulong"; }
};
+#if AF_API_VERSION >= 32
+template<>
+struct dtype_traits<short> { // explicit specialization: C++ short maps to the s16 array type
+ enum {
+ af_type = s16 ,
+ ctype = s16
+ };
+ typedef short base_type;
+ static const char* getName() { return "short"; }
+};
+#endif
+
+#if AF_API_VERSION >= 32
+template<>
+struct dtype_traits<unsigned short> { // explicit specialization: C++ unsigned short maps to the u16 array type
+ enum {
+ af_type = u16 ,
+ ctype = u16
+ };
+ typedef unsigned short base_type;
+ static const char* getName() { return "ushort"; }
+};
+#endif
+
}
#endif
diff --git a/include/af/util.h b/include/af/util.h
index 97e939e3e6..c1fd96ab24 100644
--- a/include/af/util.h
+++ b/include/af/util.h
@@ -121,11 +121,11 @@ namespace af
#define af_print(...) GET_PRINT_MACRO(__VA_ARGS__, AF_PRINT2, AF_PRINT1)(__VA_ARGS__)
-#else
+#else // AF_API_VERSION
#define af_print(exp) af::print(#exp, exp);
-#endif
+#endif // AF_API_VERSION
#endif //__cplusplus
diff --git a/include/af/vision.h b/include/af/vision.h
index 470a12d980..78cc107ac5 100644
--- a/include/af/vision.h
+++ b/include/af/vision.h
@@ -39,7 +39,9 @@ class array;
\ingroup cv_func_fast
*/
-AFAPI features fast(const array& in, const float thr=20.0f, const unsigned arc_length=9, const bool non_max=true, const float feature_ratio=0.05, const unsigned edge=3);
+AFAPI features fast(const array& in, const float thr=20.0f, const unsigned arc_length=9,
+ const bool non_max=true, const float feature_ratio=0.05,
+ const unsigned edge=3);
#if AF_API_VERSION >= 31
/**
@@ -68,7 +70,9 @@ AFAPI features fast(const array& in, const float thr=20.0f, const unsigned arc_l
\ingroup cv_func_harris
*/
-AFAPI features harris(const array& in, const unsigned max_corners=500, const float min_response=1e5f, const float sigma=1.f, const unsigned block_size=0, const float k_thr=0.04f);
+AFAPI features harris(const array& in, const unsigned max_corners=500,
+ const float min_response=1e5f, const float sigma=1.f,
+ const unsigned block_size=0, const float k_thr=0.04f);
#endif
/**
@@ -93,7 +97,10 @@ AFAPI features harris(const array& in, const unsigned max_corners=500, const flo
\ingroup cv_func_orb
*/
-AFAPI void orb(features& feat, array& desc, const array& image, const float fast_thr=20.f, const unsigned max_feat=400, const float scl_fctr=1.5f, const unsigned levels=4, const bool blur_img=false);
+AFAPI void orb(features& feat, array& desc, const array& image,
+ const float fast_thr=20.f, const unsigned max_feat=400,
+ const float scl_fctr=1.5f, const unsigned levels=4,
+ const bool blur_img=false);
#if AF_API_VERSION >= 31
/**
@@ -127,7 +134,48 @@ AFAPI void orb(features& feat, array& desc, const array& image, const float fast
\ingroup cv_func_sift
*/
-AFAPI void sift(features& feat, array& desc, const array& in, const unsigned n_layers=3, const float contrast_thr=0.04f, const float edge_thr=10.f, const float init_sigma=1.6f, const bool double_input=true, const float intensity_scale=0.00390625f, const float feature_ratio=0.05f);
+AFAPI void sift(features& feat, array& desc, const array& in, const unsigned n_layers=3,
+ const float contrast_thr=0.04f, const float edge_thr=10.f,
+ const float init_sigma=1.6f, const bool double_input=true,
+ const float intensity_scale=0.00390625f, const float feature_ratio=0.05f);
+#endif
+
+#if AF_API_VERSION >= 32
+/**
+ C++ Interface for SIFT feature detector and GLOH descriptor
+
+ \param[out] feat features object composed of arrays for x and y
+ coordinates, score, orientation and size of selected features
+ \param[out] desc Nx272 array containing extracted GLOH descriptors, where N
+ is the number of features found by SIFT
+ \param[in] in array containing a grayscale image (color images are not
+ supported)
+ \param[in] n_layers number of layers per octave, the number of octaves is
+ computed automatically according to the input image dimensions,
+ the original SIFT paper suggests 3
+ \param[in] contrast_thr threshold used to filter out features that have
+ low contrast, the original SIFT paper suggests 0.04
+ \param[in] edge_thr threshold used to filter out features that are too
+ edge-like, the original SIFT paper suggests 10.0
+ \param[in] init_sigma the sigma value used to filter the input image at
+ the first octave, the original SIFT paper suggests 1.6
+ \param[in] double_input if true, the input image dimensions will be
+ doubled and the doubled image will be used for the first octave
+ \param[in] intensity_scale the inverse of the difference between the minimum
+ and maximum grayscale intensity value, e.g.: if the ranges are
+ 0-256, the proper intensity_scale value is 1/256, if the ranges
+ are 0-1, the proper intensity_scale value is 1/1
+ \param[in] feature_ratio maximum ratio of features to detect, the maximum
+ number of features is calculated by feature_ratio * in.elements().
+ The maximum number of features is not based on the score, instead,
+ features detected after the limit is reached are discarded
+
+ \ingroup cv_func_sift
+ */
+AFAPI void gloh(features& feat, array& desc, const array& in, const unsigned n_layers=3,
+ const float contrast_thr=0.04f, const float edge_thr=10.f,
+ const float init_sigma=1.6f, const bool double_input=true,
+ const float intensity_scale=0.00390625f, const float feature_ratio=0.05f);
#endif
/**
@@ -245,6 +293,37 @@ AFAPI features susan(const array& in,
AFAPI array dog(const array& in, const int radius1, const int radius2);
#endif
+#if AF_API_VERSION >= 32
+/**
+ C++ Interface for Homography estimation
+
+ \param[out] H is a 3x3 array containing the estimated homography.
+ \param[out] inliers is the number of inliers that the homography was estimated to comprise,
+ in the case that htype is AF_HOMOGRAPHY_RANSAC, a higher inlier_thr value will increase the
+ estimated inliers. Note that if the number of inliers is too low, it is likely
+ that a bad homography will be returned.
+ \param[in] x_src x coordinates of the source points.
+ \param[in] y_src y coordinates of the source points.
+ \param[in] x_dst x coordinates of the destination points.
+ \param[in] y_dst y coordinates of the destination points.
+ \param[in] htype can be AF_HOMOGRAPHY_RANSAC, for which a RANdom SAmple Consensus will be
+ used to evaluate the homography quality (e.g., number of inliers), or AF_HOMOGRAPHY_LMEDS,
+ which will use Least Median of Squares method to evaluate homography quality
+ \param[in] inlier_thr if htype is AF_HOMOGRAPHY_RANSAC, this parameter will give the maximum L2-distance
+ for a point to be considered an inlier.
+ \param[in] iterations maximum number of iterations when htype is AF_HOMOGRAPHY_RANSAC and backend is CPU,
+ if backend is CUDA or OpenCL, iterations is the total number of iterations, an
+ iteration is a selection of 4 random points for which the homography is estimated
+ and evaluated for number of inliers.
+ \param[in] otype the array type for the homography output.
+
+ \ingroup cv_func_homography
+*/
+AFAPI void homography(array& H, int& inliers, const array& x_src, const array& y_src,
+ const array& x_dst, const array& y_dst, const af_homography_type htype=AF_HOMOGRAPHY_RANSAC,
+ const float inlier_thr=3.f, const unsigned iterations=1000, const dtype otype=f32);
+#endif
+
}
#endif
@@ -277,7 +356,8 @@ extern "C" {
\ingroup cv_func_fast
*/
- AFAPI af_err af_fast(af_features *out, const af_array in, const float thr, const unsigned arc_length, const bool non_max, const float feature_ratio, const unsigned edge);
+ AFAPI af_err af_fast(af_features *out, const af_array in, const float thr, const unsigned arc_length,
+ const bool non_max, const float feature_ratio, const unsigned edge);
#if AF_API_VERSION >= 31
/**
@@ -306,7 +386,9 @@ extern "C" {
\ingroup cv_func_harris
*/
- AFAPI af_err af_harris(af_features *out, const af_array in, const unsigned max_corners, const float min_response, const float sigma, const unsigned block_size, const float k_thr);
+ AFAPI af_err af_harris(af_features *out, const af_array in, const unsigned max_corners,
+ const float min_response, const float sigma,
+ const unsigned block_size, const float k_thr);
#endif
/**
@@ -331,7 +413,9 @@ extern "C" {
\ingroup cv_func_orb
*/
- AFAPI af_err af_orb(af_features *feat, af_array *desc, const af_array in, const float fast_thr, const unsigned max_feat, const float scl_fctr, const unsigned levels, const bool blur_img);
+ AFAPI af_err af_orb(af_features *feat, af_array *desc, const af_array in,
+ const float fast_thr, const unsigned max_feat, const float scl_fctr,
+ const unsigned levels, const bool blur_img);
#if AF_API_VERSION >= 31
/**
@@ -365,7 +449,48 @@ extern "C" {
\ingroup cv_func_sift
*/
- AFAPI af_err af_sift(af_features *feat, af_array *desc, const af_array in, const unsigned n_layers, const float contrast_thr, const float edge_thr, const float init_sigma, const bool double_input, const float intensity_scale, const float feature_ratio);
+ AFAPI af_err af_sift(af_features *feat, af_array *desc, const af_array in,
+ const unsigned n_layers, const float contrast_thr, const float edge_thr,
+ const float init_sigma, const bool double_input,
+ const float intensity_scale, const float feature_ratio);
+#endif
+
+#if AF_API_VERSION >= 32
+ /**
+ C Interface for SIFT feature detector and GLOH descriptor
+
+ \param[out] feat af_features object composed of arrays for x and y
+ coordinates, score, orientation and size of selected features
+ \param[out] desc Nx272 array containing extracted GLOH descriptors, where N
+ is the number of features found by SIFT
+ \param[in] in array containing a grayscale image (color images are not
+ supported)
+ \param[in] n_layers number of layers per octave, the number of octaves is
+ computed automatically according to the input image dimensions,
+ the original SIFT paper suggests 3
+ \param[in] contrast_thr threshold used to filter out features that have
+ low contrast, the original SIFT paper suggests 0.04
+ \param[in] edge_thr threshold used to filter out features that are too
+ edge-like, the original SIFT paper suggests 10.0
+ \param[in] init_sigma the sigma value used to filter the input image at
+ the first octave, the original SIFT paper suggests 1.6
+ \param[in] double_input if true, the input image dimensions will be
+ doubled and the doubled image will be used for the first octave
+ \param[in] intensity_scale the inverse of the difference between the minimum
+ and maximum grayscale intensity value, e.g.: if the ranges are
+ 0-256, the proper intensity_scale value is 1/256, if the ranges
+ are 0-1, the proper intensity_scale value is 1/1
+ \param[in] feature_ratio maximum ratio of features to detect, the maximum
+ number of features is calculated by feature_ratio * in.elements().
+ The maximum number of features is not based on the score, instead,
+ features detected after the limit is reached are discarded
+
+ \ingroup cv_func_sift
+ */
+ AFAPI af_err af_gloh(af_features *feat, af_array *desc, const af_array in,
+ const unsigned n_layers, const float contrast_thr,
+ const float edge_thr, const float init_sigma, const bool double_input,
+ const float intensity_scale, const float feature_ratio);
#endif
/**
@@ -441,7 +566,8 @@ extern "C" {
\ingroup cv_func_match_template
*/
- AFAPI af_err af_match_template(af_array *out, const af_array search_img, const af_array template_img, const af_match_type m_type);
+ AFAPI af_err af_match_template(af_array *out, const af_array search_img,
+ const af_array template_img, const af_match_type m_type);
#if AF_API_VERSION >= 31
/**
@@ -462,7 +588,8 @@ extern "C" {
\ingroup cv_func_susan
*/
- AFAPI af_err af_susan(af_features* out, const af_array in, const unsigned radius, const float diff_thr, const float geom_thr,
+ AFAPI af_err af_susan(af_features* out, const af_array in, const unsigned radius,
+ const float diff_thr, const float geom_thr,
const float feature_ratio, const unsigned edge);
#endif
@@ -482,6 +609,40 @@ extern "C" {
AFAPI af_err af_dog(af_array *out, const af_array in, const int radius1, const int radius2);
#endif
+#if AF_API_VERSION >= 32
+ /**
+ C Interface wrapper for Homography estimation
+
+ \param[out] H is a 3x3 array containing the estimated homography.
+ \param[out] inliers is the number of inliers that the homography was estimated to comprise,
+ in the case that htype is AF_HOMOGRAPHY_RANSAC, a higher inlier_thr value will increase the
+ estimated inliers. Note that if the number of inliers is too low, it is likely
+ that a bad homography will be returned.
+ \param[in] x_src x coordinates of the source points.
+ \param[in] y_src y coordinates of the source points.
+ \param[in] x_dst x coordinates of the destination points.
+ \param[in] y_dst y coordinates of the destination points.
+ \param[in] htype can be AF_HOMOGRAPHY_RANSAC, for which a RANdom SAmple Consensus will be
+ used to evaluate the homography quality (e.g., number of inliers), or AF_HOMOGRAPHY_LMEDS,
+ which will use Least Median of Squares method to evaluate homography quality.
+ \param[in] inlier_thr if htype is AF_HOMOGRAPHY_RANSAC, this parameter will give the maximum L2-distance
+ for a point to be considered an inlier.
+ \param[in] iterations maximum number of iterations when htype is AF_HOMOGRAPHY_RANSAC and backend is CPU,
+ if backend is CUDA or OpenCL, iterations is the total number of iterations, an
+ iteration is a selection of 4 random points for which the homography is estimated
+ and evaluated for number of inliers.
+ \param[in] otype the array type for the homography output.
+ \return \ref AF_SUCCESS if the computation is successful,
+ otherwise an appropriate error code is returned.
+
+ \ingroup cv_func_homography
+ */
+ AFAPI af_err af_homography(af_array *H, int *inliers, const af_array x_src, const af_array y_src,
+ const af_array x_dst, const af_array y_dst,
+ const af_homography_type htype, const float inlier_thr,
+ const unsigned iterations, const af_dtype otype);
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/include/arrayfire.h b/include/arrayfire.h
index 56518b2c86..e4ac1bbb71 100644
--- a/include/arrayfire.h
+++ b/include/arrayfire.h
@@ -200,6 +200,13 @@
Reading and writing images
@}
+ @defgroup unified_func Unified API Functions
+ @{
+
+ Functions to set current backend and utilities
+
+ @}
+
@defgroup external Interface Functions
@{
@@ -262,12 +269,16 @@
\example histogram.cpp
\example fractal.cpp
\example plot2d.cpp
+\example plot3.cpp
+\example surface.cpp
\example conway_pretty.cpp
+\example basic.cpp
\example helloworld.cpp
\example vectorize.cpp
\example integer.cpp
\example convolve.cpp
\example rainfall.cpp
+\example swe.cpp
\example morphing.cpp
\example image_demo.cpp
\example brain_segmentation.cpp
@@ -284,6 +295,7 @@
#include "af/algorithm.h"
#include "af/arith.h"
#include "af/array.h"
+#include "af/backend.h"
#include "af/blas.h"
#include "af/constants.h"
#include "af/complex.h"
diff --git a/src/api/c/approx.cpp b/src/api/c/approx.cpp
index 1bc7723fdf..7c2935ac1b 100644
--- a/src/api/c/approx.cpp
+++ b/src/api/c/approx.cpp
@@ -41,13 +41,18 @@ af_err af_approx1(af_array *out, const af_array in, const af_array pos,
ArrayInfo i_info = getInfo(in);
ArrayInfo p_info = getInfo(pos);
+ dim4 idims = i_info.dims();
+ dim4 pdims = p_info.dims();
+
af_dtype itype = i_info.getType();
ARG_ASSERT(1, i_info.isFloating()); // Only floating and complex types
ARG_ASSERT(2, p_info.isRealFloating()); // Only floating types
ARG_ASSERT(1, i_info.isSingle() == p_info.isSingle()); // Must have same precision
ARG_ASSERT(1, i_info.isDouble() == p_info.isDouble()); // Must have same precision
- DIM_ASSERT(2, p_info.isColumn()); // Only 1D input allowed
+ // POS should either be (x, 1, 1, 1) or (x, idims[1], idims[2], idims[3])
+ DIM_ASSERT(2, p_info.isColumn() ||
+ (pdims[1] == idims[1] && pdims[2] == idims[2] && pdims[3] == idims[3]));
ARG_ASSERT(3, (method == AF_INTERP_LINEAR || method == AF_INTERP_NEAREST));
af_array output;
@@ -74,16 +79,23 @@ af_err af_approx2(af_array *out, const af_array in, const af_array pos0, const a
ArrayInfo p_info = getInfo(pos0);
ArrayInfo q_info = getInfo(pos1);
+ dim4 idims = i_info.dims();
+ dim4 pdims = p_info.dims();
+ dim4 qdims = q_info.dims();
+
af_dtype itype = i_info.getType();
- ARG_ASSERT(1, i_info.isFloating()); // Only floating and complex types
- ARG_ASSERT(2, p_info.isRealFloating()); // Only floating types
- ARG_ASSERT(3, q_info.isRealFloating()); // Only floating types
- ARG_ASSERT(1, p_info.getType() == q_info.getType()); // Must have same type
- ARG_ASSERT(1, i_info.isSingle() == p_info.isSingle()); // Must have same precision
- ARG_ASSERT(1, i_info.isDouble() == p_info.isDouble()); // Must have same precision
- DIM_ASSERT(2, p_info.dims() == q_info.dims()); // POS0 and POS1 must have same dims
- DIM_ASSERT(2, p_info.ndims() < 3);// Allowing input batch but not positions. Output dims = (px, py, iz, iw)
+ ARG_ASSERT(1, i_info.isFloating()); // Only floating and complex types
+ ARG_ASSERT(2, p_info.isRealFloating()); // Only floating types
+ ARG_ASSERT(3, q_info.isRealFloating()); // Only floating types
+ ARG_ASSERT(1, p_info.getType() == q_info.getType()); // Must have same type
+ ARG_ASSERT(1, i_info.isSingle() == p_info.isSingle()); // Must have same precision
+ ARG_ASSERT(1, i_info.isDouble() == p_info.isDouble()); // Must have same precision
+ DIM_ASSERT(2, pdims == qdims); // POS0 and POS1 must have same dims
+
+ // POS should either be (x, y, 1, 1) or (x, y, idims[2], idims[3])
+ DIM_ASSERT(2, (pdims[2] == 1 && pdims[3] == 1) ||
+ (pdims[2] == idims[2] && pdims[3] == idims[3]));
ARG_ASSERT(3, (method == AF_INTERP_LINEAR || method == AF_INTERP_NEAREST));
af_array output;
diff --git a/src/api/c/assign.cpp b/src/api/c/assign.cpp
index c990889666..13fa179da8 100644
--- a/src/api/c/assign.cpp
+++ b/src/api/c/assign.cpp
@@ -105,6 +105,8 @@ void assign_helper(Array &out, const unsigned &ndims, const af_seq *index, co
case u32: assign(out, ndims, index, getArray(in_)); break;
case s64: assign(out, ndims, index, getArray(in_)); break;
case u64: assign(out, ndims, index, getArray(in_)); break;
+ case s16: assign(out, ndims, index, getArray(in_)); break;
+ case u16: assign(out, ndims, index, getArray(in_)); break;
case u8 : assign(out, ndims, index, getArray(in_)); break;
case b8 : assign(out, ndims, index, getArray(in_)); break;
default : TYPE_ERROR(1, iType); break;
@@ -165,6 +167,8 @@ af_err af_assign_seq(af_array *out,
case u32: assign_helper(getWritableArray(res), ndims, index, rhs); break;
case s64: assign_helper(getWritableArray(res), ndims, index, rhs); break;
case u64: assign_helper(getWritableArray(res), ndims, index, rhs); break;
+ case s16: assign_helper(getWritableArray(res), ndims, index, rhs); break;
+ case u16: assign_helper(getWritableArray(res), ndims, index, rhs); break;
case u8 : assign_helper(getWritableArray(res), ndims, index, rhs); break;
case b8 : assign_helper(getWritableArray(res), ndims, index, rhs); break;
default : TYPE_ERROR(1, oType); break;
@@ -332,6 +336,8 @@ af_err af_assign_gen(af_array *out,
case u32: genAssign(output, idxrs, rhs); break;
case s64: genAssign(output, idxrs, rhs); break;
case s32: genAssign(output, idxrs, rhs); break;
+ case s16: genAssign(output, idxrs, rhs); break;
+ case u16: genAssign(output, idxrs, rhs); break;
case u8: genAssign(output, idxrs, rhs); break;
case b8: genAssign(output, idxrs, rhs); break;
default: TYPE_ERROR(1, rhsType);
diff --git a/src/api/c/bilateral.cpp b/src/api/c/bilateral.cpp
index c83c7ef8db..4f9281d782 100644
--- a/src/api/c/bilateral.cpp
+++ b/src/api/c/bilateral.cpp
@@ -42,6 +42,8 @@ static af_err bilateral(af_array *out, const af_array &in, const float &s_sigma,
case s32: output = bilateral (in, s_sigma, c_sigma); break;
case u32: output = bilateral (in, s_sigma, c_sigma); break;
case u8 : output = bilateral (in, s_sigma, c_sigma); break;
+ case s16: output = bilateral (in, s_sigma, c_sigma); break;
+ case u16: output = bilateral (in, s_sigma, c_sigma); break;
default : TYPE_ERROR(1, type);
}
std::swap(*out,output);
diff --git a/src/api/c/binary.cpp b/src/api/c/binary.cpp
index 8a6ae465a6..2997c13692 100644
--- a/src/api/c/binary.cpp
+++ b/src/api/c/binary.cpp
@@ -55,6 +55,8 @@ static af_err af_arith(af_array *out, const af_array lhs, const af_array rhs, co
case b8 : res = arithOp(lhs, rhs, odims); break;
case s64: res = arithOp(lhs, rhs, odims); break;
case u64: res = arithOp(lhs, rhs, odims); break;
+ case s16: res = arithOp(lhs, rhs, odims); break;
+ case u16: res = arithOp(lhs, rhs, odims); break;
default: TYPE_ERROR(0, otype);
}
@@ -85,6 +87,8 @@ static af_err af_arith_real(af_array *out, const af_array lhs, const af_array rh
case b8 : res = arithOp(lhs, rhs, odims); break;
case s64: res = arithOp(lhs, rhs, odims); break;
case u64: res = arithOp(lhs, rhs, odims); break;
+ case s16: res = arithOp(lhs, rhs, odims); break;
+ case u16: res = arithOp(lhs, rhs, odims); break;
default: TYPE_ERROR(0, otype);
}
@@ -260,6 +264,8 @@ static af_err af_logic(af_array *out, const af_array lhs, const af_array rhs, co
case b8 : res = logicOp(lhs, rhs, odims); break;
case s64: res = logicOp(lhs, rhs, odims); break;
case u64: res = logicOp(lhs, rhs, odims); break;
+ case s16: res = logicOp(lhs, rhs, odims); break;
+ case u16: res = logicOp(lhs, rhs, odims); break;
default: TYPE_ERROR(0, type);
}
@@ -335,6 +341,8 @@ static af_err af_bitwise(af_array *out, const af_array lhs, const af_array rhs,
case b8 : res = bitOp(lhs, rhs, odims); break;
case s64: res = bitOp(lhs, rhs, odims); break;
case u64: res = bitOp(lhs, rhs, odims); break;
+ case s16: res = bitOp(lhs, rhs, odims); break;
+ case u16: res = bitOp(lhs, rhs, odims); break;
default: TYPE_ERROR(0, type);
}
diff --git a/src/api/c/cast.cpp b/src/api/c/cast.cpp
index 379b2df91b..872ace27c5 100644
--- a/src/api/c/cast.cpp
+++ b/src/api/c/cast.cpp
@@ -39,6 +39,8 @@ static af_array cast(const af_array in, const af_dtype type)
case b8 : return getHandle(castArray(in));
case s64: return getHandle(castArray(in));
case u64: return getHandle(castArray(in));
+ case s16: return getHandle(castArray(in));
+ case u16: return getHandle(castArray(in));
default: TYPE_ERROR(2, type);
}
}
diff --git a/src/api/c/convolve.cpp b/src/api/c/convolve.cpp
index 912d8fd0a0..750552db88 100644
--- a/src/api/c/convolve.cpp
+++ b/src/api/c/convolve.cpp
@@ -85,6 +85,10 @@ af_err convolve(af_array *out, const af_array signal, const af_array filter)
case f64: output = convolve(signal, filter, convBT); break;
case u32: output = convolve(signal, filter, convBT); break;
case s32: output = convolve(signal, filter, convBT); break;
+ case u16: output = convolve(signal, filter, convBT); break;
+ case s16: output = convolve(signal, filter, convBT); break;
+ case u64: output = convolve(signal, filter, convBT); break;
+ case s64: output = convolve(signal, filter, convBT); break;
case u8: output = convolve(signal, filter, convBT); break;
case b8: output = convolve(signal, filter, convBT); break;
default: TYPE_ERROR(1, stype);
@@ -120,6 +124,10 @@ af_err convolve2_sep(af_array *out, af_array col_filter, af_array row_filter, co
case f64: output = convolve2(signal, col_filter, row_filter); break;
case u32: output = convolve2(signal, col_filter, row_filter); break;
case s32: output = convolve2(signal, col_filter, row_filter); break;
+ case u16: output = convolve2(signal, col_filter, row_filter); break;
+ case s16: output = convolve2(signal, col_filter, row_filter); break;
+ case u64: output = convolve2(signal, col_filter, row_filter); break;
+ case s64: output = convolve2(signal, col_filter, row_filter); break;
case u8: output = convolve2(signal, col_filter, row_filter); break;
case b8: output = convolve2(signal, col_filter, row_filter); break;
default: TYPE_ERROR(1, signalType);
diff --git a/src/api/c/corrcoef.cpp b/src/api/c/corrcoef.cpp
index d6d98006a9..275fa80239 100644
--- a/src/api/c/corrcoef.cpp
+++ b/src/api/c/corrcoef.cpp
@@ -71,6 +71,8 @@ af_err af_corrcoef(double *realVal, double *imagVal, const af_array X, const af_
case u32: *realVal = corrcoef(X, Y); break;
case s64: *realVal = corrcoef(X, Y); break;
case u64: *realVal = corrcoef(X, Y); break;
+ case s16: *realVal = corrcoef(X, Y); break;
+ case u16: *realVal = corrcoef(X, Y); break;
case u8: *realVal = corrcoef(X, Y); break;
case b8: *realVal = corrcoef(X, Y); break;
default : TYPE_ERROR(1, xType);
diff --git a/src/api/c/covariance.cpp b/src/api/c/covariance.cpp
index 80b391d1a7..f8bb9c4435 100644
--- a/src/api/c/covariance.cpp
+++ b/src/api/c/covariance.cpp
@@ -27,14 +27,16 @@ using namespace detail;
template
static af_array cov(const af_array& X, const af_array& Y, const bool isbiased)
{
- Array xArr = cast(getArray(X));
- Array yArr = cast(getArray(Y));
+ Array _x = getArray(X);
+ Array _y = getArray(Y);
+ Array xArr = cast(_x);
+ Array yArr = cast(_y);
dim4 xDims = xArr.dims();
dim_t N = isbiased ? xDims[0] : xDims[0]-1;
- Array xmArr = createValueArray(xDims, mean(xArr));
- Array ymArr = createValueArray(xDims, mean(yArr));
+ Array xmArr = createValueArray(xDims, mean(_x));
+ Array ymArr = createValueArray(xDims, mean(_y));
Array nArr = createValueArray(xDims, scalar(N));
Array diffX = detail::arithOp(xArr, xmArr, xDims);
@@ -71,6 +73,8 @@ af_err af_cov(af_array* out, const af_array X, const af_array Y, const bool isbi
case u32: output = cov(X, Y, isbiased); break;
case s64: output = cov(X, Y, isbiased); break;
case u64: output = cov(X, Y, isbiased); break;
+ case s16: output = cov(X, Y, isbiased); break;
+ case u16: output = cov(X, Y, isbiased); break;
case u8: output = cov(X, Y, isbiased); break;
default : TYPE_ERROR(1, xType);
}
diff --git a/src/api/c/data.cpp b/src/api/c/data.cpp
index 50acaad1d3..4d77fb279e 100644
--- a/src/api/c/data.cpp
+++ b/src/api/c/data.cpp
@@ -59,6 +59,8 @@ af_err af_get_data_ptr(void *data, const af_array arr)
case u8: copyData(static_cast(data), arr); break;
case s64: copyData(static_cast(data), arr); break;
case u64: copyData(static_cast(data), arr); break;
+ case s16: copyData(static_cast(data), arr); break;
+ case u16: copyData(static_cast(data), arr); break;
default: TYPE_ERROR(1, type);
}
}
@@ -88,6 +90,8 @@ af_err af_create_array(af_array *result, const void * const data,
case u8: out = createHandleFromData(d, static_cast(data)); break;
case s64: out = createHandleFromData(d, static_cast(data)); break;
case u64: out = createHandleFromData(d, static_cast(data)); break;
+ case s16: out = createHandleFromData(d, static_cast(data)); break;
+ case u16: out = createHandleFromData(d, static_cast(data)); break;
default: TYPE_ERROR(4, type);
}
std::swap(*result, out);
@@ -118,6 +122,8 @@ af_err af_constant(af_array *result, const double value,
case u8: out = createHandleFromValue(d, value); break;
case s64: out = createHandleFromValue(d, value); break;
case u64: out = createHandleFromValue(d, value); break;
+ case s16: out = createHandleFromValue(d, value); break;
+ case u16: out = createHandleFromValue(d, value); break;
default: TYPE_ERROR(4, type);
}
std::swap(*result, out);
@@ -212,6 +218,8 @@ af_err af_create_handle(af_array *result, const unsigned ndims, const dim_t * co
case u8: out = createHandle(d); break;
case s64: out = createHandle(d); break;
case u64: out = createHandle(d); break;
+ case s16: out = createHandle(d); break;
+ case u16: out = createHandle(d); break;
default: TYPE_ERROR(3, type);
}
std::swap(*result, out);
@@ -239,6 +247,8 @@ af_err af_copy_array(af_array *out, const af_array in)
case u8: res = copyArray(in); break;
case s64: res = copyArray(in); break;
case u64: res = copyArray(in); break;
+ case s16: res = copyArray(in); break;
+ case u16: res = copyArray(in); break;
default: TYPE_ERROR(1, type);
}
std::swap(*out, res);
@@ -266,6 +276,8 @@ af_err af_get_data_ref_count(int *use_count, const af_array in)
case u8: res = getArray(in).useCount(); break;
case s64: res = getArray(in).useCount(); break;
case u64: res = getArray(in).useCount(); break;
+ case s16: res = getArray(in).useCount(); break;
+ case u16: res = getArray(in).useCount(); break;
default: TYPE_ERROR(1, type);
}
std::swap(*use_count, res);
@@ -310,6 +322,8 @@ af_err af_randu(af_array *out, const unsigned ndims, const dim_t * const dims, c
case u32: result = randu_(d); break;
case s64: result = randu_(d); break;
case u64: result = randu_(d); break;
+ case s16: result = randu_(d); break;
+ case u16: result = randu_(d); break;
case u8: result = randu_(d); break;
case b8: result = randu_(d); break;
default: TYPE_ERROR(3, type);
@@ -375,6 +389,8 @@ af_err af_identity(af_array *out, const unsigned ndims, const dim_t * const dims
case u8: result = identity_(d); break;
case u64: result = identity_(d); break;
case s64: result = identity_(d); break;
+ case u16: result = identity_(d); break;
+ case s16: result = identity_(d); break;
// Removed because of bool type. Functions implementations exist.
case b8: result = identity_(d); break;
default: TYPE_ERROR(3, type);
@@ -401,6 +417,8 @@ af_err af_release_array(af_array arr)
case u8: releaseHandle(arr); break;
case s64: releaseHandle(arr); break;
case u64: releaseHandle(arr); break;
+ case s16: releaseHandle(arr); break;
+ case u16: releaseHandle(arr); break;
default: TYPE_ERROR(0, type);
}
}
@@ -433,6 +451,8 @@ af_array retain(const af_array in)
case b8: return retainHandle(in);
case s64: return retainHandle(in);
case u64: return retainHandle(in);
+ case s16: return retainHandle(in);
+ case u16: return retainHandle(in);
default:
TYPE_ERROR(1, ty);
}
@@ -470,6 +490,8 @@ af_err af_range(af_array *result, const unsigned ndims, const dim_t * const dims
case u32: out = range_(d, seq_dim); break;
case s64: out = range_(d, seq_dim); break;
case u64: out = range_(d, seq_dim); break;
+ case s16: out = range_(d, seq_dim); break;
+ case u16: out = range_(d, seq_dim); break;
case u8: out = range_(d, seq_dim); break;
default: TYPE_ERROR(4, type);
}
@@ -495,16 +517,9 @@ af_err af_iota(af_array *result, const unsigned ndims, const dim_t * const dims,
DIM_ASSERT(1, ndims > 0 && ndims <= 4);
DIM_ASSERT(3, t_ndims > 0 && t_ndims <= 4);
- dim4 d;
- dim4 t;
- for(unsigned i = 0; i < 4; i++) {
- d[i] = dims[i];
- DIM_ASSERT(2, d[i] >= 1);
- }
- for(unsigned i = 0; i < 4; i++) {
- t[i] = tdims[i];
- DIM_ASSERT(4, t[i] >= 1);
- }
+
+ dim4 d = verifyDims(ndims, dims);
+ dim4 t = verifyDims(t_ndims, tdims);
switch(type) {
case f32: out = iota_(d, t); break;
@@ -513,6 +528,8 @@ af_err af_iota(af_array *result, const unsigned ndims, const dim_t * const dims,
case u32: out = iota_(d, t); break;
case s64: out = iota_(d, t); break;
case u64: out = iota_(d, t); break;
+ case s16: out = iota_(d, t); break;
+ case u16: out = iota_(d, t); break;
case u8: out = iota_(d, t); break;
default: TYPE_ERROR(4, type);
}
@@ -596,6 +613,8 @@ af_err af_eval(af_array arr)
case b8 : eval(arr); break;
case s64: eval(arr); break;
case u64: eval(arr); break;
+ case s16: eval(arr); break;
+ case u16: eval(arr); break;
default:
TYPE_ERROR(0, type);
}
@@ -633,6 +652,8 @@ af_err af_diag_create(af_array *out, const af_array in, const int num)
case u32: result = diagCreate(in, num); break;
case s64: result = diagCreate(in, num); break;
case u64: result = diagCreate(in, num); break;
+ case s16: result = diagCreate(in, num); break;
+ case u16: result = diagCreate(in, num); break;
case u8: result = diagCreate(in, num); break;
// Removed because of bool type. Functions implementations exist.
case b8: result = diagCreate(in, num); break;
@@ -662,6 +683,8 @@ af_err af_diag_extract(af_array *out, const af_array in, const int num)
case u32: result = diagExtract(in, num); break;
case s64: result = diagExtract(in, num); break;
case u64: result = diagExtract(in, num); break;
+ case s16: result = diagExtract(in, num); break;
+ case u16: result = diagExtract(in, num); break;
case u8: result = diagExtract(in, num); break;
// Removed because of bool type. Functions implementations exist.
case b8: result = diagExtract(in, num); break;
@@ -702,6 +725,8 @@ af_err af_write_array(af_array arr, const void *data, const size_t bytes, af_sou
case u8: write_array(arr, static_cast(data), bytes, src); break;
case s64: write_array(arr, static_cast(data), bytes, src); break;
case u64: write_array(arr, static_cast