diff --git a/Libraries/oneMKL/matrix_mul/License.txt b/Libraries/oneMKL/matrix_mul/License.txt new file mode 100644 index 0000000000..9cde07f558 --- /dev/null +++ b/Libraries/oneMKL/matrix_mul/License.txt @@ -0,0 +1,8 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/Libraries/oneMKL/matrix_mul/Makefile b/Libraries/oneMKL/matrix_mul/Makefile new file mode 100644 index 0000000000..5ec9dbfb07 --- /dev/null +++ b/Libraries/oneMKL/matrix_mul/Makefile @@ -0,0 +1,28 @@ +# Build and run the oneMKL matrix multiplication sample with the DPC++ compiler. +DPCPP_CXX = dpcpp +DPCPP_CXXFLAGS = -g + +MKL_CXXFLAGS = -I$(MKLROOT)/include -DMKL_ILP64 +MKL_LDFLAGS = ${MKLROOT}/lib/intel64/libmkl_sycl.a -L${MKLROOT}/lib/intel64 -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lOpenCL -ldl +MKL_EXE_NAME = matrix_mul_mkl +MKL_SOURCES = matrix_mul_mkl.cpp + +# These targets name commands, not files. +.PHONY: all build_mkl run run_mkl clean + +all: build_mkl + +build_mkl: $(MKL_EXE_NAME) + +# -o is part of the recipe so that overriding DPCPP_CXXFLAGS cannot drop it. +$(MKL_EXE_NAME): $(MKL_SOURCES) +	$(DPCPP_CXX) $(MKL_CXXFLAGS) $(DPCPP_CXXFLAGS) -o $@ $^ $(MKL_LDFLAGS) + +run: run_mkl + +# Runs the executable with SYCL_BE=PI_OPENCL set in its environment. +run_mkl: $(MKL_EXE_NAME) +	SYCL_BE=PI_OPENCL ./$(MKL_EXE_NAME) + +clean: +	$(RM) $(MKL_EXE_NAME) diff --git a/Libraries/oneMKL/matrix_mul/Makefile.win b/Libraries/oneMKL/matrix_mul/Makefile.win new file mode 100644 index 0000000000..1698210904 --- /dev/null +++ b/Libraries/oneMKL/matrix_mul/Makefile.win @@ -0,0 +1,21 @@ +DPCPP_CXX = dpcpp-cl +DPCPP_CXXFLAGS = /Zi /EHsc + +MKL_CXXFLAGS = -I"$(MKLROOT)/include" -DMKL_ILP64 +MKL_LDFLAGS = /link /libpath:"$(MKLROOT)\lib\intel64" mkl_intel_ilp64.lib mkl_sequential.lib mkl_core.lib mkl_sycl.lib opencl.lib +MKL_EXE_NAME = matrix_mul_mkl.exe +MKL_SOURCES = matrix_mul_mkl.cpp + +all: build_mkl + +build_mkl: +	$(DPCPP_CXX) $(MKL_CXXFLAGS) $(DPCPP_CXXFLAGS) -o $(MKL_EXE_NAME) $(MKL_SOURCES) $(MKL_LDFLAGS) + +run: run_mkl + +run_mkl: +	$(MKL_EXE_NAME) + +# del takes /-style switches, not -rf; the leading - ignores a failing exit status from del. +clean: +	-del /f /q $(MKL_EXE_NAME) *.pdb diff --git a/Libraries/oneMKL/matrix_mul/README.md b/Libraries/oneMKL/matrix_mul/README.md new file mode 100644 index 0000000000..68ae246440 --- /dev/null +++ b/Libraries/oneMKL/matrix_mul/README.md @@ -0,0 +1,63 @@ +# `Matrix Multiplication` sample +Matrix Multiplication is a 
simple program that multiplies together two large matrices and verifies the results. +This program is implemented using C++ with oneAPI Math Kernel Library (oneMKL): + +| Optimized for | Description +|:--- |:--- +| OS | Linux* Ubuntu* 18.04, Windows 10* +| Hardware | Skylake with GEN9 or newer +| Software | Intel® oneAPI DPC++ Compiler beta, oneMKL +| What you will learn | How to offload computations on 2D arrays to a GPU using oneMKL +| Time to complete | 15 minutes + +## Key implementation details +The sample calls the oneMKL `gemm` routine on SYCL buffers to compute the matrix product on the device, then verifies the result against a host computation. + +## License +This code sample is licensed under the MIT license. + +## Building `Matrix Multiplication` for oneMKL + +### On a Linux* System +Perform the following steps: +1. Build the program using the following `cmake` commands. +``` +$ mkdir build +$ cd build +$ cmake .. +$ make +``` + +> Note: this sample builds a single oneMKL executable. With the provided Makefile you can also build and run it explicitly with the following: +> Create oneMKL executable: make build_mkl +> Run oneMKL executable: make run_mkl + +2. Run the program (default uses buffers): + ``` + make run + ``` +> Note: `make run_mkl` runs the same executable as `make run` + +3. Clean the program using: + ``` + make clean + ``` + +### On a Windows* System Using Visual Studio* Version 2017 or Newer + +* Build the program using VS2017 or VS2019 + Right click on the solution file and open using either VS2017 or VS2019 IDE. + Right click on the project in Solution Explorer and select Rebuild. + From the top menu select Debug -> Start without Debugging. 
+ + +* Build the program using MSBuild + Open "x64 Native Tools Command Prompt for VS2017" or "x64 Native Tools Command Prompt for VS2019" + Run - MSBuild matrix_mul.sln /t:Rebuild /p:Configuration="Release" + +#### Visual Studio IDE + * Open Visual Studio 2017 + * Select Menu "File > Open > Project/Solution", find "matrix_mul" folder and select "matrix_mul.sln" + * Select Menu "Project > Build" to build the selected configuration + * Select Menu "Debug > Start Without Debugging" to run the program + diff --git a/Libraries/oneMKL/matrix_mul/matrix_mul_mkl.cpp b/Libraries/oneMKL/matrix_mul/matrix_mul_mkl.cpp new file mode 100644 index 0000000000..2f1f9d31be --- /dev/null +++ b/Libraries/oneMKL/matrix_mul/matrix_mul_mkl.cpp @@ -0,0 +1,185 @@ +//============================================================== +// Copyright © 2020 Intel Corporation +// +// SPDX-License-Identifier: MIT +// ============================================================= +// +// Matrix Multiplication is a simple program that multiplies together two large matrices and verifies the results. +// This program is implemented using C++ with oneAPI Math Kernel Library (oneMKL) + +#include <CL/sycl.hpp> +#include <iostream> +#include <limits> +#include "mkl.h" +#include "mkl_blas_sycl.hpp" + +using namespace std; +using namespace sycl; + +// Matrix size constants (kSize rather than size: a global `size` is ambiguous with std::size under `using namespace std`) +auto constexpr kSize = (600 * 8);  // Must be a multiple of 8. +auto constexpr M = kSize / 8; +auto constexpr N = kSize / 4; +auto constexpr P = kSize / 2; + +/** + * Perform the matrix multiplication on host to verify results from mkl. 
+ */ +int VerifyResult(double *c_back); + +int main() {  // Runs GEMM through oneMKL and returns VerifyResult's status. + // + // Initialize data for Gemm + // + // C = alpha * op(A) * op(B) + beta * C + // + mkl::transpose transA = mkl::transpose::nontrans; + mkl::transpose transB = mkl::transpose::nontrans; + + // Matrix data sizes + int m = M; + int n = P; + int k = N; + + // Leading dimensions of data + int ldA = m; + int ldB = k; + int ldC = m; + + // Set scalar fp values + double alpha = 1.0; + double beta = 0.0; + + // 1D arrays on host side + + auto A = new double[M * N]; + auto B = new double[N * P]; + auto C = new double[M * P]; + + // Prepare matrix data with column-major style + int i, j; + // A(M, N) is a matrix whose values are column number plus one + for (i = 0; i < N; i++) + for (j = 0; j < M; j++) A[i * M + j] = i + 1.0; + + // B(N, P) is matrix whose values are row number plus one + for (i = 0; i < P; i++) + for (j = 0; j < N; j++) B[i * N + j] = j + 1.0; + + cout << "Problem size: c(" << M << "," << P << ") = a(" << M << "," << N + << ") * b(" << N << "," << P << ")" << endl; + + // Execute Gemm + auto asyncHandler = [&](exception_list eL) { + for (auto &e : eL) { + try { + rethrow_exception(e); + } catch (sycl::exception const &e) { + cout << e.what() << endl; + cout << "fail" << endl; + // terminate() will exit the process, return non-zero, and output a + // message to the user about the exception + terminate(); + } + } + }; + + try { + // Initializing the devices queue with the default selector + // The device queue is used to enqueue the kernels and encapsulates + // all the states needed for execution + default_selector device_selector; + queue device_queue(device_selector, asyncHandler); + + cout << "Device: " + << device_queue.get_device().get_info<info::device::name>() + << endl; + + // Creating 1D buffers for matrices which are bound to host memory array + buffer<double, 1> a{A, range<1>{M * N}}; + buffer<double, 1> b{B, range<1>{N * P}}; + buffer<double, 1> c{C, range<1>{M * P}}; + + mkl::blas::gemm(device_queue, transA, transB, m, n, k, alpha, a, ldA, b, + ldB, 
beta, c, ldC); + } catch (sycl::exception const &e) { + cerr << "\t\tSYCL exception during GEMM\n" + << e.what() << endl + << "OpenCL status: " << e.get_cl_code() << endl; + } + + int result; + result = VerifyResult(C); + + delete[] A; + delete[] B; + delete[] C; + + return result; +} + +bool ValueSame(double a, double b) {  // True when |a - b| is below double epsilon. + return fabs(a - b) < numeric_limits<double>::epsilon(); +} + +int VerifyResult(double *c_back) {  // Returns 0 if c_back matches the host product, -1 otherwise. + // Check that the results are correct by comparing with host computing + int i, j, k; + + // 2D arrays on host side + + auto a_host = new double[M][N]; + auto b_host = new double[N][P]; + auto c_host = new double[M][P]; + + // a_host is a matrix whose values are column number plus one + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) a_host[i][j] = j + 1.0; + + // b_host is a matrix whose values are row number plus one + for (i = 0; i < N; i++) + for (j = 0; j < P; j++) b_host[i][j] = i + 1.0; + + // c_host is initialized to zero + for (i = 0; i < M; i++) + for (j = 0; j < P; j++) c_host[i][j] = 0; + + for (i = 0; i < M; i++) { + for (k = 0; k < N; k++) { + for (j = 0; j < P; j++) { + c_host[i][j] += a_host[i][k] * b_host[k][j]; + } + } + } + + bool MismatchFound = false; + + // Compare host side results with the result buffer from device side: print + // fail data 5 times only. c_back is read column-major (i + j * M). + int printf_count = 0; + for (i = 0; i < M; i++) { + for (j = 0; j < P; j++) { + if (!ValueSame(c_back[i + j * M], c_host[i][j])) { + cout << "fail - The result is incorrect for element: [" << i << ", " + << j << "], expected: " << c_host[i][j] + << " , but got: " << c_back[i + j * M] << endl; + MismatchFound = true; + printf_count++; + if (printf_count >= 5) break; + } + } + if (printf_count >= 5) break; + } + + delete[] a_host; + delete[] b_host; + delete[] c_host; + + if (!MismatchFound) { + cout << "success - The results are correct!" << endl; + return 0; + } else { + cerr << "fail - The results mis-match!" 
<< endl; + return -1; + } +} diff --git a/Libraries/oneMKL/matrix_mul/sample.json b/Libraries/oneMKL/matrix_mul/sample.json new file mode 100644 index 0000000000..e739d2c1bf --- /dev/null +++ b/Libraries/oneMKL/matrix_mul/sample.json @@ -0,0 +1,32 @@ +{ + "guid": "5E72947A-B5C2-465A-8514-85671EDCADF8", + "name": "Matrix Multiplication using oneMKL", + "categories": [ "Toolkit/Intel® oneAPI HPC Toolkit" ], + "toolchain": [ "dpcpp", "icc" ], + "dependencies": [ "mkl" ], + "description": "Simple program that multiplies two large matrices in parallel using oneMKL.", + "languages": [ { "cpp": { "properties": { "projectOptions": [ { "projectType": "makefile" } ] } } } ], + "targetDevice": [ "CPU", "GPU" ], + "os": [ "linux", "windows" ], + "builder": [ "ide", "make" ], + "ciTests": { + "linux": [{ + "steps": [ + "mkdir build", + "cd build", + "cmake ..", + "make", + "make run" + ] + }], + "windows": [ + { + "steps": [ + "MSBuild matrix_mul.sln /t:Rebuild /p:Configuration=\"release\"", + "cd x64/Release", + "matrix_mul_mkl.exe" + ] + } + ] + } +}