From 324db060c92ee5e952b9228d33b751397d646a4b Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Fri, 7 Aug 2020 00:31:49 -0700 Subject: [PATCH 01/11] add oneDNN and oneCCL samples Signed-off-by: ltsai1 --- Libraries/oneCCL/README.md | 19 + .../oneCCL_Getting_Started/CMakeLists.txt | 20 + .../oneCCL/oneCCL_Getting_Started/License.txt | 19 + .../oneCCL/oneCCL_Getting_Started/README.md | 175 +++ .../oneCCL_Getting_Started.ipynb | 1024 +++++++++++++++++ Libraries/oneCCL/oneCCL_Getting_Started/q | 32 + .../oneCCL/oneCCL_Getting_Started/sample.json | 26 + Libraries/oneDNN/README.md | 31 + .../dpcpp_interoperability/CMakeLists.txt | 7 + .../oneDNN/dpcpp_interoperability/License.txt | 19 + .../oneDNN/dpcpp_interoperability/README.md | 128 +++ .../oneDNN/dpcpp_interoperability/sample.json | 26 + .../oneDNN/getting_started/CMakeLists.txt | 17 + Libraries/oneDNN/getting_started/License.txt | 19 + Libraries/oneDNN/getting_started/README.md | 184 +++ .../getting_started/getting_started.ipynb | 561 +++++++++ Libraries/oneDNN/getting_started/q | 32 + Libraries/oneDNN/getting_started/sample.json | 25 + Libraries/oneDNN/simple_model/CMakeLists.txt | 17 + Libraries/oneDNN/simple_model/License.txt | 19 + Libraries/oneDNN/simple_model/README.md | 311 +++++ .../codes_for_ipynb/cnn_inference_f32.cpp | 700 +++++++++++ .../codes_for_ipynb/cnn_inference_f32.patch | 245 ++++ Libraries/oneDNN/simple_model/q | 32 + Libraries/oneDNN/simple_model/sample.json | 27 + .../oneDNN/simple_model/simple_model.ipynb | 957 +++++++++++++++ 26 files changed, 4672 insertions(+) create mode 100644 Libraries/oneCCL/README.md create mode 100644 Libraries/oneCCL/oneCCL_Getting_Started/CMakeLists.txt create mode 100644 Libraries/oneCCL/oneCCL_Getting_Started/License.txt create mode 100644 Libraries/oneCCL/oneCCL_Getting_Started/README.md create mode 100644 Libraries/oneCCL/oneCCL_Getting_Started/oneCCL_Getting_Started.ipynb create mode 100755 Libraries/oneCCL/oneCCL_Getting_Started/q create mode 100644 
Libraries/oneCCL/oneCCL_Getting_Started/sample.json create mode 100644 Libraries/oneDNN/README.md create mode 100644 Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt create mode 100644 Libraries/oneDNN/dpcpp_interoperability/License.txt create mode 100644 Libraries/oneDNN/dpcpp_interoperability/README.md create mode 100644 Libraries/oneDNN/dpcpp_interoperability/sample.json create mode 100644 Libraries/oneDNN/getting_started/CMakeLists.txt create mode 100644 Libraries/oneDNN/getting_started/License.txt create mode 100644 Libraries/oneDNN/getting_started/README.md create mode 100644 Libraries/oneDNN/getting_started/getting_started.ipynb create mode 100755 Libraries/oneDNN/getting_started/q create mode 100644 Libraries/oneDNN/getting_started/sample.json create mode 100644 Libraries/oneDNN/simple_model/CMakeLists.txt create mode 100644 Libraries/oneDNN/simple_model/License.txt create mode 100644 Libraries/oneDNN/simple_model/README.md create mode 100644 Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.cpp create mode 100644 Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.patch create mode 100755 Libraries/oneDNN/simple_model/q create mode 100644 Libraries/oneDNN/simple_model/sample.json create mode 100644 Libraries/oneDNN/simple_model/simple_model.ipynb diff --git a/Libraries/oneCCL/README.md b/Libraries/oneCCL/README.md new file mode 100644 index 0000000000..c6057a55f0 --- /dev/null +++ b/Libraries/oneCCL/README.md @@ -0,0 +1,19 @@ +# Intel oneAPI Collective Communications Library (oneCCL) + +Collective Communication Library is a library providing an efficient implementation of communication patterns used in deep learning. 
+ +Github : https://github.com/oneapi-src/oneCCL + +## License +The code samples are licensed under MIT license + +# oneCCL samples + +| Type | Name | Description | +| --------- | ----------------------- | ------------------------------------------------------------ | +| Component | oneCCL_Getting_Started | These C++ & C API examples demonstrate the basics of the oneCCL programming model by invoking different collective operations such as allreduce. | +| Component | oneCCL_Getting_Started.ipynb |This Jupyter Notebook demonstrates how to compile a oneCCL sample with different releases and how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++ via batch jobs on the Intel oneAPI DevCloud (see the Notice below)| +> Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. \ +Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ +Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ +Once users are in the JupyterLab with downloaded jupyter notebook samples, they can start following the steps without further installation needed. 
diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/CMakeLists.txt b/Libraries/oneCCL/oneCCL_Getting_Started/CMakeLists.txt new file mode 100644 index 0000000000..7b151c24a8 --- /dev/null +++ b/Libraries/oneCCL/oneCCL_Getting_Started/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 2.8.11) +if("${CMAKE_CXX_COMPILER}" STREQUAL "") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") +endif() +project (oneCCL_Getting_Started) +if("$ENV{EXAMPLE_ROOT}" STREQUAL "") + message(" - use default examples") + if("$ENV{CCL_CONFIGURATION}" MATCHES "cpu_gpu_dpcpp") # quoted so an unset variable expands to "" instead of vanishing from the if() arguments + file(COPY $ENV{CCL_ROOT}/examples/sycl DESTINATION src) + endif() + file(COPY $ENV{CCL_ROOT}/examples/cpu DESTINATION src) + file(COPY $ENV{CCL_ROOT}/examples/common DESTINATION src) + file(COPY $ENV{CCL_ROOT}/examples/benchmark DESTINATION src) + file(COPY $ENV{CCL_ROOT}/examples/include DESTINATION src) + file(COPY $ENV{CCL_ROOT}/examples/CMakeLists.txt DESTINATION src) + add_subdirectory (${PROJECT_BINARY_DIR}/src out) +else() + add_subdirectory ($ENV{EXAMPLE_ROOT} out) +endif() diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/License.txt b/Libraries/oneCCL/oneCCL_Getting_Started/License.txt new file mode 100644 index 0000000000..8b5e331b54 --- /dev/null +++ b/Libraries/oneCCL/oneCCL_Getting_Started/License.txt @@ -0,0 +1,19 @@ +Copyright (c) 2020, Intel Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/README.md b/Libraries/oneCCL/oneCCL_Getting_Started/README.md new file mode 100644 index 0000000000..78e7ded1bd --- /dev/null +++ b/Libraries/oneCCL/oneCCL_Getting_Started/README.md @@ -0,0 +1,175 @@ +# oneCCL Getting Started samples +The CCL sample codes are implemented using C++, C and DPC++ language for CPU and GPU. +By using all reduce collective operation samples, users can understand how to compile oneCCL codes with various oneCCL configurations in Intel oneAPI environment. + +| Optimized for | Description +|:--- |:--- +| OS | Linux Ubuntu 18.04; +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel oneAPI Collective Communications Library (oneCCL), Intel oneAPI DPC++ Compiler, Intel oneAPI DPC++ Library (oneDPL), GNU Compiler +| What you will learn | basic oneCCL programming model for both Intel CPU and GPU +| Time to complete | 15 minutes + +## List of Samples +| C++ API | C API | Collective Operation | +| ------ | ------ | ------ | +| sycl_allreduce_cpp_test.cpp | sycl_allreduce_test.cpp |[Allreduce](https://intel.github.io/oneccl/spec/communication_primitives.html#allreduce) | +| cpu_allreduce_cpp_test.cpp | cpu_allreduce_test.cpp/cpu_allreduce_bfp16.c |[Allreduce](https://intel.github.io/oneccl/spec/communication_primitives.html#allreduce) | +|oneCCL_Getting_Started.ipynb (check below Notice)| | | +> Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. 
\ +Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ +Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ +Once users are in the JupyterLab with downloaded jupyter notebook samples, they can start following the steps without further installation needed. + +## Purpose +The sample demonstrates how to compile the code with various oneCCL configurations in Intel oneAPI environment. + +## License +These code samples are licensed under MIT license + +## Prerequisites + +### CPU + +----- + +The samples below require the following components, which are part of the [Intel oneAPI DL Framework Developer Toolkit (DLFD Kit) +](https://software.intel.com/en-us/oneapi/dldev-kit) +* Intel oneAPI Collective Communications Library (oneCCL) + +You can refer to this page [oneAPI](https://software.intel.com/en-us/oneapi) for toolkit installation. + + +### GPU and CPU + +----- + +The samples below require the following components, which are part of the [Intel oneAPI Base Toolkit](https://software.intel.com/en-us/oneapi/oneapi-kit) +* Intel oneAPI Collective Communications Library (oneCCL) +* Intel oneAPI DPC++ Compiler +* Intel oneAPI DPC++ Library (oneDPL) + +The samples also require an OpenCL driver. Please refer to [System Requirements](https://software.intel.com/en-us/articles/intel-oneapi-base-toolkit-system-requirements) for OpenCL driver installation. + + +You can refer to this page [oneAPI](https://software.intel.com/en-us/oneapi) for toolkit installation. + + + + +## Building the samples for CPU and GPU + +### on a Linux* System + +#### CPU only: + +- Build the samples with GCC for CPU only \ + please replace ${ONEAPI_ROOT} with your installation path. 
\ + ex : /opt/intel/inteloneapi \ + Don't need to replace {DPCPP_CMPLR_ROOT} + ``` + source ${ONEAPI_ROOT}/setvars.sh --ccl-configuration=cpu_icc + + cd oneapi-toolkit/oneCCL/oneCCL_Getting_Started + mkdir build + cd build + cmake .. -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ + make cpu_allreduce_cpp_test + ``` +> NOTE: The source file "cpu_allreduce_cpp_test.cpp" will be copied from ${INTEL_ONEAPI_INSTALL_FOLDER}/ccl/latest/examples/cpu to build/src/cpu folder. +Users can rebuild the cpu_allreduce_cpp_test.cpp by typing "make cpu_allreduce_cpp_test" under build folder. + +#### GPU and CPU: + +- Build the samples with SYCL for GPU and CPU \ + please replace ${ONEAPI_ROOT} for your installation path. \ + ex : /opt/intel/inteloneapi \ + Don't need to replace {DPCPP_CMPLR_ROOT} + ``` + source ${ONEAPI_ROOT}/setvars.sh --ccl-configuration=cpu_gpu_dpcpp + + cd oneapi-toolkit/oneCCL/oneCCL_Getting_Started + mkdir build + cd build + cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp + make sycl_allreduce_cpp_test + ``` +> NOTE: The source file "sycl_allreduce_cpp_test.cpp" will be copied from ${INTEL_ONEAPI_INSTALL_FOLDER}/ccl/latest/examples/sycl to build/src/sycl folder. +Users can rebuild the sycl_allreduce_cpp_test.cpp by typing "make sycl_allreduce_cpp_test" under build folder. + +## Running the Sample + +### on a Linux* System + +#### CPU only: +- Run the program \ + take cpu_allreduce_cpp_test for example. \ + you can apply those steps for all other sample binaries. \ + please replace the {NUMBER_OF_PROCESSES} with integer number accordingly + + ``` + mpirun -n ${NUMBER_OF_PROCESSES} ./out/cpu/cpu_allreduce_cpp_test + ``` + + ex: + ``` + mpirun -n 2 ./out/cpu/cpu_allreduce_cpp_test + ``` + + +#### GPU and CPU: +- Run the program \ + take sycl_allreduce_cpp_test for example. \ + you can apply those steps for all other sample binaries. 
\ + please replace the {NUMBER_OF_PROCESSES} with integer number accordingly + + ``` + mpirun -n ${NUMBER_OF_PROCESSES} ./out/sycl/sycl_allreduce_cpp_test gpu|cpu|host|default + ``` + + ex: run on GPU + ``` + mpirun -n 2 ./out/sycl/sycl_allreduce_cpp_test gpu + ``` + + +### Example of Output + +#### on Linux +- Run the program on CPU or GPU following [How to Run Section](#running-the-sample) +- CPU Results + + ``` + Provided device type: cpu + Running on Intel(R) Core(TM) i7-7567U CPU @ 3.50GHz + Example passes + ``` + please note that name of running device may vary according to your environment + + +- GPU Results + ``` + Provided device type: gpu + Running on Intel(R) Gen9 HD Graphics NEO + Example passes + ``` + please note that name of running device may vary according to your environment + +- Enable oneCCL Verbose log + + There are different log levels in oneCCL. Users can refer to below table for different log levels. + + | CCL_LOG_LEVEL | value + | :------ | :------ + | ERROR | 0 + | INFO | 1 + | DEBUG | 2 + | TRACE | 3 + + + Users can enable oneCCL verbose log by following below command to see more + runtime information from oneCCL. 
+ ``` + export CCL_LOG_LEVEL=1 + ``` + diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/oneCCL_Getting_Started.ipynb b/Libraries/oneCCL/oneCCL_Getting_Started/oneCCL_Getting_Started.ipynb new file mode 100644 index 0000000000..e9f18415c6 --- /dev/null +++ b/Libraries/oneCCL/oneCCL_Getting_Started/oneCCL_Getting_Started.ipynb @@ -0,0 +1,1024 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Module 1.1 - port an Intel® oneAPI Collective Communications Library (oneCCL) sample from CPU to GPU - CCL Allreduce \n", + "\n", + "## Learning Objectives\n", + "In this module, the developer will:\n", + "* Learn different oneCCL configurations inside the Intel® oneAPI toolkit\n", + "* Learn how to compile a oneCCL sample with different configurations via batch jobs on the Intel® DevCloud for oneAPI or in local environments\n", + "* Learn how to program oneCCL with a simple sample\n", + "* Learn how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++\n", + "* Learn how to collect VTune™ Amplifier data for CPU and GPU runs\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# CCL Allreduce CPU to GPU porting Exercise\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1 : introduce oneCCL configurations inside oneAPI toolkits\n", + "oneCCL has two different configurations inside the oneAPI toolkits. Both lib and include folders under the oneCCL installation path contain two different configurations, and each configuration supports a different compiler." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the installation path of your oneAPI toolkit:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env ONEAPI_INSTALL=/opt/intel/inteloneapi/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!printf '%s\\n' $ONEAPI_INSTALL/ccl/latest/lib/*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are two different folders under the oneCCL installation path, and each of those configurations supports different features. \n", + "This tutorial will guide you on how to compile and run against different oneCCL configurations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, create a lab folder for this exercise:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir lab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2 : Editing the cpu_allreduce_cpp_test.cpp code which only supports CPU\n", + "\n", + "This C++ API example demonstrates how to build a global reduction operation by using the sum function, and it can run only on CPU.\n", + "You can find a detailed allreduce API explanation at this [link](https://intel.github.io/oneccl/spec/communication_primitives.html#allreduce)\n", + "\n", + "\n", + "The Jupyter cell below with the gray background can be edited in-place and saved.\n", + "The first line of the cell contains the command **%%writefile ' lab/cpu_allreduce_cpp_test.cpp'** This tells the input cell to save the contents of the cell into the file name ' cpu_allreduce_cpp_test.cpp' As you edit the cell and run it, it will save your changes into that file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile 
lab/cpu_allreduce_cpp_test.cpp\n", + "#include <iostream>\n", + "#include <stdio.h>\n", + "#include \"ccl.hpp\"\n", + "\n", + "#define COUNT 128\n", + "\n", + "using namespace std;\n", + "\n", + "int main(int argc, char** argv)\n", + "{\n", + " int i = 0;\n", + " int size = 0;\n", + " int rank = 0;\n", + "\n", + " auto sendbuf = new int[COUNT];\n", + " auto recvbuf = new int[COUNT];\n", + "\n", + " auto comm = ccl::environment::instance().create_communicator();\n", + " auto stream = ccl::environment::instance().create_stream();\n", + "\n", + " rank = comm->rank();\n", + " size = comm->size();\n", + "\n", + " /* initialize sendbuf */\n", + " for (i = 0; i < COUNT; i++) {\n", + " sendbuf[i] = rank;\n", + " }\n", + "\n", + " /* modify sendbuf */\n", + " for (i = 0; i < COUNT; i++) {\n", + " sendbuf[i] += 1;\n", + " }\n", + "\n", + " /* invoke ccl_allreduce */\n", + " comm->allreduce(sendbuf,\n", + " recvbuf,\n", + " COUNT,\n", + " ccl::reduction::sum,\n", + " nullptr, /* attr */\n", + " stream)->wait();\n", + "\n", + " /* check correctness of recvbuf */\n", + " for (i = 0; i < COUNT; i++) {\n", + " if (recvbuf[i] != size * (size + 1) / 2) {\n", + " recvbuf[i] = -1;\n", + " }\n", + " }\n", + "\n", + " /* print out the result of the test */\n", + " if (rank == 0) {\n", + " for (i = 0; i < COUNT; i++) {\n", + " if (recvbuf[i] == -1) {\n", + " cout << \"FAILED\" << endl;\n", + " break;\n", + " }\n", + " }\n", + " if (i == COUNT) {\n", + " cout << \"PASSED\" << endl;\n", + " }\n", + " }\n", + "\n", + " return 0;\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, copy the required CMake file into the lab folder. The top half of CMakeLists.txt handles CPU-only samples, and the bottom half handles DPC++ samples with CPU and GPU support." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile lab/CMakeLists.txt\n", + "#cmake_minimum_required (VERSION 2.8)\n", + "#project(CCL_SAMPLES)\n", + "set(CCL_TEST_INCLUDE_DIR \"$ENV{PWD}/../include\")\n", + "set(CMAKE_INSTALL_PREFIX \"$ENV{PWD}/_install\")\n", + "if(${CMAKE_CXX_COMPILER_ID} STREQUAL \"GNU\")\n", + " file(GLOB sources \"cpu_*.c\" \"cpu_*.cpp\")\n", + " set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_FLAGS} -std=c++11\")\n", + " set(CCL_INCLUDE_DIR \"$ENV{CCL_ROOT}/include/cpu_icc\")\n", + " set(CCL_LIB_DIR \"$ENV{CCL_ROOT}/lib/cpu_icc\")\n", + " foreach(src ${sources})\n", + " include_directories(${CCL_INCLUDE_DIR})\n", + " include_directories(${CCL_TEST_INCLUDE_DIR})\n", + " link_directories(${CCL_LIB_DIR})\n", + " get_filename_component(executable ${src} NAME_WE)\n", + " add_executable(${executable} ${src})\n", + " target_link_libraries(${executable} PUBLIC rt)\n", + " target_link_libraries(${executable} PUBLIC m)\n", + " target_link_libraries(${executable} PRIVATE ccl)\n", + " target_link_libraries(${executable} PUBLIC pthread dl stdc++)\n", + " install(TARGETS ${executable} RUNTIME DESTINATION \"${CMAKE_INSTALL_PREFIX}\")\n", + " endforeach()\n", + "endif()\n", + "\n", + "if(${CMAKE_CXX_COMPILER_ID} STREQUAL \"Clang\")\n", + " set(CCL_INCLUDE_DIRS \"${CCL_INCLUDE_DIRS} $ENV{SYCL_BUNDLE_ROOT}/include\")\n", + " set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -fsycl -std=c++11\")\n", + " file(GLOB sources \"sycl_*.c\" \"sycl_*.cpp\")\n", + " set(CCL_INCLUDE_DIR \"$ENV{CCL_ROOT}/include/cpu_gpu_dpcpp\")\n", + " set(CCL_LIB_DIR \"$ENV{CCL_ROOT}/lib/cpu_gpu_dpcpp\")\n", + " foreach(src ${sources})\n", + " include_directories(${CCL_INCLUDE_DIR})\n", + " include_directories(${CCL_TEST_INCLUDE_DIR})\n", + " link_directories(${CCL_LIB_DIR})\n", + " get_filename_component(executable ${src} NAME_WE)\n", + " add_executable(${executable} ${src})\n", + " 
target_link_libraries(${executable} PUBLIC rt)\n", + " target_link_libraries(${executable} PUBLIC m)\n", + " target_link_libraries(${executable} PRIVATE ccl)\n", + " target_link_libraries(${executable} PRIVATE OpenCL)\n", + " target_link_libraries(${executable} PRIVATE sycl)\n", + " install(TARGETS ${executable} RUNTIME DESTINATION \"${CMAKE_INSTALL_PREFIX}\")\n", + " endforeach()\n", + "endif()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step3: Build and Execution\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run with GNU Compiler and OpenMP\n", + "The global reduction operations by using sum function sample uses the GNU compiler for this CPU. The following section guides you on how to build with G++ and run on a CPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_icc --force > /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir cpu_gomp\n", + "cd cpu_gomp\n", + "cmake .. 
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++\n", + "make cpu_allreduce_cpp_test\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the Intel DevCloud or in local environments.\n", + "\n", + "#### Script - run.sh\n", + "the script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user must switch to the g++ oneCCL configuration by inputting a custom configuration \"--ccl-configuration=cpu_icc\" when running \"source setvars.sh\".\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_icc --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "./cpu_gomp/out/cpu_allreduce_cpp_test\n", + "echo \"########## Done with the run\"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "!rm -rf cpu_gomp; chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Analyze performance with VTune Amplifier\n", + "Use the VTune Amplifier command line to analyze performance and display the summary." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do CPU profiling first\n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_icc --force \n", + "type=hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir $(pwd)/vtune_data $(pwd)/cpu_gomp/out/cpu_allreduce_cpp_test\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display the VTune Amplifier summary report generated in HTML format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do GPU profiling \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The profiling type is changed from hotspots to gpu-hotspots in the script below to do basic GPU profiling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_icc --force \n", + "type=gpu-hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir $(pwd)/vtune_data $(pwd)/cpu_gomp/out/cpu_allreduce_cpp_test\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary-gpu.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display the VTune Amplifier summary report generated in HTML format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the VTune Amplifier summary page, the GPU is stalled/idle all the time. This sample does not utilize GPU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary-gpu.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5 : Modifying the cpu_allreduce_cpp_test.cpp code to support both CPU and GPU\n", + "\n", + "In this session, we will convert the above cpu_allreduce_cpp_test.cpp to support both CPU and GPU and compile the sample with DPC++ instead of g++.\n", + "\n", + "There are several steps to complete the code conversion from CPU to GPU for this sample.\n", + "\n", + "* Step 0 : Define inline functions to create sycl queue with the selected selector\n", + "* Step 1 : Declare the sycl queue and sycl buffers\n", + "* Step 2 : Use the inline functions in Step 0 to create the sycl queue\n", + "* Step 3 : Access sycl buffer via its accessor on both the host and target side \n", + "* Step 3.1 : Initialize sycl buffer and its accessor on the host side\n", + "* Step 3.2 : Modify sycl buffer via its accessor on the target device side \n", + "* Step 3.3 : Check sycl buffer's correctness on the target device side \n", + "* Step 3.4 : Check sycl buffer's correctness on the host side\n", + "\n", + "You can find related modifications below in sycl_allreduce_cpp_test.cpp, and the modifications for each step are wrapped up with \">>>>>>\" and \"<<<<<<\".\n", + "\n", + "**_NOTE:_** Host 
Accessors: The constructor for a host accessor waits for all kernels that modify the same buffer (or\n", + "image) in any queues to complete and then copies data back to host memory before the constructor returns.\n", + "Any command groups with requirements to the same memory object cannot execute until the host accessor\n", + "is destroyed. **Therefore, we must have { } for Step 3.1**\n", + "\n", + "There are two files in this DPC++ allreduce sample:\n", + "* sycl_base.hpp\n", + "* sycl_allreduce_cpp_test.cpp\n", + "\n", + "sycl_base.hpp contains inline functions to create sycl queue with the selected selector, and main program is in sycl_allreduce_cpp_test.cpp.\n", + "\n", + "The Jupyter cell below with the gray background can be edited in-place and saved.\n", + "The first line of the cell contains the command **%%writefile ' lab/sycl_base.hpp'** This tells the input cell to save the contents of the cell into the file name ' sycl_base.hpp' As you edit the cell and run it, it will save your changes into that file.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### lab/sycl_base.hpp\n", + "header file for inline functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile lab/sycl_base.hpp\n", + "#include <iostream>\n", + "#include <string.h>\n", + "\n", + "// ------ GPU code conversion --Step 0 >>>>>>\n", + "// Define inline functions to create sycl queue with the selected selector\n", + "#include <CL/sycl.hpp>\n", + "#include \"ccl.hpp\"\n", + "\n", + "using namespace std;\n", + "\n", + "using namespace cl::sycl;\n", + "using namespace cl::sycl::access;\n", + "\n", + "inline bool has_gpu()\n", + "{\n", + " std::vector<cl::sycl::device> devices = cl::sycl::device::get_devices();\n", + " for (const auto& device : devices)\n", + " {\n", + " if (device.is_gpu())\n", + " {\n", + " return true;\n", + " }\n", + " }\n", + " return false;\n", + "}\n", + "\n", + "inline int create_sycl_queue(int argc, char 
**argv, cl::sycl::queue &queue)\n", + "{\n", + " unique_ptr<cl::sycl::device_selector> selector;\n", + " if (argc == 2)\n", + " {\n", + " if (strcmp(argv[1], \"cpu\") == 0)\n", + " {\n", + " selector.reset(new cl::sycl::cpu_selector());\n", + " }\n", + " else if (strcmp(argv[1], \"gpu\") == 0)\n", + " {\n", + " if (has_gpu()) \n", + " {\n", + " selector.reset(new cl::sycl::gpu_selector());\n", + " }\n", + " else\n", + " {\n", + " selector.reset(new cl::sycl::default_selector());\n", + " cout << \"GPU is unavailable, default_selector has been created instead of gpu_selector.\" << std::endl;\n", + " }\n", + " }\n", + " else if (strcmp(argv[1], \"host\") == 0)\n", + " {\n", + " selector.reset(new cl::sycl::host_selector());\n", + " }\n", + " else if (strcmp(argv[1], \"default\") == 0)\n", + " {\n", + " selector.reset(new cl::sycl::host_selector());\n", + " cout << \"Accelerator is unavailable for multiprocessing, host_selector has been created instead of default_selector.\" << std::endl;\n", + " }\n", + " else\n", + " {\n", + " cerr << \"Please provide device type: cpu | gpu | host | default \" << std::endl;\n", + " return -1;\n", + " }\n", + " queue = cl::sycl::queue(*selector);\n", + " cout << \"Provided device type \" << argv[1] << \"\\nRunning on \"\n", + " << queue.get_device().get_info<cl::sycl::info::device::name>()\n", + " << \"\\n\";\n", + " }\n", + " else\n", + " {\n", + " cerr << \"Please provide device type: cpu | gpu | host | default \" << std::endl;\n", + " return -1;\n", + " }\n", + " return 0;\n", + "}\n", + " \n", + "//<<<<<< ------ GPU code conversion --Step 0 " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "##### lab/sycl_allreduce_cpp_test.cpp\n", + "Implementation of SYCL allreduce functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Jupyter cell below with the gray background can be edited in-place and saved.\n", + "The first line of the cell contains the command **%%writefile ' lab/sycl_allreduce_cpp_test.cpp'** This tells the input 
cell to save the contents of the cell into the file name ' sycl_allreduce_cpp_test.cpp' As you edit the cell and run it, it will save your changes into that file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile lab/sycl_allreduce_cpp_test.cpp\n", + "// ------ GPU code conversion --Step 0 >>>>>>\n", + "#include \"sycl_base.hpp\"\n", + "//<<<<<< ------ GPU code conversion --Step 0 \n", + "#define COUNT 128\n", + "\n", + "int main(int argc, char** argv)\n", + "{\n", + " int i = 0;\n", + " int size = 0;\n", + " int rank = 0;\n", + "\n", + " // ------ GPU code conversion --Step 1 >>>>>>\n", + " // Declare the sycl queue and sycl buffers\n", + " cl::sycl::queue q;\n", + " cl::sycl::buffer sendbuf(COUNT);\n", + " cl::sycl::buffer recvbuf(COUNT);\n", + " //<<<<<< ------ GPU code conversion --Step 1 \n", + " \n", + " // ------ GPU code conversion --Step 2 >>>>>>\n", + " // Use inline functions in Step 0 to create the sycl queue\n", + " if (create_sycl_queue(argc, argv, q) != 0) {\n", + " return -1;\n", + " }\n", + " //<<<<<< ------ GPU code conversion --Step 2\n", + " \n", + " auto comm = ccl::environment::instance().create_communicator();\n", + " auto stream = ccl::environment::instance().create_stream();\n", + "\n", + " rank = comm->rank();\n", + " size = comm->size();\n", + "\n", + " /* initialize sendbuf and recvbuf*/\n", + " // ------ GPU code conversion --Step 3.1 >>>>>>\n", + " {\n", + " // open buffers and initialize them on the CPU side \n", + " auto host_acc_sbuf = sendbuf.get_access();\n", + " auto host_acc_rbuf = recvbuf.get_access();\n", + " for (i = 0; i < COUNT; i++) {\n", + " host_acc_sbuf[i] = rank;\n", + " host_acc_rbuf[i] = -1;\n", + " }\n", + " }\n", + " //<<<<<< ------ GPU code conversion --Step 3.1\n", + "\n", + " /* modify sendbuf */\n", + " // ------ GPU code conversion --Step 3.2 >>>>>>\n", + " // open sendbuf and modify it on the target device side \n", + " 
q.submit([&](handler& cgh){\n", + " auto dev_acc_sbuf = sendbuf.get_access(cgh);\n", + " cgh.parallel_for(range<1>{COUNT}, [=](item<1> id) {\n", + " dev_acc_sbuf[id] += 1;\n", + " });\n", + " });\n", + " //<<<<<< ------ GPU code conversion --Step 3.2\n", + " \n", + " /* invoke ccl_allreduce */\n", + " comm->allreduce(sendbuf,\n", + " recvbuf,\n", + " COUNT,\n", + " ccl::reduction::sum,\n", + " nullptr, /* attr */\n", + " stream)->wait();\n", + "\n", + " \n", + " \n", + " /* check correctness of recvbuf */\n", + " // ------ GPU code conversion --Step 3.3 >>>>>>\n", + " // open recvbuf and check its correctness on the target device side \n", + " q.submit([&](handler& cgh){\n", + " auto dev_acc_rbuf = recvbuf.get_access(cgh);\n", + " cgh.parallel_for(range<1>{COUNT}, [=](item<1> id) {\n", + " if (dev_acc_rbuf[id] != size*(size+1)/2) {\n", + " dev_acc_rbuf[id] = -1;\n", + " }\n", + " });\n", + " });\n", + " //<<<<<< ------ GPU code conversion --Step 3.3\n", + " \n", + " /* print out the result of the test */\n", + " if (rank == 0) {\n", + " // ------ GPU code conversion --Step 3.4 >>>>>>\n", + " // open buffers and validate them on the CPU side \n", + " auto host_acc_rbuf_new = recvbuf.get_access();\n", + " for (i = 0; i < COUNT; i++) {\n", + " if (host_acc_rbuf_new[i] == -1) {\n", + " //<<<<<< ------ GPU code conversion --Step 3.4\n", + " cout << \"FAILED\" << std::endl;\n", + " break;\n", + " }\n", + " }\n", + " if (i == COUNT) {\n", + " cout << \"PASSED\" << std::endl;\n", + " }\n", + " }\n", + "\n", + " return 0;\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run with the DPC++ Compiler\n", + "For this global reduction operation sample on GPU and CPU, DPC++ is used as the compiler.\n", + "The following section guides you how to build with DPC++ and run on GPU and CPU." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force > /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir dpcpp\n", + "cd dpcpp\n", + "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp\n", + "make sycl_allreduce_cpp_test\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, execute your program on the DevCloud or in local environments.\n", + "\n", + "#### Script - run.sh\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "./dpcpp/out/sycl_allreduce_cpp_test gpu\n", + "echo \"########## Done with the run\"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the Intel DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails it is assumed that build/run will be local." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!rm -rf dpcpp; chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Analyze performance with VTune Amplifier\n", + "Use the VTune Amplifier command line to analyze performance and display the summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do CPU profiling first. \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force\n", + "type=hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir vtune_data $(pwd)/dpcpp/out/sycl_allreduce_cpp_test cpu\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!
chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display the VTune Amplifier summary report generated in HTML format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do GPU profiling \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force\n", + "type=gpu-hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir $(pwd)/vtune_data $(pwd)/dpcpp/out/sycl_allreduce_cpp_test gpu\n", + "\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary-gpu.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display the VTune Amplifier summary report generated in HTML format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary-gpu.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here are the supported profiling types from VTune Amplifier.\n", + "\n", + "* type=hotspots\n", + "* type=memory-consumption\n", + "* type=uarch-exploration\n", + "* type=memory-access\n", + "* type=threading\n", + "* type=hpc-performance\n", + "* type=system-overview\n", + "* type=graphics-rendering\n", + "* type=io\n", + "* type=fpga-interaction\n", + "* type=gpu-offload\n", + "* type=gpu-hotspots\n", + "* type=throttling\n", + "* type=platform-profiler\n", + "* type=cpugpu-concurrency\n", + "* type=tsx-exploration\n", + "* type=tsx-hotspots\n", + "* type=sgx-hotspots\n", + "\n", + "For details of VTune Amplifier usage, please refer to https://software.intel.com/en-us/oneapi/vtune-profiler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Summary\n", + "In this lab the developer will have learned the following:\n", + "* Know different oneCCL configurations inside oneAPI toolkit\n", + "* Know how to compile a oneCCL sample with different configurations via batch jobs on the Intel oneAPI DevCloud or in local environments\n", + "* Know how to program oneCCL with a simple sample\n", + "* Know how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++\n", + "* Know how to collect VTune Amplifier data for CPU and GPU runs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "525.6px", + "left": "28px", + "top": "137.8px", + "width": "301.109px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/q b/Libraries/oneCCL/oneCCL_Getting_Started/q new file mode 100755 index 0000000000..7905552a6f --- /dev/null +++ b/Libraries/oneCCL/oneCCL_Getting_Started/q @@ -0,0 +1,32 @@ +#!/bin/bash +#======================================== +# Script to submit job in Intel devcloud +# +# Version: 0.5 +#======================================== +if [ -z "$1" ]; then + echo "Missing script argument, Usage: ./q run.sh" +elif [ ! -f "$1" ]; then + echo "File $1 does not exist" +else + script=$1 + rm *.sh.* > /dev/null 2>&1 + #qsub + echo "Submitting job:" + qsub -l nodes=1:gpu:ppn=2 -d . $script + #qstat + qstat + #wait for output file to be generated and display + echo -ne "Waiting for Output." + until [ -f $script.o* ]; do + sleep 1 + echo -ne "." 
+ ((timeout++)) + if [ $timeout == 60 ]; then + echo "TimeOut 60 seconds: Job is still queued for execution, check for output file later (*.sh.o)" + break + fi + done + cat $script.o* + cat $script.e* +fi diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/sample.json b/Libraries/oneCCL/oneCCL_Getting_Started/sample.json new file mode 100644 index 0000000000..3feba3c102 --- /dev/null +++ b/Libraries/oneCCL/oneCCL_Getting_Started/sample.json @@ -0,0 +1,26 @@ +{ + "guid": "C56209D9-5CF1-4EEC-AE95-596D81640AEB", + "name": "oneCCL Getting Started", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneCCL"], + "description": "Basic oneCCL programming model for both Intel CPU and GPU.", + "toolchain": ["dpcpp"], + "languages": [{"cpp":{}}], + "dependencies": ["ccl"], + "os": ["linux"], + "builder": ["cli","cmake"], + "targetDevice": ["CPU", "GPU"], + "ciTests": { + "linux": [{ + "env": ["source /opt/intel/oneapi/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force" ], + "id": "gsg", + "steps": [ + "mkdir build", + "cd build", + "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", + "make", + "mpirun -n 2 ./out/sycl/sycl_allreduce_cpp_test cpu", + "mpirun -n 2 ./out/sycl/sycl_allreduce_cpp_test gpu" + ] + }] + } +} diff --git a/Libraries/oneDNN/README.md b/Libraries/oneDNN/README.md new file mode 100644 index 0000000000..4c11e0d6d5 --- /dev/null +++ b/Libraries/oneDNN/README.md @@ -0,0 +1,31 @@ +# oneAPI Deep Neural Network Library (oneDNN) + +oneAPI Deep Neural Network Library (oneDNN) is an open-source performance +library for deep learning applications. The library includes basic building +blocks for neural networks optimized for Intel Architecture Processors +and Intel Processor Graphics. oneDNN is intended for deep learning +applications and framework developers interested in improving application +performance on Intel CPUs and GPUs. 
+ +You can find library source code and code used by these samples at [oneDNN Github repository](https://github.com/oneapi-src/oneDNN). + +## License +The code samples are licensed under MIT license. + +# oneDNN Samples + +| Type | Name | Description +| --------- | ------------------------------------------------ | - +| Component | [getting_started](getting_started) | A C++ sample demonstrating basics of oneDNN programming model. The sample also includes a Jupyter notebook with step by step instructions on building code with different compilers and runtime configurations oneDNN supports. +| Component | [dpcpp_interoperability](dpcpp_interoperability) | A DPC++ example demonstrating interoperability of oneDNN with DPC++ application code. +| Component | [simple_model](simple_model) | A C++ example demonstrating implementation of simple convolutional model with oneDNN. The samples also include a Jupyter notebook with step by step instructions on running oneDNN-based application on a GPU. + +# Using Samples in Intel oneAPI DevCloud + +You can use oneDNN samples in +[Intel oneAPI DevCloud](https://devcloud.intel.com/oneapi/get-started/) +environment in the following ways: +* Login to a DevCloud system via SSH and + * use `git clone` to get a full copy of samples repository, or + * use `oneapi-cli` tool to download specific sample. +* Launch a JupyterLab server and run Jupyter Notebooks from your web browser.
diff --git a/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt b/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt new file mode 100644 index 0000000000..47f17001bb --- /dev/null +++ b/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 2.8.11) +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "dpcpp") +project (oneDNN_SYCL_InterOp) +file(COPY $ENV{DNNLROOT}/examples/sycl_interop.cpp DESTINATION src) +file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) +add_subdirectory (${PROJECT_BINARY_DIR}/src out) diff --git a/Libraries/oneDNN/dpcpp_interoperability/License.txt b/Libraries/oneDNN/dpcpp_interoperability/License.txt new file mode 100644 index 0000000000..8b5e331b54 --- /dev/null +++ b/Libraries/oneDNN/dpcpp_interoperability/License.txt @@ -0,0 +1,19 @@ +Copyright (c) 2020, Intel Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Libraries/oneDNN/dpcpp_interoperability/README.md b/Libraries/oneDNN/dpcpp_interoperability/README.md new file mode 100644 index 0000000000..064e80ce72 --- /dev/null +++ b/Libraries/oneDNN/dpcpp_interoperability/README.md @@ -0,0 +1,128 @@ +# oneDNN DPC++ Interoperability Sample + +This sample is implemented in DPC++ language and runs on CPU and GPU. + +| Optimized for | Description +| :--- | :--- +| OS | Linux Ubuntu 18.04; +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB) +| What you will learn | Using oneDNN in DPC++ application targeting Intel CPU or Intel GPU +| Time to complete | 15 minutes + +## What You Will Learn + +* How to create a GPU or CPU engine. +* How to create a memory descriptor/object. +* How to create a SYCL kernel for data initialization. +* How to access a SYCL buffer via SYCL interoperability interface. +* How to access a SYCL queue via SYCL interoperability interface. +* How to execute a SYCL kernel with related SYCL queue and SYCL buffer +* How to create operation descriptor/operation primitives descriptor/primitive. +* How to execute the primitive with the initialized memory. +* How to validate the result through a host accessor. + +## Pre-requisites + +The sample below require the following components, which are part of +Intel oneAPI Base Toolkit (Base Kit): + +* Intel oneAPI Deep Neural Network Library (oneDNN) +* Intel oneAPI DPC++ Compiler +* Intel oneAPI Threading Building Blocks (oneTBB) +* Intel Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver + +Refer to [Intel oneAPI Toolkits Installation Guide](https://software.intel.com/content/www/us/en/develop/articles/installation-guide-for-intel-oneapi-toolkits.html) +for instructions on installing these components. 
+ +## Building the sample for CPU and GPU + +### on a Linux* System + +#### Using DPC++ Compiler + +When compiled with Intel oneAPI DPC++ Compiler this sample runs on Intel CPU +or Intel GPU. + +Start with a clean console environment. + +``` +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh +``` + +Specific oneDNN configuration may be selected with +`--dnnl-configuration` option. Default configuration is `cpu_dpcpp_gpu_dpcpp`. + +Make sure that both the environments of compiler and oneDNN are properly set up +before you proceed with the following steps. +If setvars.sh complains "not found" for compiler or oneDNN, please check your +installation first. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_SYCL_InterOp +mkdir dpcpp +cd dpcpp +cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp +make sycl-interop-cpp +``` + +> NOTE: The source file `sycl_interop.cpp` will be in `dpcpp/src` folder. +> You can rebuild the sample by typing `make` in `dpcpp` folder. + +## Running the Sample + +### on a Linux* System +Run the program on CPU: + +``` +./out/sycl-interop-cpp cpu +``` + +Run the program on GPU + +``` +./out/sycl-interop-cpp gpu +``` + +> NOTE: Level Zero runtime is enabled by default. Please make sure proper +> installation of Level Zero driver including level-zero-devel package following +> installation guide. If you still encounter runtime issue such as "could not +> create a primitive", please apply workaround to set SYCL_BE=PI_OPENCL before +> running a DPC++ program. To apply the workaround in this sample add +> `export SYCL_BE=PI_OPENCL` in CMakeLists.txt. After applying the workaround, +> the sample will use OpenCL runtime instead. + +### Example of Output + +#### on a Linux* System + +Enable oneDNN verbose log: + +``` +export DNNL_VERBOSE=1 +``` + +Run the program on CPU or GPU following [How to Run Session](#how-to-run).
+ +CPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_training,data_f32::blocked:abcd:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,2x3x4x5,700.608 +Example passes +``` + +GPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +dnnl_verbose,exec,gpu,eltwise,ocl:ref:any,forward_training,data_f32::blocked:abcd:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,2x3x4x5 +Example passes +``` + +## Implementation Details + +This sample uses example code from oneDNN distribution. You can find this code +in [oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/sycl_interop.cpp). diff --git a/Libraries/oneDNN/dpcpp_interoperability/sample.json b/Libraries/oneDNN/dpcpp_interoperability/sample.json new file mode 100644 index 0000000000..a52693fdcf --- /dev/null +++ b/Libraries/oneDNN/dpcpp_interoperability/sample.json @@ -0,0 +1,26 @@ +{ + "guid": "EF50CE31-C467-4374-8BCC-4E5F93B4D1C1", + "name": "oneDNN SYCL Interop", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneDNN"], + "description": "oneDNN SYCL extensions API programming for both Intel CPU and GPU.", + "toolchain": ["dpcpp"], + "languages": [{"cpp":{}}], + "dependencies": ["oneDNN", "tbb"], + "os": ["linux"], + "builder": ["ide","cmake"], + "targetDevice": ["CPU", "GPU"], + "ciTests": { + "linux": [{ + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], + "id": "interop", + "steps": [ + "mkdir build", + "cd build", + "cmake ..
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", + "make sycl-interop-cpp", + "./out/sycl-interop-cpp cpu", + "SYCL_BE=PI_OPENCL ./out/sycl-interop-cpp gpu" + ] + }] + } +} diff --git a/Libraries/oneDNN/getting_started/CMakeLists.txt b/Libraries/oneDNN/getting_started/CMakeLists.txt new file mode 100644 index 0000000000..3799a98c7a --- /dev/null +++ b/Libraries/oneDNN/getting_started/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 2.8.11) +if("${CMAKE_CXX_COMPILER}" STREQUAL "") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") +endif() +project (oneDNN_Getting_Started) +if("$ENV{EXAMPLE_ROOT}" STREQUAL "") + message(" - use default examples") + file(COPY $ENV{DNNLROOT}/examples/getting_started.cpp DESTINATION src) + file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) + if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) + endif() + add_subdirectory (${PROJECT_BINARY_DIR}/src out) +else() + add_subdirectory ($ENV{EXAMPLE_ROOT} out) +endif() diff --git a/Libraries/oneDNN/getting_started/License.txt b/Libraries/oneDNN/getting_started/License.txt new file mode 100644 index 0000000000..8b5e331b54 --- /dev/null +++ b/Libraries/oneDNN/getting_started/License.txt @@ -0,0 +1,19 @@ +Copyright (c) 2020, Intel Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Libraries/oneDNN/getting_started/README.md b/Libraries/oneDNN/getting_started/README.md new file mode 100644 index 0000000000..8177c0429d --- /dev/null +++ b/Libraries/oneDNN/getting_started/README.md @@ -0,0 +1,184 @@ +# oneDNN Getting Started Sample + +This sample is implemented in C++ and executes on CPU or GPU. The sample +also includes [a Jupyter Notebook](getting_started.ipynb) that +demonstrates how to compile the code with various oneDNN configurations +in Intel oneAPI DevCloud environment. + +| Optimized for | Description +| :--- | :--- +| OS | Linux Ubuntu 18.04; Windows 10 +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB) +| What you will learn | basic oneDNN programming model for Intel CPU and GPU +| Time to complete | 15 minutes + +## What You Will Learn + +* How to create oneDNN memory objects. +* How to get data from application buffer into a oneDNN memory object. +* How tensor's logical dimensions and memory object formats relate. +* How to create oneDNN primitives. +* How to execute the primitives.
+ +## Pre-requisites + +The sample below requires the following components, which are part of +Intel oneAPI Base Toolkit (Base Kit): + +* Intel oneAPI Deep Neural Network Library (oneDNN) +* Intel oneAPI DPC++ Compiler +* Intel oneAPI Threading Building Blocks (oneTBB) +* Intel Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver + +Refer to [Intel oneAPI Toolkits Installation Guide](https://software.intel.com/content/www/us/en/develop/articles/installation-guide-for-intel-oneapi-toolkits.html) +for instructions on installing these components. + +## Building the sample for CPU and GPU + +### On a Linux* System + +#### Using DPC++ Compiler + +When compiled with Intel DPC++ Compiler this sample runs on Intel CPU +or Intel GPU and relies on Intel DPC++ Runtime for parallelism. + + + +Start with a clean console environment. + +``` +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh +``` + +Specific oneDNN configuration may be selected with +`--dnnl-configuration` option. Default configuration is `cpu_dpcpp_gpu_dpcpp`. + +Make sure that both the environments of compiler and oneDNN are properly set up +before you proceed with the following steps. If setvars.sh complains "not found" for +compiler or oneDNN, please check your installation first. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_Getting_Started +mkdir dpcpp +cd dpcpp +cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp +make getting-started-cpp +``` + +> NOTE: The source file `getting_started.cpp` will be copied from +>`${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/dpcpp` to `dpcpp/src` folder. +> You can rebuild the sample by typing `make` in `dpcpp` folder. + +### On a Windows* System + +When compiled with Microsoft C++ Compiler the sample runs on Intel CPU and uses +Microsoft OpenMP runtime for parallelism. + +#### Visual Studio* Version 2015 or Newer + +Start with Intel oneAPI command prompt for Microsoft Visual Studio.
+ +``` +C:\Program Files (x86)\intel\oneapi> oneDNN\latest\env\vars.bat --dnnl-configuration=cpu_vcomp +``` + +Make sure that both the environments of compiler and oneDNN are properly set up +before you proceed with the following steps. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_Getting_Started +mkdir cpu_vcomp +cd cpu_vcomp +cmake -G "Visual Studio 16 2019" .. +cmake --build . +``` + +> NOTE: You can open the oneDNN_CNN.sln inside cpu_vcomp folder to edit source +> code with Microsoft Visual Studio integrated development environment. + +## Running the Sample + +### On a Linux* System + +Run the program on CPU + +``` +./out/getting-started-cpp cpu +``` + +Run the program on GPU + +``` +./out/getting-started-cpp gpu +``` + +> NOTE: Level Zero runtime is enabled by default. Please make sure proper +> installation of Level Zero driver including level-zero-devel package following +> installation guide. If you still encounter runtime issue such as "could not +> create a primitive", please apply workaround to set SYCL_BE=PI_OPENCL before +> running a DPC++ program. To apply the workaround in this sample add +> `export SYCL_BE=PI_OPENCL` in CMakeLists.txt. After applying the workaround, +> the sample will use OpenCL runtime instead.
+ +### On a Windows* System + +Run the program on CPU + +``` +out\Debug\getting-started-cpp.exe +``` + +### Example of Output + +#### On a Linux* System + +Enable oneDNN verbose log + +``` +export DNNL_VERBOSE=1 +``` + +Run the program on CPU or GPU following [How to Run Session](#how-to-run) + +CPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13,704.982 +Example passes +``` + +GPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +dnnl_verbose,exec,gpu,eltwise,ocl:ref:any,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13 +Example passes +``` + +#### On a Windows* System + +Enable oneDNN verbose log + +``` +set DNNL_VERBOSE=1 +``` + +Run the program on CPU or GPU following [How to Run Session](#how-to-run). + +CPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13,704.982 +Example passes +``` + +## Implementation Details + +This sample uses example code from oneDNN distribution. You can find this code +in [oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/getting_started.cpp). 
diff --git a/Libraries/oneDNN/getting_started/getting_started.ipynb b/Libraries/oneDNN/getting_started/getting_started.ipynb new file mode 100644 index 0000000000..f87476aae2 --- /dev/null +++ b/Libraries/oneDNN/getting_started/getting_started.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Module 1.1 - Introduction to Intel® oneAPI Deep Neural Network Library (oneDNN) - Getting Started\n", + "\n", + "## Learning Objectives\n", + "In this module the developer will:\n", + "* Learn different oneDNN configurations inside the Intel® oneAPI toolkit\n", + "* Learn how to compile a oneDNN sample with different configurations via batch jobs on the Intel® DevCloud for oneAPI or in local environments.\n", + "* Learn how to program oneDNN with a simple sample\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Getting Started Sample Exercise\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## introduce oneDNN configurations inside Intel oneAPI toolkits\n", + "oneDNN has four different configurations inside the Intel oneAPI toolkits. 
Each configuration is in a different folder under the oneDNN installation path, and each configuration supports a different compiler or threading library.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the installation path of your oneAPI toolkit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env ONEAPI_INSTALL=/opt/intel/oneapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!printf '%s\\n' $ONEAPI_INSTALL/oneDNN/latest/cpu_*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are four different folders under the oneDNN installation path, and each of those configurations supports different features. This tutorial will show you how to compile and run against different oneDNN configurations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, create a lab folder for this exercise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir lab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparing the getting_started.cpp code\n", + "\n", + "This exercise uses the getting_started.cpp example from the oneDNN installation path.\n", + "\n", + "First, the section below copies the getting_started.cpp file into the lab folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp $ONEAPI_INSTALL/oneDNN/latest/cpu_dpcpp_gpu_dpcpp/examples/getting_started.cpp lab/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Users can browse the source code by running the section below, which also removes comments for readability."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cpp -fpreprocessed -dD -E lab/getting_started.cpp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, copy the required header files and CMake file into the lab folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp $ONEAPI_INSTALL/oneDNN/latest/cpu_dpcpp_gpu_dpcpp/examples/example_utils.hpp lab/\n", + "!cp $ONEAPI_INSTALL/oneDNN/latest/cpu_dpcpp_gpu_dpcpp/examples/example_utils.h lab/\n", + "!cp $ONEAPI_INSTALL/oneDNN/latest/cpu_dpcpp_gpu_dpcpp/examples/CMakeLists.txt lab/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build and Run with oneAPI DPC++ Compiler \n", + "One of the oneDNN configurations supports oneAPI DPC++ compiler, and it can run on different architectures by using DPC++.\n", + "The following section shows you how to build with DPC++ and run on different architectures." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler **dpcpp** command and flags that will generate the executable.\n", + "In order to use DPC++ compiler and related SYCL runtime, some definitions must be passed as cmake arguments.\n", + "Here are related cmake arguments for DPC++ configuration : \n", + "\n", + " -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -DDNNL_CPU_RUNTIME=SYCL -DDNNL_GPU_RUNTIME=SYCL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force> /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir dpcpp\n", + "cd dpcpp\n", + "cmake ..
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -DDNNL_CPU_RUNTIME=SYCL -DDNNL_GPU_RUNTIME=SYCL\n", + "make getting-started-cpp\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or a local machine.\n", + "\n", + "#### Script - run.sh\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "By default, the built program uses CPU as the execution engine, but the user can switch to GPU by giving an input argument \"gpu\".\n", + "The run.sh below passes the input argument \"cpu\" and therefore runs on CPU.\n", + "To run on GPU, simply replace the input argument \"cpu\" with \"gpu\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "./dpcpp/out/getting-started-cpp cpu\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the Intel DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "!
rm -rf dpcpp;chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Build and Run with GNU Compiler and OpenMP \n", + "One of the oneDNN configurations supports GNU compilers, but it can run only on CPU.\n", + "The following section shows you how to build with G++ and run on CPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable.\n", + "The user must switch to the G++ oneDNN configuration by inputting a custom configuration \"--dnnl-configuration=cpu_gomp\" when running \"source setvars.sh\".\n", + "In order to use the G++ compiler and related OMP runtime, some definitions must be passed as cmake arguments.\n", + "Here are related cmake arguments for the GNU OpenMP configuration : \n", + "\n", + " -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force> /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir cpu_gomp\n", + "cd cpu_gomp\n", + "cmake ..
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE\n", + "make getting-started-cpp\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or in local environments.\n", + "\n", + "#### Script - run.sh\n", + "the script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user must switch to the G++ oneDNN configuration by inputting a custom configuration \"--dnnl-configuration=cpu_gomp\" when running \"source setvars.sh\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force> /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "./cpu_gomp/out/getting-started-cpp\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "! 
rm -rf cpu_gomp;chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Build and Run with Intel Compiler and OpenMP\n", + "One of the oneDNN configurations supports Intel compilers, but it can run only on CPU.\n", + "The following section shows you how to build with ICC and run on CPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> NOTE : This section is optional and it is for developers who want to use Intel Compiler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable.\n", + "The user must switch to the ICC oneDNN configuration by inputting a custom configuration \"--dnnl-configuration=cpu_iomp\" when running \"source setvars.sh\".\n", + "In order to use ICC compiler and related OMP runtime, some definitions must be passed as cmake arguments.\n", + "Here are related cmake arguments for the Intel Compiler OpenMP configuration : \n", + "\n", + " -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_iomp --force> /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir cpu_iomp\n", + "cd cpu_iomp\n", + "cmake ..
-DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE\n", + "make getting-started-cpp\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or in local environments.\n", + "\n", + "#### Script - run.sh\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user must switch to the ICC oneDNN configuration by inputting a custom configuration \"--dnnl-configuration=cpu_iomp\" when running \"source setvars.sh\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_iomp --force> /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "./cpu_iomp/out/getting-started-cpp\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "! 
chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "# Build and Run with GNU Compiler and oneTBB \n", + "One of the oneDNN configurations supports Intel® oneAPI Threading Building Blocks (oneTBB) as its threading library, but it can run only on CPU.\n", + "The following section shows you how to build with oneTBB and run on CPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> NOTE : This section is optional and it is for developers who want to use Intel oneTBB" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable.\n", + "The user must switch to the oneDNN with oneTBB threading configuration by inputting a custom configuration \"--dnnl-configuration=cpu_tbb\" when running \"source setvars.sh\".\n", + "In order to use G++ compiler and related TBB runtime, some definitions must be passed as cmake arguments.\n", + "Here are related cmake arguments for the oneTBB configuration : \n", + "\n", + " -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=TBB -DDNNL_GPU_RUNTIME=NONE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_tbb --force> /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir cpu_tbb\n", + "cd cpu_tbb\n", + "cmake ..
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=TBB -DDNNL_GPU_RUNTIME=NONE\n", + "make getting-started-cpp\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or a local machine.\n", + "\n", + "#### Script - run.sh\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user must switch to the oneDNN with oneTBB threading configuration by inputting a custom configuration \"--dnnl-configuration=cpu_tbb\" when running \"source setvars.sh\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_tbb --force> /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "./cpu_tbb/out/getting-started-cpp\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "! 
chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Summary\n", + "In this lab the developer learned the following:\n", + "* What are the different oneDNN configurations inside the Intel oneAPI toolkits\n", + "* How to compile a oneDNN sample with different configurations via batch jobs on the Intel oneAPI DevCloud or in local environments\n", + "* How to program oneDNN with a simple sample\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "525.6px", + "left": "28px", + "top": "137.8px", + "width": "301.109px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Libraries/oneDNN/getting_started/q b/Libraries/oneDNN/getting_started/q new file mode 100755 index 0000000000..8377675780 --- /dev/null +++ b/Libraries/oneDNN/getting_started/q @@ -0,0 +1,32 @@ +#!/bin/bash +#======================================== +# Script to submit job in Intel devcloud +# +# Version: 0.5 +#======================================== +if [ -z "$1" ]; then + echo "Missing script argument, Usage: ./q run.sh" +elif [ ! 
-f "$1" ]; then + echo "File $1 does not exist" +else + script=$1 + rm *.sh.* > /dev/null 2>&1 + #qsub + echo "Submitting job:" + qsub -l nodes=1:gpu:ppn=2 -d . $script + #qstat + qstat + #wait for output file to be generated and display + echo -ne "Waiting for Output." + until [ -f $script.o* ]; do + sleep 1 + echo -ne "." + ((timeout++)) + if [ $timeout == 60 ]; then + echo "TimeOut 60 seconds: Job is still queued for execution, check for output file later (*.sh.o)" + break + fi + done + cat $script.o* + cat $script.e* +fi diff --git a/Libraries/oneDNN/getting_started/sample.json b/Libraries/oneDNN/getting_started/sample.json new file mode 100644 index 0000000000..956fc69f38 --- /dev/null +++ b/Libraries/oneDNN/getting_started/sample.json @@ -0,0 +1,25 @@ +{ + "guid": "028AE3ED-2896-4C56-9066-42AA5D5FA973", + "name": "oneDNN Getting Started", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneDNN"], + "description": "Basic oneDNN programming model for both Intel CPU and GPU.", + "toolchain": ["dpcpp"], + "languages": [{"cpp":{}}], + "dependencies": ["oneDNN", "tbb"], + "os": ["linux"], + "builder": ["ide","cmake"], + "targetDevice": ["CPU", "GPU"], + "ciTests": { + "linux": [{ + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], + "id": "gsg", + "steps": [ + "mkdir build", + "cd build", + "cmake .. 
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", + "make getting-started-cpp", + "SYCL_BE=PI_OPENCL ./out/getting-started-cpp gpu" + ] + }] + } +} diff --git a/Libraries/oneDNN/simple_model/CMakeLists.txt b/Libraries/oneDNN/simple_model/CMakeLists.txt new file mode 100644 index 0000000000..d54283bd9a --- /dev/null +++ b/Libraries/oneDNN/simple_model/CMakeLists.txt @@ -0,0 +1,17 @@ +cmake_minimum_required(VERSION 2.8.11) +if("${CMAKE_CXX_COMPILER}" STREQUAL "") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") +endif() +project (oneDNN_CNN) +if("$ENV{EXAMPLE_ROOT}" STREQUAL "") + message(" - use default examples") + file(COPY $ENV{DNNLROOT}/examples/cnn_inference_f32.cpp DESTINATION src) + file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) + if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) + endif() + add_subdirectory (${PROJECT_BINARY_DIR}/src out) +else() + add_subdirectory ($ENV{EXAMPLE_ROOT} out) +endif() diff --git a/Libraries/oneDNN/simple_model/License.txt b/Libraries/oneDNN/simple_model/License.txt new file mode 100644 index 0000000000..8b5e331b54 --- /dev/null +++ b/Libraries/oneDNN/simple_model/License.txt @@ -0,0 +1,19 @@ +Copyright (c) 2020, Intel Corporation. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Libraries/oneDNN/simple_model/README.md b/Libraries/oneDNN/simple_model/README.md new file mode 100644 index 0000000000..bef7a82ceb --- /dev/null +++ b/Libraries/oneDNN/simple_model/README.md @@ -0,0 +1,311 @@ +# oneDNN Simple Model Sample + +This sample is implemented in C++ and DPC++ and runs on CPU or GPU. The sample +also includes [a Jupyter Notebook](simple_model.ipynb) that +demonstrates how to port a oneDNN sample from CPU-only version to CPU & GPU +in Intel oneAPI DevCloud environment. + +| Optimized for | Description +| :--- | :--- +| OS | Linux Ubuntu 18.04; Windows 10 +| Hardware | Kaby Lake with GEN9 or newer +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler , Intel C++ Compiler +| What you will learn | run a simple convolutional model on Intel CPU or Intel GPU +| Time to complete | 15 minutes + +## License + +This code sample is licensed under MIT license. + +## What You Will Learn + +* How to run a simple convolutional network on Intel CPU or Intel GPU. +* How to compile examples with Intel oneAPI DPC++ Compiler, Intel C++ Compiler, +and GNU C++ Compiler +* How to switch between OpenMP and TBB for CPU parallelization +* How tensors are implemented and submitted to primitives. +* How primitives are created. +* How primitives are sequentially submitted to the network, where the output +from primitives is passed as input to the next primitive. 
The latter specifies +a dependency between the primitive input and output data. +* Specific 'inference-only' configurations. +* Limiting the number of reorders performed that are detrimental to performance. + +## Pre-requisites + +### Using Intel C++ Compiler + +Using Intel C++ Compiler also requires the following component which is part of the [Intel oneAPI HPC Toolkit (HPC Kit)](https://software.intel.com/en-us/oneapi/hpc-kit) +* oneAPI Intel C++ Compiler + +### Using TBB for CPU parallelization + +Using Threading Building Blocks also requires the following component which is part of the [Intel oneAPI Base Toolkit (Base Kit)](https://software.intel.com/en-us/oneapi/oneapi-kit) +* Intel oneAPI Threading Building Blocks (oneTBB) + +### GPU and CPU + +The sample below requires the following components which are part of the [Intel oneAPI Base Toolkit (Base Kit)](https://software.intel.com/en-us/oneapi/oneapi-kit) +* Intel oneAPI Deep Neural Network Library (oneDNN) +* Intel oneAPI DPC++ Compiler +* Intel oneAPI DPC++ Library (oneDPL) +* Intel oneAPI Threading Building Blocks (oneTBB) + +The sample also requires an OpenCL driver. Please refer to [System Requirements](https://software.intel.com/en-us/articles/intel-oneapi-base-toolkit-system-requirements) for OpenCL driver installation. + +## Building the sample for CPU and GPU + +### CPU + +#### Using GNU C++ Compiler + +When compiled with GNU C++ Compiler this sample runs on Intel CPU and uses +GNU OpenMP runtime for parallelism. + +##### on a Linux* System + +Start with a clean console environment. + +``` +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_gomp +``` + +Make sure that the environments of both the compiler and oneDNN are properly set up +before you proceed with the following steps. If setvars.sh complains "not found" for +compiler or oneDNN, please check your installation first. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 +mkdir cpu_gomp +cd cpu_gomp +cmake ..
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ +make cnn-inference-f32-cpp +``` + +> NOTE: The source file `cnn_inference_f32.cpp` will be copied from +> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_gomp` to `cpu_gomp/src` folder. +> You can rebuild the sample by typing `make` in `cpu_gomp` folder. + +#### Using Intel C++ Compiler + +When compiled with Intel C++ Compiler this sample runs on Intel CPU and +uses Intel OpenMP for CPU parallelism. + +##### on a Linux* System + +Start with a clean console environment. + +``` +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_iomp +``` + +Make sure that the environments of both the compiler and oneDNN are properly set up +before you proceed with the following steps. If setvars.sh complains "not found" for +compiler or oneDNN, please check your installation first. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 +mkdir cpu_iomp +cd cpu_iomp +cmake .. -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc +make cnn-inference-f32-cpp +``` + +> NOTE: The source file `cnn_inference_f32.cpp` will be copied from +> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_iomp` to `cpu_iomp/src` folder. +> You can rebuild the sample by typing `make` in `cpu_iomp` folder. + +#### Using TBB + +oneDNN supports both Intel OpenMP and TBB for CPU parallelization. +You can switch to TBB runtime using steps below. + +##### on a Linux* System + +Start with a clean console environment. + +``` +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_tbb +``` + +Make sure that the environments of both the compiler and oneDNN are properly set up +before you proceed with the following steps. If setvars.sh complains "not found" for +compiler or oneDNN, please check your installation first. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 +mkdir cpu_tbb +cd cpu_tbb +cmake ..
+make cnn-inference-f32-cpp +``` + +> NOTE: The source file `cnn_inference_f32.cpp` will be copied from +> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_tbb` to `cpu_tbb/src` folder. +> You can rebuild the sample by typing `make` in `cpu_tbb` folder. + +#### On a Windows* System + +When compiled with Microsoft Visual C++ Compiler this sample runs on Intel CPU +and uses Microsoft OpenMP runtime for parallelism. + + +Start with Intel oneAPI command prompt for Microsoft Visual Studio. + +``` +C:\Program Files (x86)\intel\oneapi> oneDNN\latest\env\vars.bat --dnnl-configuration=cpu_vcomp +``` + +Make sure that the environments of both the compiler and oneDNN are properly set up +before you proceed with the following steps. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 +mkdir cpu_vcomp +cd cpu_vcomp +cmake -G "Visual Studio 16 2019" .. +cmake --build . +``` + +> NOTE: You can open the oneDNN_CNN.sln inside cpu_vcomp folder to edit source +> code with Microsoft Visual Studio integrated development environment. + +## CPU and GPU + +### Using DPC++ Compiler + +By using DPC++ compiler, this sample supports CNN FP32 both on Intel CPU and GPU. + +#### on a Linux* System + +Start with a clean console environment. + +``` +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh +``` + +A specific oneDNN configuration may be selected with the +`--dnnl-configuration` option. The default configuration is `cpu_dpcpp_gpu_dpcpp`. + +Make sure that the environments of both the compiler and oneDNN are properly set up +before you proceed with the following steps. If setvars.sh complains "not found" for +compiler or oneDNN, please check your installation first. + +``` +cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 +mkdir dpcpp +cd dpcpp +cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp +make cnn-inference-f32-cpp +``` + +> NOTE: The source file `cnn_inference_f32.cpp` will be copied from +> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_dpcpp_gpu_dpcpp` to `dpcpp/src` folder.
+> You can rebuild the sample by typing `make` in the `dpcpp` folder. + +## Running the sample + +### on a Linux* System + +Run the program on CPU: + +``` +./out/cnn-inference-f32-cpp +``` + +Run the program on GPU: +``` +./out/cnn-inference-f32-cpp gpu +``` + +> NOTE: The Level Zero runtime is enabled by default. Please make sure the +> Level Zero driver, including the level-zero-devel package, is properly +> installed by following the installation guide. +> If you still encounter a runtime issue such as "could not create a primitive", +> please apply the workaround of setting SYCL_BE=PI_OPENCL before running +> a DPC++ program. To apply the workaround in this sample, users can add +> `export SYCL_BE=PI_OPENCL` in CMakeLists.txt. After applying the workaround, +> the sample uses the OpenCL runtime instead. + +### On a Windows* System + +Run the program on CPU: + +``` +out\Debug\cnn-inference-f32-cpp.exe +``` + +### Example of Output + +#### on a Linux* System + +Enable oneDNN verbose log: + +``` +export DNNL_VERBOSE=1 +``` + +Run the program on CPU or GPU following [How to Run Session](#how-to-run). + +CPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +... +/oneDNN VERBOSE LOGS/ +...
+dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:aBcd8b:f0 dst_f32::blocked:abcd:f0,,,1x256x6x6,0.032959 +dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:abcd:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic256ih6iw6oc4096,5.4458 +dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc4096,2.50317 +dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc1000,0.634033 +dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:ab:f0 dst_f32::blocked:ab:f0,,,1x1000,0.0290527 +Use time 33.22 +``` + +GPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +... +/DNNL VERBOSE LOGS/ +... +dnnl_verbose,exec,gpu,reorder,ocl:simple:any,undef,src_f32::blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,,,1x256x6x6 +dnnl_verbose,exec,gpu,inner_product,ocl:gemm,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:abcd:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic256ih6iw6oc4096 +dnnl_verbose,exec,gpu,inner_product,ocl:gemm,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc4096 +dnnl_verbose,exec,gpu,inner_product,ocl:gemm,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc1000 +dnnl_verbose,exec,gpu,reorder,ocl:simple:any,undef,src_f32::blocked:ab:f0 dst_f32::blocked:ab:f0,,,1x1000 +Use time 106.29 +``` + +#### on a Windows* System + +Enable oneDNN verbose log: +``` +set DNNL_VERBOSE=1 + +``` + +Run the program on CPU or GPU following [How to Run Session](#how-to-run). 
+ +CPU Results: + +``` +dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) +dnnl_verbose,info,Detected ISA is Intel AVX2 +... +/DNNL VERBOSE LOGS/ +... +dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:aBcd8b:f0 dst_f32::blocked:abcd:f0,,,1x256x6x6,0.032959 +dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:abcd:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic256ih6iw6oc4096,5.4458 +dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc4096,2.50317 +dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc1000,0.634033 +dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:ab:f0 dst_f32::blocked:ab:f0,,,1x1000,0.0290527 +Use time 33.22 +``` + +## Implementation Details + +This sample uses example code from oneDNN distribution. You can find this code +in [oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/cnn_inference_f32.cpp). 
diff --git a/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.cpp b/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.cpp new file mode 100644 index 0000000000..606c8ddc77 --- /dev/null +++ b/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.cpp @@ -0,0 +1,700 @@ + +#include + +#include +#include +#include +#include +#include +#include + +#include "dnnl.hpp" + +using namespace dnnl; + +using namespace std; + +memory::dim product(const memory::dims &dims) { + return std::accumulate(dims.begin(), dims.end(), (memory::dim)1, + std::multiplies()); +} + +void simple_net(int times = 100) { + using tag = memory::format_tag; + using dt = memory::data_type; + + + engine eng(engine::kind::cpu, 0); + stream s(eng); + + std::vector net; + std::vector> net_args; + + + const memory::dim batch = 1; + + // AlexNet: conv1 + // {batch, 3, 227, 227} (x) {96, 3, 11, 11} -> {batch, 96, 55, 55} + // strides: {4, 4} + memory::dims conv1_src_tz = { batch, 3, 227, 227 }; + memory::dims conv1_weights_tz = { 96, 3, 11, 11 }; + memory::dims conv1_bias_tz = { 96 }; + memory::dims conv1_dst_tz = { batch, 96, 55, 55 }; + memory::dims conv1_strides = { 4, 4 }; + memory::dims conv1_padding = { 0, 0 }; + +/// Allocate buffers for input and output data, weights, and bias. 
+/// @snippet cpu_cnn_inference_f32.cpp Allocate buffers +//[Allocate buffers] + std::vector user_src(batch * 3 * 227 * 227); + std::vector user_dst(batch * 1000); + std::vector conv1_weights(product(conv1_weights_tz)); + std::vector conv1_bias(product(conv1_bias_tz)); +//[Allocate buffers] + + + auto user_src_memory = memory( + { { conv1_src_tz }, dt::f32, tag::nchw }, eng, user_src.data()); + auto user_weights_memory + = memory({ { conv1_weights_tz }, dt::f32, tag::oihw }, eng, + conv1_weights.data()); + auto conv1_user_bias_memory = memory( + { { conv1_bias_tz }, dt::f32, tag::x }, eng, conv1_bias.data()); + + + + auto conv1_src_md = memory::desc({ conv1_src_tz }, dt::f32, tag::any); + auto conv1_bias_md = memory::desc({ conv1_bias_tz }, dt::f32, tag::any); + auto conv1_weights_md + = memory::desc({ conv1_weights_tz }, dt::f32, tag::any); + auto conv1_dst_md = memory::desc({ conv1_dst_tz }, dt::f32, tag::any); + + + + auto conv1_desc = convolution_forward::desc(prop_kind::forward_inference, + algorithm::convolution_direct, conv1_src_md, conv1_weights_md, conv1_bias_md, + conv1_dst_md, conv1_strides, conv1_padding, conv1_padding); + + auto conv1_prim_desc = convolution_forward::primitive_desc(conv1_desc, eng); + + + auto conv1_src_memory = user_src_memory; + if (conv1_prim_desc.src_desc() != user_src_memory.get_desc()) { + conv1_src_memory = memory(conv1_prim_desc.src_desc(), eng); + net.push_back(reorder(user_src_memory, conv1_src_memory)); + net_args.push_back({ { DNNL_ARG_FROM, user_src_memory }, + { DNNL_ARG_TO, conv1_src_memory } }); + } + + auto conv1_weights_memory = user_weights_memory; + if (conv1_prim_desc.weights_desc() != user_weights_memory.get_desc()) { + conv1_weights_memory = memory(conv1_prim_desc.weights_desc(), eng); + reorder(user_weights_memory, conv1_weights_memory) + .execute(s, user_weights_memory, conv1_weights_memory); + } + + auto conv1_dst_memory = memory(conv1_prim_desc.dst_desc(), eng); + + + + 
net.push_back(convolution_forward(conv1_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv1_src_memory }, + { DNNL_ARG_WEIGHTS, conv1_weights_memory }, + { DNNL_ARG_BIAS, conv1_user_bias_memory }, + { DNNL_ARG_DST, conv1_dst_memory } }); + + + // AlexNet: relu1 + // {batch, 96, 55, 55} -> {batch, 96, 55, 55}; NOTE(review): negative1_slope is passed to eltwise_relu as its alpha, presumably the slope applied to negative inputs, so 1.0f would make relu1..relu5 identities - confirm whether 0.0f was intended + const float negative1_slope = 1.0f; + + + + auto relu1_desc = eltwise_forward::desc(prop_kind::forward_inference, + algorithm::eltwise_relu, conv1_dst_memory.get_desc(), + negative1_slope); + auto relu1_prim_desc = eltwise_forward::primitive_desc(relu1_desc, eng); + + net.push_back(eltwise_forward(relu1_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv1_dst_memory }, + { DNNL_ARG_DST, conv1_dst_memory } }); + + + // AlexNet: lrn1 + // {batch, 96, 55, 55} -> {batch, 96, 55, 55} + // local size: 5 + // alpha1: 0.0001 + // beta1: 0.75 + const memory::dim local1_size = 5; + const float alpha1 = 0.0001f; + const float beta1 = 0.75f; + const float k1 = 1.0f; + + // create lrn primitive and add it to net + auto lrn1_desc = lrn_forward::desc(prop_kind::forward_inference, + algorithm::lrn_across_channels, conv1_dst_memory.get_desc(), local1_size, + alpha1, beta1, k1); + auto lrn1_prim_desc = lrn_forward::primitive_desc(lrn1_desc, eng); + auto lrn1_dst_memory = memory(lrn1_prim_desc.dst_desc(), eng); + + net.push_back(lrn_forward(lrn1_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv1_dst_memory }, + { DNNL_ARG_DST, lrn1_dst_memory } }); + + // AlexNet: pool1 + // {batch, 96, 55, 55} -> {batch, 96, 27, 27} + // kernel: {3, 3} + // strides: {2, 2} + memory::dims pool1_dst_tz = { batch, 96, 27, 27 }; + memory::dims pool1_kernel = { 3, 3 }; + memory::dims pool1_strides = { 2, 2 }; + memory::dims pool_padding = { 0, 0 }; + + auto pool1_dst_md = memory::desc({ pool1_dst_tz }, dt::f32, tag::any); + + auto pool1_desc = pooling_forward::desc(prop_kind::forward_inference, + algorithm::pooling_max, lrn1_dst_memory.get_desc(), pool1_dst_md, + pool1_strides, 
pool1_kernel, pool_padding, pool_padding); + auto pool1_pd = pooling_forward::primitive_desc(pool1_desc, eng); + auto pool1_dst_memory = memory(pool1_pd.dst_desc(), eng); + + net.push_back(pooling_forward(pool1_pd)); + net_args.push_back({ { DNNL_ARG_SRC, lrn1_dst_memory }, + { DNNL_ARG_DST, pool1_dst_memory } }); + + // AlexNet: conv2 + // {batch, 96, 27, 27} (x) {2, 128, 48, 5, 5} -> {batch, 256, 27, 27} + // strides: {1, 1} + memory::dims conv2_src_tz = { batch, 96, 27, 27 }; + memory::dims conv2_weights_tz = { 2, 128, 48, 5, 5 }; + memory::dims conv2_bias_tz = { 256 }; + memory::dims conv2_dst_tz = { batch, 256, 27, 27 }; + memory::dims conv2_strides = { 1, 1 }; + memory::dims conv2_padding = { 2, 2 }; + + std::vector conv2_weights(product(conv2_weights_tz)); + std::vector conv2_bias(product(conv2_bias_tz)); + + // create memory for user data + auto conv2_user_weights_memory + = memory({ { conv2_weights_tz }, dt::f32, tag::goihw }, eng, + conv2_weights.data()); + auto conv2_user_bias_memory = memory( + { { conv2_bias_tz }, dt::f32, tag::x }, eng, conv2_bias.data()); + + // create memory descriptors for convolution data w/ no specified format + auto conv2_src_md = memory::desc({ conv2_src_tz }, dt::f32, tag::any); + auto conv2_bias_md = memory::desc({ conv2_bias_tz }, dt::f32, tag::any); + auto conv2_weights_md + = memory::desc({ conv2_weights_tz }, dt::f32, tag::any); + auto conv2_dst_md = memory::desc({ conv2_dst_tz }, dt::f32, tag::any); + + // create a convolution + auto conv2_desc = convolution_forward::desc(prop_kind::forward_inference, + algorithm::convolution_direct, conv2_src_md, conv2_weights_md, conv2_bias_md, + conv2_dst_md, conv2_strides, conv2_padding, conv2_padding); + auto conv2_prim_desc = convolution_forward::primitive_desc(conv2_desc, eng); + + auto conv2_src_memory = pool1_dst_memory; + if (conv2_prim_desc.src_desc() != conv2_src_memory.get_desc()) { + conv2_src_memory = memory(conv2_prim_desc.src_desc(), eng); + 
net.push_back(reorder(pool1_dst_memory, conv2_src_memory)); + net_args.push_back({ { DNNL_ARG_FROM, pool1_dst_memory }, + { DNNL_ARG_TO, conv2_src_memory } }); + } + + auto conv2_weights_memory = conv2_user_weights_memory; + if (conv2_prim_desc.weights_desc() + != conv2_user_weights_memory.get_desc()) { + conv2_weights_memory = memory(conv2_prim_desc.weights_desc(), eng); + reorder(conv2_user_weights_memory, conv2_weights_memory) + .execute(s, conv2_user_weights_memory, conv2_weights_memory); + } + + auto conv2_dst_memory = memory(conv2_prim_desc.dst_desc(), eng); + + // create convolution primitive and add it to net + net.push_back(convolution_forward(conv2_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv2_src_memory }, + { DNNL_ARG_WEIGHTS, conv2_weights_memory }, + { DNNL_ARG_BIAS, conv2_user_bias_memory }, + { DNNL_ARG_DST, conv2_dst_memory } }); + + // AlexNet: relu2 + // {batch, 256, 27, 27} -> {batch, 256, 27, 27} + const float negative2_slope = 1.0f; + + // create relu primitive and add it to net + auto relu2_desc = eltwise_forward::desc(prop_kind::forward_inference, + algorithm::eltwise_relu, conv2_dst_memory.get_desc(), + negative2_slope); + auto relu2_prim_desc = eltwise_forward::primitive_desc(relu2_desc, eng); + + net.push_back(eltwise_forward(relu2_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv2_dst_memory }, + { DNNL_ARG_DST, conv2_dst_memory } }); + + // AlexNet: lrn2 + // {batch, 256, 27, 27} -> {batch, 256, 27, 27} + // local size: 5 + // alpha2: 0.0001 + // beta2: 0.75 + const memory::dim local2_size = 5; + const float alpha2 = 0.0001f; + const float beta2 = 0.75f; + const float k2 = 1.0f; + + // create lrn primitive and add it to net + auto lrn2_desc = lrn_forward::desc(prop_kind::forward_inference, + algorithm::lrn_across_channels, conv2_prim_desc.dst_desc(), local2_size, + alpha2, beta2, k2); + auto lrn2_prim_desc = lrn_forward::primitive_desc(lrn2_desc, eng); + auto lrn2_dst_memory = memory(lrn2_prim_desc.dst_desc(), eng); + 
+ net.push_back(lrn_forward(lrn2_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv2_dst_memory }, + { DNNL_ARG_DST, lrn2_dst_memory } }); + + // AlexNet: pool2 + // {batch, 256, 27, 27} -> {batch, 256, 13, 13} + // kernel: {3, 3} + // strides: {2, 2} + memory::dims pool2_dst_tz = { batch, 256, 13, 13 }; + memory::dims pool2_kernel = { 3, 3 }; + memory::dims pool2_strides = { 2, 2 }; + memory::dims pool2_padding = { 0, 0 }; + + auto pool2_dst_md = memory::desc({ pool2_dst_tz }, dt::f32, tag::any); + + // create a pooling + auto pool2_desc = pooling_forward::desc(prop_kind::forward_inference, + algorithm::pooling_max, lrn2_dst_memory.get_desc(), pool2_dst_md, + pool2_strides, pool2_kernel, pool2_padding, pool2_padding); + auto pool2_pd = pooling_forward::primitive_desc(pool2_desc, eng); + auto pool2_dst_memory = memory(pool2_pd.dst_desc(), eng); + + // create pooling primitive and add it to net + net.push_back(pooling_forward(pool2_pd)); + net_args.push_back({ { DNNL_ARG_SRC, lrn2_dst_memory }, + { DNNL_ARG_DST, pool2_dst_memory } }); + + // AlexNet: conv3 + // {batch, 256, 13, 13} (x) {384, 256, 3, 3}; -> {batch, 384, 13, 13}; + // strides: {1, 1} + memory::dims conv3_src_tz = { batch, 256, 13, 13 }; + memory::dims conv3_weights_tz = { 384, 256, 3, 3 }; + memory::dims conv3_bias_tz = { 384 }; + memory::dims conv3_dst_tz = { batch, 384, 13, 13 }; + memory::dims conv3_strides = { 1, 1 }; + memory::dims conv3_padding = { 1, 1 }; + + std::vector conv3_weights(product(conv3_weights_tz)); + std::vector conv3_bias(product(conv3_bias_tz)); + + // create memory for user data + auto conv3_user_weights_memory + = memory({ { conv3_weights_tz }, dt::f32, tag::oihw }, eng, + conv3_weights.data()); + auto conv3_user_bias_memory = memory( + { { conv3_bias_tz }, dt::f32, tag::x }, eng, conv3_bias.data()); + + // create memory descriptors for convolution data w/ no specified format + auto conv3_src_md = memory::desc({ conv3_src_tz }, dt::f32, tag::any); + auto conv3_bias_md = 
memory::desc({ conv3_bias_tz }, dt::f32, tag::any); + auto conv3_weights_md + = memory::desc({ conv3_weights_tz }, dt::f32, tag::any); + auto conv3_dst_md = memory::desc({ conv3_dst_tz }, dt::f32, tag::any); + + // create a convolution + auto conv3_desc = convolution_forward::desc(prop_kind::forward_inference, + algorithm::convolution_direct, conv3_src_md, conv3_weights_md, conv3_bias_md, + conv3_dst_md, conv3_strides, conv3_padding, conv3_padding); + auto conv3_prim_desc = convolution_forward::primitive_desc(conv3_desc, eng); + + auto conv3_src_memory = pool2_dst_memory; + if (conv3_prim_desc.src_desc() != conv3_src_memory.get_desc()) { + conv3_src_memory = memory(conv3_prim_desc.src_desc(), eng); + net.push_back(reorder(pool2_dst_memory, conv3_src_memory)); + net_args.push_back({ { DNNL_ARG_FROM, pool2_dst_memory }, + { DNNL_ARG_TO, conv3_src_memory } }); + } + + auto conv3_weights_memory = conv3_user_weights_memory; + if (conv3_prim_desc.weights_desc() + != conv3_user_weights_memory.get_desc()) { + conv3_weights_memory = memory(conv3_prim_desc.weights_desc(), eng); + reorder(conv3_user_weights_memory, conv3_weights_memory) + .execute(s, conv3_user_weights_memory, conv3_weights_memory); + } + + auto conv3_dst_memory = memory(conv3_prim_desc.dst_desc(), eng); + + // create convolution primitive and add it to net + net.push_back(convolution_forward(conv3_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv3_src_memory }, + { DNNL_ARG_WEIGHTS, conv3_weights_memory }, + { DNNL_ARG_BIAS, conv3_user_bias_memory }, + { DNNL_ARG_DST, conv3_dst_memory } }); + + // AlexNet: relu3 + // {batch, 384, 13, 13} -> {batch, 384, 13, 13} + const float negative3_slope = 1.0f; + + // create relu primitive and add it to net + auto relu3_desc = eltwise_forward::desc(prop_kind::forward_inference, + algorithm::eltwise_relu, conv3_dst_memory.get_desc(), + negative3_slope); + auto relu3_prim_desc = eltwise_forward::primitive_desc(relu3_desc, eng); + + 
net.push_back(eltwise_forward(relu3_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv3_dst_memory }, + { DNNL_ARG_DST, conv3_dst_memory } }); + + // AlexNet: conv4 + // {batch, 384, 13, 13} (x) {2, 192, 192, 3, 3}; -> + // {batch, 384, 13, 13}; + // strides: {1, 1} + memory::dims conv4_src_tz = { batch, 384, 13, 13 }; + memory::dims conv4_weights_tz = { 2, 192, 192, 3, 3 }; + memory::dims conv4_bias_tz = { 384 }; + memory::dims conv4_dst_tz = { batch, 384, 13, 13 }; + memory::dims conv4_strides = { 1, 1 }; + memory::dims conv4_padding = { 1, 1 }; + + std::vector conv4_weights(product(conv4_weights_tz)); + std::vector conv4_bias(product(conv4_bias_tz)); + + // create memory for user data + auto conv4_user_weights_memory + = memory({ { conv4_weights_tz }, dt::f32, tag::goihw }, eng, + conv4_weights.data()); + auto conv4_user_bias_memory = memory( + { { conv4_bias_tz }, dt::f32, tag::x }, eng, conv4_bias.data()); + + // create memory descriptors for convolution data w/ no specified format + auto conv4_src_md = memory::desc({ conv4_src_tz }, dt::f32, tag::any); + auto conv4_bias_md = memory::desc({ conv4_bias_tz }, dt::f32, tag::any); + auto conv4_weights_md + = memory::desc({ conv4_weights_tz }, dt::f32, tag::any); + auto conv4_dst_md = memory::desc({ conv4_dst_tz }, dt::f32, tag::any); + + // create a convolution + auto conv4_desc = convolution_forward::desc(prop_kind::forward_inference, + algorithm::convolution_direct, conv4_src_md, conv4_weights_md, conv4_bias_md, + conv4_dst_md, conv4_strides, conv4_padding, conv4_padding); + auto conv4_prim_desc = convolution_forward::primitive_desc(conv4_desc, eng); + + auto conv4_src_memory = conv3_dst_memory; + if (conv4_prim_desc.src_desc() != conv4_src_memory.get_desc()) { + conv4_src_memory = memory(conv4_prim_desc.src_desc(), eng); + net.push_back(reorder(conv3_dst_memory, conv4_src_memory)); + net_args.push_back({ { DNNL_ARG_FROM, conv3_dst_memory }, + { DNNL_ARG_TO, conv4_src_memory } }); + } + + auto 
conv4_weights_memory = conv4_user_weights_memory; + if (conv4_prim_desc.weights_desc() + != conv4_user_weights_memory.get_desc()) { + conv4_weights_memory = memory(conv4_prim_desc.weights_desc(), eng); + reorder(conv4_user_weights_memory, conv4_weights_memory) + .execute(s, conv4_user_weights_memory, conv4_weights_memory); + } + + auto conv4_dst_memory = memory(conv4_prim_desc.dst_desc(), eng); + + // create convolution primitive and add it to net + net.push_back(convolution_forward(conv4_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv4_src_memory }, + { DNNL_ARG_WEIGHTS, conv4_weights_memory }, + { DNNL_ARG_BIAS, conv4_user_bias_memory }, + { DNNL_ARG_DST, conv4_dst_memory } }); + + // AlexNet: relu4 + // {batch, 384, 13, 13} -> {batch, 384, 13, 13} + const float negative4_slope = 1.0f; + + // create relu primitive and add it to net + auto relu4_desc = eltwise_forward::desc(prop_kind::forward_inference, + algorithm::eltwise_relu, conv4_dst_memory.get_desc(), + negative4_slope); + auto relu4_prim_desc = eltwise_forward::primitive_desc(relu4_desc, eng); + + net.push_back(eltwise_forward(relu4_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv4_dst_memory }, + { DNNL_ARG_DST, conv4_dst_memory } }); + + // AlexNet: conv5 + // {batch, 384, 13, 13} (x) {2, 128, 192, 3, 3}; -> {batch, 256, 13, 13}; + // strides: {1, 1} + memory::dims conv5_src_tz = { batch, 384, 13, 13 }; + memory::dims conv5_weights_tz = { 2, 128, 192, 3, 3 }; + memory::dims conv5_bias_tz = { 256 }; + memory::dims conv5_dst_tz = { batch, 256, 13, 13 }; + memory::dims conv5_strides = { 1, 1 }; + memory::dims conv5_padding = { 1, 1 }; + + std::vector conv5_weights(product(conv5_weights_tz)); + std::vector conv5_bias(product(conv5_bias_tz)); + + // create memory for user data + auto conv5_user_weights_memory + = memory({ { conv5_weights_tz }, dt::f32, tag::goihw }, eng, + conv5_weights.data()); + auto conv5_user_bias_memory = memory( + { { conv5_bias_tz }, dt::f32, tag::x }, eng, 
conv5_bias.data()); + + // create memory descriptors for convolution data w/ no specified format + auto conv5_src_md = memory::desc({ conv5_src_tz }, dt::f32, tag::any); + auto conv5_weights_md + = memory::desc({ conv5_weights_tz }, dt::f32, tag::any); + auto conv5_bias_md = memory::desc({ conv5_bias_tz }, dt::f32, tag::any); + auto conv5_dst_md = memory::desc({ conv5_dst_tz }, dt::f32, tag::any); + + // create a convolution + auto conv5_desc = convolution_forward::desc(prop_kind::forward_inference, + algorithm::convolution_direct, conv5_src_md, conv5_weights_md, conv5_bias_md, + conv5_dst_md, conv5_strides, conv5_padding, conv5_padding); + auto conv5_prim_desc = convolution_forward::primitive_desc(conv5_desc, eng); + + auto conv5_src_memory = conv4_dst_memory; + if (conv5_prim_desc.src_desc() != conv5_src_memory.get_desc()) { + conv5_src_memory = memory(conv5_prim_desc.src_desc(), eng); + net.push_back(reorder(conv4_dst_memory, conv5_src_memory)); + net_args.push_back({ { DNNL_ARG_FROM, conv4_dst_memory }, + { DNNL_ARG_TO, conv5_src_memory } }); + } + + auto conv5_weights_memory = conv5_user_weights_memory; + if (conv5_prim_desc.weights_desc() + != conv5_user_weights_memory.get_desc()) { + conv5_weights_memory = memory(conv5_prim_desc.weights_desc(), eng); + reorder(conv5_user_weights_memory, conv5_weights_memory) + .execute(s, conv5_user_weights_memory, conv5_weights_memory); + } + + auto conv5_dst_memory = memory(conv5_prim_desc.dst_desc(), eng); + + // create convolution primitive and add it to net + net.push_back(convolution_forward(conv5_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv5_src_memory }, + { DNNL_ARG_WEIGHTS, conv5_weights_memory }, + { DNNL_ARG_BIAS, conv5_user_bias_memory }, + { DNNL_ARG_DST, conv5_dst_memory } }); + + // AlexNet: relu5 + // {batch, 256, 13, 13} -> {batch, 256, 13, 13} + const float negative5_slope = 1.0f; + + // create relu primitive and add it to net + auto relu5_desc = 
eltwise_forward::desc(prop_kind::forward_inference, + algorithm::eltwise_relu, conv5_dst_memory.get_desc(), + negative5_slope); + auto relu5_prim_desc = eltwise_forward::primitive_desc(relu5_desc, eng); + + net.push_back(eltwise_forward(relu5_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, conv5_dst_memory }, + { DNNL_ARG_DST, conv5_dst_memory } }); + + // AlexNet: pool5 + // {batch, 256, 13, 13} -> {batch, 256, 6, 6} + // kernel: {3, 3} + // strides: {2, 2} + memory::dims pool5_dst_tz = { batch, 256, 6, 6 }; + memory::dims pool5_kernel = { 3, 3 }; + memory::dims pool5_strides = { 2, 2 }; + memory::dims pool5_padding = { 0, 0 }; + + std::vector pool5_dst(product(pool5_dst_tz)); + + auto pool5_dst_md = memory::desc({ pool5_dst_tz }, dt::f32, tag::any); + + // create a pooling + auto pool5_desc = pooling_forward::desc(prop_kind::forward_inference, + algorithm::pooling_max, conv5_dst_memory.get_desc(), pool5_dst_md, + pool5_strides, pool5_kernel, pool5_padding, pool5_padding); + auto pool5_pd = pooling_forward::primitive_desc(pool5_desc, eng); + + auto pool5_dst_memory = memory(pool5_pd.dst_desc(), eng); + + // create pooling primitive and add it to net + net.push_back(pooling_forward(pool5_pd)); + net_args.push_back({ { DNNL_ARG_SRC, conv5_dst_memory }, + { DNNL_ARG_DST, pool5_dst_memory } }); + + + // fc6 inner product {batch, 256, 6, 6} (x) {4096, 256, 6, 6}-> {batch, + // 4096} + memory::dims fc6_src_tz = { batch, 256, 6, 6 }; + memory::dims fc6_weights_tz = { 4096, 256, 6, 6 }; + memory::dims fc6_bias_tz = { 4096 }; + memory::dims fc6_dst_tz = { batch, 4096 }; + + std::vector fc6_weights(product(fc6_weights_tz)); + std::vector fc6_bias(product(fc6_bias_tz)); + + // create memory for user data + auto fc6_user_weights_memory + = memory({ { fc6_weights_tz }, dt::f32, tag::oihw }, eng, + fc6_weights.data()); + auto fc6_user_bias_memory = memory( + { { fc6_bias_tz }, dt::f32, tag::x }, eng, fc6_bias.data()); + + // create memory descriptors for convolution data w/ 
no specified format + auto fc6_src_md = memory::desc({ fc6_src_tz }, dt::f32, tag::any); + auto fc6_bias_md = memory::desc({ fc6_bias_tz }, dt::f32, tag::any); + auto fc6_weights_md = memory::desc({ fc6_weights_tz }, dt::f32, tag::any); + auto fc6_dst_md = memory::desc({ fc6_dst_tz }, dt::f32, tag::any); + + // create an inner_product + auto fc6_desc = inner_product_forward::desc(prop_kind::forward_inference, + fc6_src_md, fc6_weights_md, fc6_bias_md, fc6_dst_md); + auto fc6_prim_desc = inner_product_forward::primitive_desc(fc6_desc, eng); + + auto fc6_src_memory = pool5_dst_memory; + if (fc6_prim_desc.src_desc() != fc6_src_memory.get_desc()) { + fc6_src_memory = memory(fc6_prim_desc.src_desc(), eng); + net.push_back(reorder(pool5_dst_memory, fc6_src_memory)); + net_args.push_back({ { DNNL_ARG_FROM, pool5_dst_memory }, + { DNNL_ARG_TO, fc6_src_memory } }); + } + + auto fc6_weights_memory = fc6_user_weights_memory; + if (fc6_prim_desc.weights_desc() != fc6_user_weights_memory.get_desc()) { + fc6_weights_memory = memory(fc6_prim_desc.weights_desc(), eng); + reorder(fc6_user_weights_memory, fc6_weights_memory) + .execute(s, fc6_user_weights_memory, fc6_weights_memory); + } + + auto fc6_dst_memory = memory(fc6_prim_desc.dst_desc(), eng); + + // create inner product primitive and add it to net + net.push_back(inner_product_forward(fc6_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, fc6_src_memory }, + { DNNL_ARG_WEIGHTS, fc6_weights_memory }, + { DNNL_ARG_BIAS, fc6_user_bias_memory }, + { DNNL_ARG_DST, fc6_dst_memory } }); + + + // fc7 inner product {batch, 4096} (x) {4096, 4096}-> {batch, 4096} + memory::dims fc7_weights_tz = { 4096, 4096 }; + memory::dims fc7_bias_tz = { 4096 }; + memory::dims fc7_dst_tz = { batch, 4096 }; + + std::vector fc7_weights(product(fc7_weights_tz)); + std::vector fc7_bias(product(fc7_bias_tz)); + + // create memory for user data + auto fc7_user_weights_memory = memory( + { { fc7_weights_tz }, dt::f32, tag::nc }, eng, fc7_weights.data()); + 
+ auto fc7_user_bias_memory = memory( + { { fc7_bias_tz }, dt::f32, tag::x }, eng, fc7_bias.data()); + + // create memory descriptors for convolution data w/ no specified format + auto fc7_bias_md = memory::desc({ fc7_bias_tz }, dt::f32, tag::any); + auto fc7_weights_md = memory::desc({ fc7_weights_tz }, dt::f32, tag::any); + auto fc7_dst_md = memory::desc({ fc7_dst_tz }, dt::f32, tag::any); + + // create an inner_product + auto fc7_desc = inner_product_forward::desc(prop_kind::forward_inference, + fc6_dst_memory.get_desc(), fc7_weights_md, fc7_bias_md, fc7_dst_md); + auto fc7_prim_desc = inner_product_forward::primitive_desc(fc7_desc, eng); + + auto fc7_weights_memory = fc7_user_weights_memory; + if (fc7_prim_desc.weights_desc() != fc7_user_weights_memory.get_desc()) { + fc7_weights_memory = memory(fc7_prim_desc.weights_desc(), eng); + reorder(fc7_user_weights_memory, fc7_weights_memory) + .execute(s, fc7_user_weights_memory, fc7_weights_memory); + } + + auto fc7_dst_memory = memory(fc7_prim_desc.dst_desc(), eng); + + // create inner product primitive and add it to net + net.push_back(inner_product_forward(fc7_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, fc6_dst_memory }, + { DNNL_ARG_WEIGHTS, fc7_weights_memory }, + { DNNL_ARG_BIAS, fc7_user_bias_memory }, + { DNNL_ARG_DST, fc7_dst_memory } }); + + // fc8 inner product {batch, 4096} (x) {1000, 4096}-> {batch, 1000} + memory::dims fc8_weights_tz = { 1000, 4096 }; + memory::dims fc8_bias_tz = { 1000 }; + memory::dims fc8_dst_tz = { batch, 1000 }; + + std::vector fc8_weights(product(fc8_weights_tz)); + std::vector fc8_bias(product(fc8_bias_tz)); + + // create memory for user data + auto fc8_user_weights_memory = memory( + { { fc8_weights_tz }, dt::f32, tag::nc }, eng, fc8_weights.data()); + auto fc8_user_bias_memory = memory( + { { fc8_bias_tz }, dt::f32, tag::x }, eng, fc8_bias.data()); + auto user_dst_memory = memory( + { { fc8_dst_tz }, dt::f32, tag::nc }, eng, user_dst.data()); + + // create memory 
descriptors for convolution data w/ no specified format + auto fc8_bias_md = memory::desc({ fc8_bias_tz }, dt::f32, tag::any); + auto fc8_weights_md = memory::desc({ fc8_weights_tz }, dt::f32, tag::any); + auto fc8_dst_md = memory::desc({ fc8_dst_tz }, dt::f32, tag::any); + + // create an inner_product + auto fc8_desc = inner_product_forward::desc(prop_kind::forward_inference, + fc7_dst_memory.get_desc(), fc8_weights_md, fc8_bias_md, fc8_dst_md); + auto fc8_prim_desc = inner_product_forward::primitive_desc(fc8_desc, eng); + + auto fc8_weights_memory = fc8_user_weights_memory; + if (fc8_prim_desc.weights_desc() != fc8_user_weights_memory.get_desc()) { + fc8_weights_memory = memory(fc8_prim_desc.weights_desc(), eng); + reorder(fc8_user_weights_memory, fc8_weights_memory) + .execute(s, fc8_user_weights_memory, fc8_weights_memory); + } + + auto fc8_dst_memory = memory(fc8_prim_desc.dst_desc(), eng); + + // create inner product primitive and add it to net + net.push_back(inner_product_forward(fc8_prim_desc)); + net_args.push_back({ { DNNL_ARG_SRC, fc7_dst_memory }, + { DNNL_ARG_WEIGHTS, fc8_weights_memory }, + { DNNL_ARG_BIAS, fc8_user_bias_memory }, + { DNNL_ARG_DST, fc8_dst_memory } }); + + // create reorder between internal and user data if it is needed and + // add it to net after fc8. NOTE(review): this compares the memory objects themselves, not their descriptors as the other reorder checks do - confirm that is intended + if (fc8_dst_memory != user_dst_memory) { + net.push_back(reorder(fc8_dst_memory, user_dst_memory)); + net_args.push_back({ { DNNL_ARG_FROM, fc8_dst_memory }, + { DNNL_ARG_TO, user_dst_memory } }); + } + +/// @page cpu_cnn_inference_f32_cpp +/// Finally, execute the primitives. For this example, the net is executed +/// `times` times; main() times the whole simple_net() call, not each run. 
+/// @snippet cpu_cnn_inference_f32.cpp Execute model +//[Execute model] + for (int j = 0; j < times; ++j) { + assert(net.size() == net_args.size() && "something is missing"); + for (size_t i = 0; i < net.size(); ++i) + net.at(i).execute(s, net_args.at(i)); + } +//[Execute model] + + s.wait(); +} + +int main(int argc, char **argv) { + try { + auto begin = chrono::duration_cast( + chrono::steady_clock::now().time_since_epoch()) + .count(); + int times = 100; + simple_net(times); + auto end = chrono::duration_cast( + chrono::steady_clock::now().time_since_epoch()) + .count(); + cout << "Use time " << (end - begin) / (times + 0.0) << "\n"; + } catch (error &e) { + std::cerr << "status: " << e.status << std::endl; + std::cerr << "message: " << e.message << std::endl; + } + return 0; +} \ No newline at end of file diff --git a/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.patch b/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.patch new file mode 100644 index 0000000000..c76bb2a859 --- /dev/null +++ b/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.patch @@ -0,0 +1,245 @@ +--- cnn_inference_f32.cpp 2020-02-12 10:12:10.467690007 -0800 ++++ cnn_inference_f32_gpu.cpp 2020-02-12 10:12:28.395690295 -0800 +@@ -19,12 +19,62 @@ + std::multiplies()); + } + ++ ++// ------ GPU code conversion --Step 2 >>>>>> ++// Read from handle, write to memory ++inline void write_to_dnnl_memory(void *handle, dnnl::memory &mem) { ++ ++ dnnl::engine eng = mem.get_engine(); ++ size_t size = mem.get_desc().get_size(); ++ ++ bool is_cpu_sycl = (DNNL_CPU_RUNTIME == DNNL_RUNTIME_SYCL ++ && eng.get_kind() == dnnl::engine::kind::cpu); ++ bool is_gpu_sycl = (DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL ++ && eng.get_kind() == dnnl::engine::kind::gpu); ++ if (is_cpu_sycl || is_gpu_sycl) { ++ ++ auto buffer = mem.get_sycl_buffer(); ++ auto dst = buffer.get_access(); ++ uint8_t *dst_ptr = dst.get_pointer(); ++ ++ if (!dst_ptr || !handle) { ++ std::cerr << "memory 
is NULL" ++ << "\n"; ++ return; ++ } ++ for (size_t i = 0; i < size; ++i) ++ dst_ptr[i] = ((uint8_t *)handle)[i]; ++ return; ++ } ++ ++ if (eng.get_kind() == dnnl::engine::kind::cpu) { ++ uint8_t *dst = static_cast(mem.get_data_handle()); ++ if (!dst || !handle) { ++ std::cerr << "memory is NULL" ++ << "\n"; ++ return; ++ } ++ for (size_t i = 0; i < size; ++i) ++ dst[i] = ((uint8_t *)handle)[i]; ++ return; ++ } ++ ++ assert(!"not expected"); ++} ++//<<<<<< ------ GPU code conversion --Step 2 ++ ++ ++ + void simple_net(int times = 100) { + using tag = memory::format_tag; + using dt = memory::data_type; + + +- engine eng(engine::kind::cpu, 0); ++// ------ GPU code conversion --Step 1 >>>>>> ++ engine eng(engine::kind::gpu, 0); ++//<<< <<<------ GPU code conversion --Step 1 ++ ++ + stream s(eng); + + std::vector net; +@@ -53,13 +103,17 @@ + //[Allocate buffers] + + ++// ------ GPU code conversion --Step 3 >>>>>> + auto user_src_memory = memory( +- { { conv1_src_tz }, dt::f32, tag::nchw }, eng, user_src.data()); ++ { { conv1_src_tz }, dt::f32, tag::nchw }, eng); ++ write_to_dnnl_memory(user_src.data(), user_src_memory); + auto user_weights_memory +- = memory({ { conv1_weights_tz }, dt::f32, tag::oihw }, eng, +- conv1_weights.data()); ++ = memory({ { conv1_weights_tz }, dt::f32, tag::oihw }, eng); ++ write_to_dnnl_memory(conv1_weights.data(), user_weights_memory); + auto conv1_user_bias_memory = memory( +- { { conv1_bias_tz }, dt::f32, tag::x }, eng, conv1_bias.data()); ++ { { conv1_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(conv1_bias.data(), conv1_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 + + + +@@ -175,13 +229,18 @@ + std::vector conv2_weights(product(conv2_weights_tz)); + std::vector conv2_bias(product(conv2_bias_tz)); + ++ ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto conv2_user_weights_memory +- = memory({ { conv2_weights_tz }, dt::f32, tag::goihw }, eng, +- conv2_weights.data()); 
++ = memory({ { conv2_weights_tz }, dt::f32, tag::goihw }, eng); ++ write_to_dnnl_memory(conv2_weights.data(), conv2_user_weights_memory); + auto conv2_user_bias_memory = memory( +- { { conv2_bias_tz }, dt::f32, tag::x }, eng, conv2_bias.data()); ++ { { conv2_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(conv2_bias.data(), conv2_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 + ++ + // create memory descriptors for convolution data w/ no specified format + auto conv2_src_md = memory::desc({ conv2_src_tz }, dt::f32, tag::any); + auto conv2_bias_md = memory::desc({ conv2_bias_tz }, dt::f32, tag::any); +@@ -291,13 +350,18 @@ + std::vector conv3_weights(product(conv3_weights_tz)); + std::vector conv3_bias(product(conv3_bias_tz)); + ++ ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto conv3_user_weights_memory +- = memory({ { conv3_weights_tz }, dt::f32, tag::oihw }, eng, +- conv3_weights.data()); ++ = memory({ { conv3_weights_tz }, dt::f32, tag::oihw }, eng); ++ write_to_dnnl_memory(conv3_weights.data(), conv3_user_weights_memory); + auto conv3_user_bias_memory = memory( +- { { conv3_bias_tz }, dt::f32, tag::x }, eng, conv3_bias.data()); ++ { { conv3_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(conv3_bias.data(), conv3_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 + ++ + // create memory descriptors for convolution data w/ no specified format + auto conv3_src_md = memory::desc({ conv3_src_tz }, dt::f32, tag::any); + auto conv3_bias_md = memory::desc({ conv3_bias_tz }, dt::f32, tag::any); +@@ -364,13 +428,17 @@ + std::vector conv4_weights(product(conv4_weights_tz)); + std::vector conv4_bias(product(conv4_bias_tz)); + ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto conv4_user_weights_memory +- = memory({ { conv4_weights_tz }, dt::f32, tag::goihw }, eng, +- conv4_weights.data()); ++ = memory({ { conv4_weights_tz }, dt::f32, 
tag::goihw }, eng); ++ write_to_dnnl_memory(conv4_weights.data(), conv4_user_weights_memory); + auto conv4_user_bias_memory = memory( +- { { conv4_bias_tz }, dt::f32, tag::x }, eng, conv4_bias.data()); ++ { { conv4_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(conv4_bias.data(), conv4_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 + ++ + // create memory descriptors for convolution data w/ no specified format + auto conv4_src_md = memory::desc({ conv4_src_tz }, dt::f32, tag::any); + auto conv4_bias_md = memory::desc({ conv4_bias_tz }, dt::f32, tag::any); +@@ -436,13 +504,18 @@ + std::vector conv5_weights(product(conv5_weights_tz)); + std::vector conv5_bias(product(conv5_bias_tz)); + ++ ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto conv5_user_weights_memory +- = memory({ { conv5_weights_tz }, dt::f32, tag::goihw }, eng, +- conv5_weights.data()); ++ = memory({ { conv5_weights_tz }, dt::f32, tag::goihw }, eng); ++ write_to_dnnl_memory(conv5_weights.data(), conv5_user_weights_memory); + auto conv5_user_bias_memory = memory( +- { { conv5_bias_tz }, dt::f32, tag::x }, eng, conv5_bias.data()); ++ { { conv5_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(conv5_bias.data(), conv5_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 + ++ + // create memory descriptors for convolution data w/ no specified format + auto conv5_src_md = memory::desc({ conv5_src_tz }, dt::f32, tag::any); + auto conv5_weights_md +@@ -532,13 +605,18 @@ + std::vector fc6_weights(product(fc6_weights_tz)); + std::vector fc6_bias(product(fc6_bias_tz)); + ++ ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto fc6_user_weights_memory +- = memory({ { fc6_weights_tz }, dt::f32, tag::oihw }, eng, +- fc6_weights.data()); ++ = memory({ { fc6_weights_tz }, dt::f32, tag::oihw }, eng); ++ write_to_dnnl_memory(fc6_weights.data(), fc6_user_weights_memory); + auto 
fc6_user_bias_memory = memory( +- { { fc6_bias_tz }, dt::f32, tag::x }, eng, fc6_bias.data()); ++ { { fc6_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(fc6_bias.data(), fc6_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 + ++ + // create memory descriptors for convolution data w/ no specified format + auto fc6_src_md = memory::desc({ fc6_src_tz }, dt::f32, tag::any); + auto fc6_bias_md = memory::desc({ fc6_bias_tz }, dt::f32, tag::any); +@@ -583,13 +661,18 @@ + std::vector fc7_weights(product(fc7_weights_tz)); + std::vector fc7_bias(product(fc7_bias_tz)); + ++ ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto fc7_user_weights_memory = memory( +- { { fc7_weights_tz }, dt::f32, tag::nc }, eng, fc7_weights.data()); +- ++ { { fc7_weights_tz }, dt::f32, tag::nc }, eng); ++ write_to_dnnl_memory(fc7_weights.data(), fc7_user_weights_memory); + auto fc7_user_bias_memory = memory( +- { { fc7_bias_tz }, dt::f32, tag::x }, eng, fc7_bias.data()); +- ++ { { fc7_bias_tz }, dt::f32, tag::x }, eng); ++ write_to_dnnl_memory(fc7_bias.data(), fc7_user_bias_memory); ++//<<<<<< ------ GPU code conversion --Step 3 ++ ++ + // create memory descriptors for convolution data w/ no specified format + auto fc7_bias_md = memory::desc({ fc7_bias_tz }, dt::f32, tag::any); + auto fc7_weights_md = memory::desc({ fc7_weights_tz }, dt::f32, tag::any); +@@ -624,14 +707,20 @@ + std::vector fc8_weights(product(fc8_weights_tz)); + std::vector fc8_bias(product(fc8_bias_tz)); + ++// ------ GPU code conversion --Step 3 >>>>>> + // create memory for user data + auto fc8_user_weights_memory = memory( +- { { fc8_weights_tz }, dt::f32, tag::nc }, eng, fc8_weights.data()); ++ { { fc8_weights_tz }, dt::f32, tag::nc }, eng); ++ write_to_dnnl_memory(fc8_weights.data(), fc8_user_weights_memory); + auto fc8_user_bias_memory = memory( +- { { fc8_bias_tz }, dt::f32, tag::x }, eng, fc8_bias.data()); ++ { { fc8_bias_tz }, dt::f32, tag::x }, eng); ++ 
write_to_dnnl_memory(fc8_bias.data(), fc8_user_bias_memory); + auto user_dst_memory = memory( +- { { fc8_dst_tz }, dt::f32, tag::nc }, eng, user_dst.data()); +- ++ { { fc8_dst_tz }, dt::f32, tag::nc }, eng); ++ write_to_dnnl_memory(user_dst.data(), user_dst_memory); ++//<<<<<< ------ GPU code conversion --Step 3 ++ ++ + // create memory descriptors for convolution data w/ no specified format + auto fc8_bias_md = memory::desc({ fc8_bias_tz }, dt::f32, tag::any); + auto fc8_weights_md = memory::desc({ fc8_weights_tz }, dt::f32, tag::any); diff --git a/Libraries/oneDNN/simple_model/q b/Libraries/oneDNN/simple_model/q new file mode 100755 index 0000000000..8377675780 --- /dev/null +++ b/Libraries/oneDNN/simple_model/q @@ -0,0 +1,32 @@ +#!/bin/bash +#======================================== +# Script to submit job in Intel devcloud +# +# Version: 0.5 +#======================================== +if [ -z "$1" ]; then + echo "Missing script argument, Usage: ./q run.sh" +elif [ ! -f "$1" ]; then + echo "File $1 does not exist" +else + script=$1 + rm *.sh.* > /dev/null 2>&1 + #qsub + echo "Submitting job:" + qsub -l nodes=1:gpu:ppn=2 -d . $script + #qstat + qstat + #wait for output file to be generated and display + echo -ne "Waiting for Output." + until [ -f $script.o* ]; do + sleep 1 + echo -ne "." 
+ ((timeout++)) + if [ $timeout == 60 ]; then + echo "TimeOut 60 seconds: Job is still queued for execution, check for output file later (*.sh.o)" + break + fi + done + cat $script.o* + cat $script.e* +fi diff --git a/Libraries/oneDNN/simple_model/sample.json b/Libraries/oneDNN/simple_model/sample.json new file mode 100644 index 0000000000..9e2dfd9f20 --- /dev/null +++ b/Libraries/oneDNN/simple_model/sample.json @@ -0,0 +1,27 @@ +{ + "guid": "389BBED3-456D-4092-B6D8-DDF782865D66", + "name": "oneDNN CNN FP32 Inference", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneDNN"], + "description": "Run a simple CNN on both Intel CPU and GPU with sample C++ codes.", + "toolchain": ["dpcpp","gcc","icc"], + "languages": [{"cpp":{}}], + "dependencies": ["oneDNN", "tbb","compiler|icc"], + "os": ["linux"], + "builder": ["ide","cmake"], + "targetDevice": ["CPU", "GPU"], + "ciTests": { + "linux": [{ + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], + "id": "infer", + "steps": [ + "mkdir build", + "cd build", + "cmake .. 
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", + "make cnn-inference-f32-cpp", + "./out/cnn-inference-f32-cpp cpu", + "SYCL_BE=PI_OPENCL ./out/cnn-inference-f32-cpp gpu" + ] + }] + + } +} diff --git a/Libraries/oneDNN/simple_model/simple_model.ipynb b/Libraries/oneDNN/simple_model/simple_model.ipynb new file mode 100644 index 0000000000..077562c61b --- /dev/null +++ b/Libraries/oneDNN/simple_model/simple_model.ipynb @@ -0,0 +1,957 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Module 1.1 - port an Intel® oneAPI Deep Neural Network Library (oneDNN) sample from CPU to GPU - oneDNN CNN FP32 Inference\n", + "\n", + "## Learning Objectives\n", + "In this module the developer will:\n", + "* Learn how to port a oneDNN sample from a CPU-only version to a CPU&GPU version by using DPC++\n", + "* Learn how to program a simple convolutional neural network by using oneDNN\n", + "* Learn how to collect VTune™ Amplifier data for CPU and GPU runs and compare performance results\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Exercise : Porting oneDNN application from CPU to GPU \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1 : introduce oneDNN configurations inside Intel® oneAPI toolkits\n", + "oneDNN has four different configurations inside the Intel oneAPI toolkits. Each configuration is in a different folder under the oneDNN installation path, and each configuration supports different compilers or threading libraries." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set the installation path of your Intel oneAPI toolkit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%env ONEAPI_INSTALL=/opt/intel/oneapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!printf '%s\\n' $ONEAPI_INSTALL/oneDNN/latest/cpu_*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are 4 different folders under the oneDNN installation path, and each of those configurations supports different features. This tutorial will make use of two configurations." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First of all, create a lab folder for this exercise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir lab;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2 : scanning the cnn_inference_f32.cpp code which only supports CPU\n", + "\n", + "This C++ API example demonstrates how to build an AlexNet neural network topology for forward-pass inference, and it can run only on CPU.\n", + "You can find a detailed code explanation at this [link](https://oneapi-src.github.io/oneDNN/cnn_inference_f32_cpp.html)\n", + "\n", + "There is a cnn_inference_f32.cpp, which has a CPU-only implementation.\n", + "Let us copy it into the lab folder, and use it as the base of the lab.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp codes_for_ipynb/cnn_inference_f32.cpp lab/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The user could check the source file using the following command, but we recommend using the detailed code explanation at this 
[link](https://oneapi-src.github.io/oneDNN/cnn_inference_f32_cpp.html) instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cat lab/cnn_inference_f32.cpp " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, copy the required CMake file into the lab folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp $ONEAPI_INSTALL/oneDNN/latest/cpu_gomp/examples/CMakeLists.txt lab/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step3: Build and Execution\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run with GNU Compiler and OpenMP \n", + "For this CPU-only AlexNet neural network topology for forward-pass inference sample, the GNU compiler is used.\n", + "The following section guides you how to build with G++ and run on CPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force > /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir cpu_gomp\n", + "cd cpu_gomp\n", + "cmake .. 
-DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE\n", + "make cnn-inference-f32-cpp\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the Intel DevCloud or in local environments.\n", + "\n", + "#### Script - run.sh\n", + "the script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user must switch to the G++ oneDNN configuration by inputting a custom configuration \"--dnnl-configuration=cpu_gomp\" when running \"source setvars.sh\".\n", + "\n", + "By default, oneDNN Verbose log is disabled.\n", + "You can unmark #export DNNL_VERBOSE=1 to enable oneDNN verbose log." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# unmark below line to enable oneDNN verbose log\n", + "#export DNNL_VERBOSE=1\n", + "./cpu_gomp/out/cnn-inference-f32-cpp\n", + "echo \"########## Done with the run\"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "!rm -rf cpu_gomp; chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Enable oneDNN Verbose log and check the engine kind for each operation\n", + "cpu should be the engine kind for most of the operations, and you should be able to check the engine kind after \"dnnl_verbose,exec,\" for each operation.\n", + "Check this [link](https://oneapi-src.github.io/oneDNN/dev_guide_verbose.html) for a detailed explanation of oneDNN verbose log.\n", + "\n", + "Below is an example for oneDNN verbose log for convolution on CPU:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dnnl_verbose,exec,cpu,convolution,jit:avx2,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:Acdb8a:f0 bia_f32::blocked:a:f0 dst_f32::blocked:aBcd8b:f0,,alg:convolution_direct,mb1_ic3oc96_ih227oh55kh11sh4dh0ph0_iw227ow55kw11sw4dw0pw0,0.458008" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Analyze performance with VTune Amplifier\n", + "Use the VTune™ Amplifier command line to analyze performance and display the summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do CPU profiling first. \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force\n", + "type=hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir $(pwd)/vtune_data $(pwd)/cpu_gomp/out/cnn-inference-f32-cpp\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display VTune Amplifier summary report generated in HTML format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do GPU profiling \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The profiling type is changed from hotspots to gpu-hotspots in below script to do basic GPU profiling." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force\n", + "type=gpu-hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir $(pwd)/vtune_data_data $(pwd)/cpu_gomp/out/cnn-inference-f32-cpp\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data_data -format html -report-output $(pwd)/summary_gpu.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display VTune Amplifier summary report generated in HTML format" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the VTune Amplifier summary page, the GPU is stalled/idle all the time. this sample doesn't utilize GPU at all." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary_gpu.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5 : Modifying the cnn_inference_f32.cpp code to support both CPU and GPU\n", + "\n", + "In this session, we will convert the above cnn_inference_f32.cpp to support both CPU and GPU and compile the sample with DPC++ instead of G++.\n", + "\n", + "There are three steps to do the code conversion from CPU to GPU for this sample.\n", + "\n", + "* Step 1 : change engine::kind from CPU to GPU\n", + "* Step 2 : implement a function to access GPU memory via SYCL buffer and its accessor\n", + "* Step 3 : write user's data into GPU memory via the implemented function from Step 2\n", + "\n", + "There is a cnn_inference_f32.patch file inside the codes_for_ipynb folder. It contains all the changes for porting CPU to GPU against the CPU-only version of cnn_inference_f32.cpp.\n", + "First we must patch the cnn_inference_f32.cpp under the lab folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cd lab;patch < ../codes_for_ipynb/cnn_inference_f32.patch;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Users can check the source file using the following command." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cat lab/cnn_inference_f32.cpp " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can find related modification in below cnn_inference_f32.cpp, and the modifications for each step are wrapped up with \">>>>>>\" and \"<<<<<<\".\n", + "\n", + "### step1 : change engine::kind from CPU to GPU\n", + "changing engine kind from cpu to gpu during engine instantiation.\n", + "* Before patching : engine eng(engine::kind::cpu, 0);\n", + "* After patching : engine eng(engine::kind::gpu, 0);\n", + "\n", + "### step 2 : implement a function to access GPU memory via SYCL buffer and its accessor\n", + "You can refer to the below function write_to_dnnl_memory for that.\n", + "overall, we use SYCL buffer and its accessor to access GPU memory.\n", + "auto buffer = mem.get_sycl_buffer();\n", + "auto dst = buffer.get_access();" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "+// ------ GPU code conversion --Step 2 >>>>>>\n", + "+// Read from handle, write to memory\n", + "+inline void write_to_dnnl_memory(void *handle, dnnl::memory &mem) {\n", + "+\n", + "+ dnnl::engine eng = mem.get_engine();\n", + "+ size_t size = mem.get_desc().get_size();\n", + "+\n", + "+ bool is_cpu_sycl = (DNNL_CPU_RUNTIME == DNNL_RUNTIME_SYCL\n", + "+ && eng.get_kind() == dnnl::engine::kind::cpu);\n", + "+ bool is_gpu_sycl = (DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL\n", + "+ && eng.get_kind() == dnnl::engine::kind::gpu);\n", + "+ if (is_cpu_sycl || is_gpu_sycl) {\n", + "+\n", + "+ auto buffer = mem.get_sycl_buffer();\n", + "+ auto dst = buffer.get_access();\n", + "+ uint8_t *dst_ptr = dst.get_pointer();\n", + "+\n", + "+ if (!dst_ptr || !handle) {\n", + "+ std::cerr << \"memory is NULL\"\n", + "+ << \"\\n\";\n", + "+ return;\n", + "+ }\n", + "+ for (size_t i = 0; i < size; ++i)\n", + "+ dst_ptr[i] = ((uint8_t *)handle)[i];\n", + "+ return;\n", + "+ 
}\n", + "+\n", + "+ if (eng.get_kind() == dnnl::engine::kind::cpu) {\n", + "+ uint8_t *dst = static_cast<uint8_t *>(mem.get_data_handle());\n", + "+ if (!dst || !handle) {\n", + "+ std::cerr << \"memory is NULL\"\n", + "+ << \"\\n\";\n", + "+ return;\n", + "+ }\n", + "+ for (size_t i = 0; i < size; ++i)\n", + "+ dst[i] = ((uint8_t *)handle)[i];\n", + "+ return;\n", + "+ }\n", + "+\n", + "+ assert(!\"not expected\");\n", + "+}\n", + "+//<<<<<< ------ GPU code conversion --Step 2\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " #### Step 3 : write user's data into GPU memory via the implemented function from Step 2\n", + " To place user data in GPU memory, we can't write through the host pointer; we use the write_to_dnnl_memory function instead. Refer to the code snapshot below." + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + " auto user_src_memory = memory(\n", + "- { { conv1_src_tz }, dt::f32, tag::nchw }, eng, user_src.data());\n", + "+ { { conv1_src_tz }, dt::f32, tag::nchw }, eng);\n", + "+ write_to_dnnl_memory(user_src.data(), user_src_memory);\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build and Run with oneAPI DPC++ Compiler \n", + "For this AlexNet neural network topology for forward-pass inference sample on GPU, DPC++ is used as the compiler.\n", + "The following section guides you how to build with DPC++ and run on GPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler command and flags that will generate the executable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force > /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir dpcpp\n", + "cd dpcpp\n", + "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -DDNNL_CPU_RUNTIME=SYCL -DDNNL_GPU_RUNTIME=SYCL\n", + "make cnn-inference-f32-cpp\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or in local environments.\n", + "\n", + "#### Script - run.sh\n", + "the script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "\n", + "By default, oneDNN Verbose log is disabled.\n", + "You can unmark #export DNNL_VERBOSE=1 to enable oneDNN verbose log." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "#export DNNL_VERBOSE=1\n", + "./dpcpp/out/cnn-inference-f32-cpp gpu\n", + "echo \"########## Done with the run\"\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit the **build.sh** and **run.sh** to the job queue.\n", + "\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts both on the DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails it is assumed that build/run will be local." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!rm -rf dpcpp; chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Enable oneDNN Verbose log and check the engine kind for each operation\n", + "gpu should be the engine kind for most of the operations, and you should be able to check the engine kind after \"dnnl_verbose,exec,\" for each operation.\n", + "Check this [link](https://oneapi-src.github.io/oneDNN/dev_guide_verbose.html) for a detailed explanation of oneDNN verbose log.\n", + "\n", + "Below is an example for oneDNN verbose log for convolution on GPU:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dnnl_verbose,exec,gpu,convolution,ocl:gen9:blocked,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32::blocked:aBcd16b:f0,,alg:convolution_direct,mb1_ic3oc96_ih227oh55kh11sh4dh0ph0_iw227ow55kw11sw4dw0pw0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Analyze performance with VTune Amplifier\n", + "Use the VTune Amplifier command line to analyze performance and display the summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do CPU profiling first. \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force\n", + "type=hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir vtune_data $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display VTune Amplifier summary report generated in HTML format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### do GPU profiling \n", + "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile vtune_collect.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force\n", + "type=gpu-hotspots\n", + "\n", + "rm -r $(pwd)/vtune_data_gpu\n", + "\n", + "echo \"VTune Collect $type\"\n", + "vtune -collect $type -result-dir $(pwd)/vtune_data_gpu $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", + "\n", + "\n", + "echo \"VTune Summary Report\"\n", + "vtune -report summary -result-dir $(pwd)/vtune_data_gpu -format html -report-output $(pwd)/summary_gpu.html\n", + "echo \"Done profiling\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", + "Collect VTune Amplifier data and generate report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Display VTune Amplifier Summary\n", + "Display VTune Amplifier summary report generated in HTML format" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='summary_gpu.html', width=960, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here are the supported profiling types from VTune Amplifier.\n", + "\n", + "* type=hotspots\n", + "* type=memory-consumption\n", + "* type=uarch-exploration\n", + "* type=memory-access\n", + "* type=threading\n", + "* type=hpc-performance\n", + "* type=system-overview\n", + "* type=graphics-rendering\n", + "* type=io\n", + "* type=fpga-interaction\n", + "* type=gpu-offload\n", + "* type=gpu-hotspots\n", + "* type=throttling\n", + "* type=platform-profiler\n", + 
"* type=cpugpu-concurrency\n", + "* type=tsx-exploration\n", + "* type=tsx-hotspots\n", + "* type=sgx-hotspots\n", + "\n", + "For details of VTune Amplifier usage, refer to https://software.intel.com/en-us/oneapi/vtune-profiler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 7: Offload Analysis with Advisor\n", + "Use the Advisor command line to do offload analysis and display the summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Advisor Command-Line for collecting and reporting \"offload\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile advisor_offload.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force\n", + "rm -rf advisor_offload\n", + "advixe-python $APM/collect.py advisor_offload --config gen9 -- $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", + "advixe-python $APM/analyze.py advisor_offload --config gen9 --out-dir ./advisor_offload/report\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! chmod 755 advisor_offload.sh; if [ -x \"$(command -v qsub)\" ]; then ./q advisor_offload.sh; else ./advisor_offload.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display Advisor \"offload\" report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='./advisor_offload/report/report.html', width=800, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 8: Advisor Roofline Analysis\n", + "This section shows how to collect and generate a roofline report using Intel Advisor. 
Below is an Advisor-generated \"roofline\" report" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Advisor Command-Line for collecting and reporting \"roofline\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile advisor_roofline.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force\n", + "export ADVIXE_EXPERIMENTAL=gpu-profiling\n", + "advixe-cl --collect=survey --enable-gpu-profiling --project-dir=./advisor_roofline --search-dir src:r=. -- $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", + "advixe-cl --collect=tripcounts --stacks --flop --enable-gpu-profiling --project-dir=./advisor_roofline --search-dir src:r=. -- $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", + "advixe-cl --report=roofline --gpu --project-dir=./advisor_roofline --report-output=./advisor_roofline/roofline.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
chmod 755 advisor_roofline.sh; if [ -x \"$(command -v qsub)\" ]; then ./q advisor_roofline.sh; else ./advisor_roofline.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Display Advisor \"roofline\" report" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import IFrame\n", + "IFrame(src='./advisor_roofline/roofline.html', width=800, height=600)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Summary\n", + "In this lab, the developer learned the following:\n", + "* How to port a oneDNN sample from CPU-only version to CPU&GPU version\n", + "* How to program a simple convolutional neural network by using oneDNN" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "525.6px", + "left": "28px", + "top": "137.8px", + "width": "301.109px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From aaaafafff867782ed770042178c8852ac6640d2d Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Wed, 19 Aug 2020 19:09:49 -0700 Subject: [PATCH 02/11] fix a ciTest typo --- Libraries/oneDNN/dpcpp_interoperability/sample.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Libraries/oneDNN/dpcpp_interoperability/sample.json b/Libraries/oneDNN/dpcpp_interoperability/sample.json index 
a52693fdcf..0289379abe 100644 --- a/Libraries/oneDNN/dpcpp_interoperability/sample.json +++ b/Libraries/oneDNN/dpcpp_interoperability/sample.json @@ -8,7 +8,7 @@ "dependencies": ["oneDNN", "tbb"], "os": ["linux"], "builder": ["ide","cmake"], - "targetDevice": ["CPU", "GPU"] + "targetDevice": ["CPU", "GPU"], "ciTests": { "linux": [{ "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], From 1181b85e616114b4c5b2e22bd4c6e39163306a9e Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Thu, 20 Aug 2020 14:53:53 -0700 Subject: [PATCH 03/11] sync up with latest changes from vadim --- .../oneCCL/oneCCL_Getting_Started/License.txt | 20 +- .../oneCCL/oneCCL_Getting_Started/README.md | 6 +- .../dpcpp_interoperability/CMakeLists.txt | 13 +- .../oneDNN/dpcpp_interoperability/License.txt | 20 +- .../oneDNN/dpcpp_interoperability/README.md | 142 +++--- .../oneDNN/getting_started/CMakeLists.txt | 21 +- Libraries/oneDNN/getting_started/License.txt | 20 +- Libraries/oneDNN/getting_started/README.md | 218 ++++----- Libraries/oneDNN/simple_model/CMakeLists.txt | 21 +- Libraries/oneDNN/simple_model/License.txt | 20 +- Libraries/oneDNN/simple_model/README.md | 325 ++++--------- .../oneDNN/simple_model/simple_model.ipynb | 436 +----------------- 12 files changed, 298 insertions(+), 964 deletions(-) diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/License.txt b/Libraries/oneCCL/oneCCL_Getting_Started/License.txt index 8b5e331b54..e63c6e13dc 100644 --- a/Libraries/oneCCL/oneCCL_Getting_Started/License.txt +++ b/Libraries/oneCCL/oneCCL_Getting_Started/License.txt @@ -1,19 +1,7 @@ -Copyright (c) 2020, Intel Corporation. All rights reserved. 
+Copyright Intel Corporation -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/README.md b/Libraries/oneCCL/oneCCL_Getting_Started/README.md index 78e7ded1bd..0b77c073bf 100644 --- a/Libraries/oneCCL/oneCCL_Getting_Started/README.md +++ b/Libraries/oneCCL/oneCCL_Getting_Started/README.md @@ -22,7 +22,8 @@ Users can use JupyterLab from DevCloud via "One-click Login in", and download sa Once users are in the JupyterLab with download jupytered notebook samples, they can start following the steps without further installion needed. ## Purpose -The sample demonstrates how to compile the code with various oneCCL configurations in Intel oneAPI environment. +The samples implement the allreduce collective operation with oneCCL APIs. +With the samples users will learn how to compile the code with various oneCCL configurations in Intel oneAPI environment. ## License Those code samples are licensed under MIT license @@ -97,6 +98,9 @@ Users can rebuild the cpu_allreduce_cpp_test.cpp by typing "make cpu_allreduce_c > NOTE: The source file "sycl_allreduce_cpp_test.cpp" will be copied from ${INTEL_ONEAPI_INSTALL_FOLDER}/ccl/latest/examples/sycl to build/src/sycl folder. Users can rebuild the sycl_allreduce_cpp_test.cpp by typing "make sycl_allreduce_cpp_test" under build folder. +### Include Files +The include folder is located at ${CCL_ROOT}/include on your development system. 
+ ## Running the Sample ### on a Linux* System diff --git a/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt b/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt index 47f17001bb..4327bfb627 100644 --- a/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt +++ b/Libraries/oneDNN/dpcpp_interoperability/CMakeLists.txt @@ -1,7 +1,12 @@ cmake_minimum_required(VERSION 2.8.11) -set(CMAKE_C_COMPILER "clang") -set(CMAKE_CXX_COMPILER "dpcpp") -project (oneDNN_SYCL_InterOp) +if("${CMAKE_CXX_COMPILER}" STREQUAL "") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") +endif() +project (dpcpp_interoperability) file(COPY $ENV{DNNLROOT}/examples/sycl_interop.cpp DESTINATION src) file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) -add_subdirectory (${PROJECT_BINARY_DIR}/src out) +if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) +endif() +add_subdirectory (${PROJECT_BINARY_DIR}/src bin) diff --git a/Libraries/oneDNN/dpcpp_interoperability/License.txt b/Libraries/oneDNN/dpcpp_interoperability/License.txt index 8b5e331b54..e63c6e13dc 100644 --- a/Libraries/oneDNN/dpcpp_interoperability/License.txt +++ b/Libraries/oneDNN/dpcpp_interoperability/License.txt @@ -1,19 +1,7 @@ -Copyright (c) 2020, Intel Corporation. All rights reserved. 
+Copyright Intel Corporation -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Libraries/oneDNN/dpcpp_interoperability/README.md b/Libraries/oneDNN/dpcpp_interoperability/README.md index 064e80ce72..5e26388738 100644 --- a/Libraries/oneDNN/dpcpp_interoperability/README.md +++ b/Libraries/oneDNN/dpcpp_interoperability/README.md @@ -1,6 +1,8 @@ # oneDNN DPC++ Interoperability Sample -This sample is implemented in DPC++ language and runs on CPU and GPU. +This C++ API example demonstrates programming for Intel(R) Processor Graphics with SYCL extensions API in oneDNN. +Users will know how to access SYCL buffer and queue via oneDNN SYCL interoperability interfaces, +and this interface also helps users to execute a custom SYCL kernel with oneDNN library. | Optimized for | Description | :--- | :--- @@ -10,8 +12,11 @@ This sample is implemented in DPC++ language and runs on CPU and GPU. | What you will learn | Using oneDNN in DPC++ application targeting Intel CPU or Intel GPU | Time to complete | 15 minutes -## What You Will Learn +## Purpose +This sample demonstrates programming for Intel(R) Processor Graphics with SYCL extensions API in oneDNN. + +With this sample you will learn: * How to create a GPU or CPU engine. * How to create a memory descriptor/object. * How to create a SYCL kernel for data initialization. @@ -22,107 +27,102 @@ This sample is implemented in DPC++ language and runs on CPU and GPU. * How to execute the primitive with the initialized memory. * How to validate the result through a host accessor. -## Pre-requisites - -The sample below require the following components, which are part of -Intel oneAPI Base Toolkit (Base Kit): +The sample executes on system's CPU by default and can be executed on Intel GPU +using a command line parameter `gpu`. 
-* Intel oneAPI Deep Neural Network Library (oneDNN) -* Intel oneAPI DPC++ Compiler -* Intel oneAPI Threading Building Blocks (oneTBB) -* Intel Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver +## Key Implementation Details -Refer to [Intel oneAPI Toolkits Installation Guide](https://software.intel.com/content/www/us/en/develop/articles/installation-guide-for-intel-oneapi-toolkits.html) -for instructions on installing these components. +This sample uses example file `${DNNLROOT}/examples/sycl_interop.cpp` +from oneDNN distribution. You can find this code in +[oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/sycl_interop.cpp). -## Building the sample for CPU and GPU +Detailed code walkthrough is available in [oneDNN developer guide](https://oneapi-src.github.io/oneDNN/v2/sycl_interop_cpp.html) -### on a Linux* System +## License -#### Using DPC++ Compiler +This code sample is licensed under MIT license. -When compiled with Intel oneAPI DPC++ Compiler this sample runs on Intel CPU -or Intel GPU. +## Building the sample for CPU and GPU -Start with a clean console environment. +### On a Linux System +Perform the following steps: +1. Setup oneAPI development environment ``` source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh ``` - -Specific oneDNN configuration may be selected with -`--dnnl-configuraition` option. Defailt configuration is `cpu_dpcpp_gpu_dpcpp`. - -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. -If setvars.sh complains "not found" for compiler or oneDNN, please check your -installation first. - +2. Build the program using `cmake` ``` -cd oneapi-toolkit/oneDNN/oneDNN_SYCL_InterOp -mkdir dpcpp -cd dpcpp -cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -make sycl-interop-cpp +mkdir build +cd build +cmake .. +make +``` +3. 
Run the program +``` +./bin/sycl-interop-cpp ``` -> NOTE: The source file `sycl_interop.cpp` will be in `dpcpp/src` folder. -> You can rebuild the sample by typing `make` in `dpcpp` folder. - -## Running the Sample - -### on a Linux* System -Run the program on CPU: +### On a Windows* System Using Visual Studio* Version 2017 or Newer +Open "x64 Native Tools Command Prompt for VS2017" or +"x64 Native Tools Command Prompt for VS2019" and perform the following steps: +1. Setup oneAPI development environment ``` -./out/sycl-interop-cpp cpu +"C:\Program Files (x86)\intel\oneapi\setvars.bat" +``` +2. Build the program using `cmake` +``` +mkdir build +cd build +cmake -G "Visual Studio 16 2019" .. +cmake --build . ``` -Run the program on GPU +> Note: You can open the `dpcpp_interoperability.sln` in build folder to edit source +> code with Microsoft Visual Studio integrated development environment. +3. Run the program ``` -./out/sycl-interop-cpp gpu +bin\Debug\sycl-interop-cpp.exe ``` -> NOTE: Zero Level runtime is enabled by default. Please make sure proper -> installation of Level Zero driver including level-zero-devel package following -> installation guide. If you still encounter runtime issue such as "could not -> create a primitive", please apply workaround to set SYCL_BE=PI_OPENCL before -> running a DPC++ program. To apply the workaround in this sample add -> `export SYCL_BE=PI_OPENCL` in CMakeLists.txt. After applying the worklaround, -> the sample will use OpenCL runtime instead. +### Include Files -### Example of Output +The include folder is located at ${DNNLROOT}\include on your development system. -#### on a Linux* System +## Running the Sample -Enable oneDNN verbose log: +### Running Samples In DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, FPGA) as well as whether to run in batch or interactive mode. 
For more information see the Intel® oneAPI Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) -``` -export DNNL_VERBOSE=1 -``` +### Application Parameters -Run the program on CPU or GPU following [How to Run Session](#how-to-run). +You can specify target device for this sample using command line arguments: +* `cpu` (default) directs the application to run on system's CPU +* `gpu` directs the sample to run on Intel GPU -CPU Results: +> Note: When executed with `gpu` parameter the +> sample will return an error if no Intel GPUs are found in the system. -``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_training,data_f32::blocked:abcd:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,2x3x4x5,700.608 -Example passes -``` +You can get additional information during execution of this sample by setting +environment variable `DNNL_VERBOSE=1`. -GPU Results: +### Example of Output ``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -dnnl_verbose,exec,gpu,eltwise,ocl:ref:any,forward_training,data_f32::blocked:abcd:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,2x3x4x5 -Example passes +Example passed on CPU. 
``` -## Implementation Details +When executed with `DNNL_VERBOSE=1`: +``` +dnnl_verbose,info,oneDNN v1.95.0 (commit ae08a30fff7f76759fd4c5093c01707d0ee12c4c) +dnnl_verbose,info,cpu,runtime:DPC++ +dnnl_verbose,info,cpu,isa:Intel AVX2 +dnnl_verbose,info,gpu,runtime:DPC++ +dnnl_verbose,info,cpu,engine,0,backend:OpenCL,name:Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz,driver_version:2020.10.7 +dnnl_verbose,info,gpu,engine,0,backend:Level Zero,name:Intel(R) Gen12LP,driver_version:0.8.0 +dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_training,data_f32::blocked:abcd:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,2x3x4x5,0.36499 +Example passed on CPU. +``` -This sample uses example code from oneDNN distribution. You can find this code -in [oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/sycl_interop.cpp). diff --git a/Libraries/oneDNN/getting_started/CMakeLists.txt b/Libraries/oneDNN/getting_started/CMakeLists.txt index 3799a98c7a..efbdd1aba1 100644 --- a/Libraries/oneDNN/getting_started/CMakeLists.txt +++ b/Libraries/oneDNN/getting_started/CMakeLists.txt @@ -1,17 +1,12 @@ cmake_minimum_required(VERSION 2.8.11) if("${CMAKE_CXX_COMPILER}" STREQUAL "") - set(CMAKE_C_COMPILER "clang") - set(CMAKE_CXX_COMPILER "dpcpp") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") endif() -project (oneDNN_Getting_Started) -if("$ENV{EXAMPLE_ROOT}" STREQUAL "") - message(" - use default examples") - file(COPY $ENV{DNNLROOT}/examples/getting_started.cpp DESTINATION src) - file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) - if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) - file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) - endif() - add_subdirectory (${PROJECT_BINARY_DIR}/src out) -else() - add_subdirectory ($ENV{EXAMPLE_ROOT} out) +project (getting_started) +file(COPY $ENV{DNNLROOT}/examples/getting_started.cpp DESTINATION src) +file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION 
src) +if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) endif() +add_subdirectory (${PROJECT_BINARY_DIR}/src bin) diff --git a/Libraries/oneDNN/getting_started/License.txt b/Libraries/oneDNN/getting_started/License.txt index 8b5e331b54..e63c6e13dc 100644 --- a/Libraries/oneDNN/getting_started/License.txt +++ b/Libraries/oneDNN/getting_started/License.txt @@ -1,19 +1,7 @@ -Copyright (c) 2020, Intel Corporation. All rights reserved. +Copyright Intel Corporation -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Libraries/oneDNN/getting_started/README.md b/Libraries/oneDNN/getting_started/README.md index 8177c0429d..ed644fcc28 100644 --- a/Libraries/oneDNN/getting_started/README.md +++ b/Libraries/oneDNN/getting_started/README.md @@ -1,184 +1,152 @@ # oneDNN Getting Started Sample +oneAPI Deep Neural Network Library (oneDNN) is an open-source performance +library for deep learning applications. The library includes basic building +blocks for neural networks optimized for Intel Architecture Processors +and Intel Processor Graphics. oneDNN is intended for deep learning +applications and framework developers interested in improving application +performance on Intel CPUs and GPUs. +You can find library source code and code used by these samples at oneDNN Github repository. + This sample is implemented in C++ and executes on CPU or GPU. The sample also also includes [a Jupyer Notebook](getting_started.ipynb) that demonstrates how to compile the code with various oneDNN configurations in Intel oneAPI DevCloud environment. 
-| Optimized for | Description -| :--- | :--- -| OS | Linux Ubuntu 18.04; Windows 10 -| Hardware | Kaby Lake with GEN9 or newer -| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB) -| What you will learn | basic oneDNN programming model for Intel CPU and GPU -| Time to complete | 15 minutes +| Optimized for | Description +| :--- | :--- +| OS | Linux* Ubuntu* 18.04; Windows 10 +| Hardware | Skylake with GEN9 or newer +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler Collection, Intel C++ Compiler +| What you will learn | Running a simple convolutional model on Intel CPU or Intel GPU +| Time to complete | 15 minutes -## What You Will Learn +## Purpose +This sample demonstrates the basics of oneDNN programming model. With this +sample you will learn: * How to create oneDNN memory objects. * How to get data from application buffer into a oneDNN memory object. * How tensor's logical dimensions and memory object formats relate. * How to create oneDNN primitives. * How to execute the primitives. -## Pre-requisites - -The sample below require the following components, which are part of -Intel oneAPI Base Toolkit (Base Kit): - -* Intel oneAPI Deep Neural Network Library (oneDNN) -* Intel oneAPI DPC++ Compiler -* Intel oneAPI Threading Building Blocks (oneTBB) -* Intel Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver +The sample executes on system's CPU by default and can be executed on Intel GPU +using a command line parameter `gpu`. -Refer to [Intel oneAPI Toolkits Installation Guide](https://software.intel.com/content/www/us/en/develop/articles/installation-guide-for-intel-oneapi-toolkits.html) -for instructions on installing these components. 
+## Key Implementation Details -## Building the sample for CPU and GPU - -### On a Linux* System +This sample uses example file `${DNNLROOT}/examples/getting_started.cpp` +from oneDNN distribution. You can find this code in +[oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/getting_started.cpp). -#### Using DPC++ Compiler +Detailed code walkthrough is available in [oneDNN developer guide](https://oneapi-src.github.io/oneDNN/v2/getting_started.html) -When compiled with Intel DPC++ Compiler this sample runs on Intel CPU -or Intel GPU and relies on Intel DPC++ Runtime for parallelism. +## License +This code sample is licensed under MIT license. +## Building the sample for CPU and GPU -Start with a clean console environment. +### On a Linux System +Perform the following steps: +1. Setup oneAPI development environment ``` source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh ``` - -Specific oneDNN configuration may be selected with -`--dnnl-configuraition` option. Defailt configuration is `cpu_dpcpp_gpu_dpcpp`. - -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. If setvars.sh complains "not found" for -compiler or oneDNN, please check your installation first. - +2. Build the program using `cmake` ``` -cd oneapi-toolkit/oneDNN/oneDNN_Getting_Started -mkdir dpcpp -cd dpcpp -cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -make getting-started-cpp +mkdir build +cd build +cmake .. +make ``` - -> NOTE: The source file `getting_started.cpp` will be copied from ->`${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/dpcpp` to `dpcpp/src folder`. -> You can rebuild the sample by typing `make` in `dpcpp` folder. - -### On a Windows* System - -When compiler with Microsoft C++ Compiler the sample runs on Intel CPU and uses -Microsoft OpenMP runtime for parallelism. - -#### Visual Studio* Version 2015 or Newe - -Start with Intel oneAPI command prompt for Microsoft Visual Studio. 
- +3. Run the program ``` -C:\Program Files (x86)\intel\oneapi> oneDNN\latest\env\vars.bat --dnnl-configuration=cpu_vcomp +./bin/getting-started-cpp ``` -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. - +By default the sample uses oneAPI DPC++ Compiler and can execute on CPUs or +Intel GPUs. You can build the sample with CPU support with other compilers +and threading runtimes: +* GNU C++ Compiler and GNU OpenMP runtime ``` -cd oneapi-toolkit/oneDNN/oneDNN_Getting_Started -mkdir cpu_vcomp -cd cpu_vcomp -cmake -G "Visual Studio 16 2019" .. -cmake --build . +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_gomp +CC=gcc CXX=g++ cmake .. ``` - -> NOTE: You can open the oneDNN_CNN.sln inside cpu_vcomp folder to edit source -> code with Microsoft Visual Studio integrated development environment. - -## Running the Sample - -### On a Linux* System - -Run the program on CPU - +* Intel C++ Compiler and Intel OpenMP runtime ``` -./out/getting-started-cpp cpu +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_iomp +CC=icc CXX=icpc cmake .. ``` - -Run the program on GPU - +* Intel C++ Compiler and TBB runtime ``` -./out/getting-started-cpp gpu +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_tbb +CC=icc CXX=icpc cmake .. ``` -> NOTE: Zero Level runtime is enabled by default. Please make sure proper -> installation of Level Zero driver including level-zero-devel package following -> installation guide. If you still encounter runtime issue such as "could not -> create a primitive", please apply workaround to set SYCL_BE=PI_OPENCL before -> running a DPC++ program. To apply the workaround in this sample add -> `export SYCL_BE=PI_OPENCL` in CMakeLists.txt. After applying the worklaround, -> the sample will use OpenCL runtime instead. 
- -### On a Windows* System - -Run the program on CPU +### On a Windows* System Using Visual Studio* Version 2017 or Newer +Open "x64 Native Tools Command Prompt for VS2017" or +"x64 Native Tools Command Prompt for VS2019" and perform the following steps: +1. Setup oneAPI development environment ``` -out\Debug\getting-started-cpp.exe +"C:\Program Files (x86)\intel\oneapi\setvars.bat" ``` - -### Example of Output - -#### On a Linux* System - -Enable oneDNN verbose log - +2. Build the program using `cmake` ``` -export DNNL_VERBOSE=1 +mkdir build +cd build +cmake -G "Visual Studio 16 2019" .. +cmake --build . ``` -Run the program on CPU or GPU following [How to Run Session](#how-to-run) +> Note: You can open the `getting_started.sln` in build folder to edit source +> code with Microsoft Visual Studio integrated development environment. -CPU Results: +3. Run the program ``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13,704.982 -Example passes +bin\Debug\getting-started-cpp.exe ``` -GPU Results: +### Include Files -``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -dnnl_verbose,exec,gpu,eltwise,ocl:ref:any,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13 -Example passes -``` +The include folder is located at ${DNNLROOT}\include on your development system. -#### On a Windows* System +## Running the Sample -Enable oneDNN verbose log +### Running Samples In DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, FPGA) as well as whether to run in batch or interactive mode. 
For more information see the Intel® oneAPI Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) -``` -set DNNL_VERBOSE=1 -``` +### Application Parameters -Run the program on CPU or GPU following [How to Run Session](#how-to-run). +You can specify target device for this sample using command line arguments: +* `cpu` (default) directs the application to run on system's CPU +* `gpu` directs the sample to run on Intel GPU -CPU Results: +> Note: When executed with `gpu` parameter the +> sample will return an error if the sample is compiled with oneDNN configuration +> that does not support GPU or no Intel GPUs are found in the system. + +You can get additional information during execution of this sample by setting +environment variable `DNNL_VERBOSE=1`. + +### Example of Output ``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13,704.982 -Example passes +Example passed on CPU. ``` -## Implementation Details - -This sample uses example code from oneDNN distribution. You can find this code -in [oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/getting_started.cpp). 
+When executed with `DNNL_VERBOSE=1`: +``` +dnnl_verbose,info,oneDNN v1.95.0 (commit ae08a30fff7f76759fd4c5093c01707d0ee12c4c) +dnnl_verbose,info,cpu,runtime:DPC++ +dnnl_verbose,info,cpu,isa:Intel AVX2 +dnnl_verbose,info,gpu,runtime:DPC++ +dnnl_verbose,info,cpu,engine,0,backend:OpenCL,name:Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz,driver_version:2020.10.7 +dnnl_verbose,info,gpu,engine,0,backend:Level Zero,name:Intel(R) Gen12LP,driver_version:0.8.0 +dnnl_verbose,exec,cpu,eltwise,jit:avx2,forward_inference,data_f32::blocked:acdb:f0 diff_undef::undef::f0,,alg:eltwise_relu alpha:0 beta:0,1x3x13x13,0.125 +Example passed on CPU. +``` diff --git a/Libraries/oneDNN/simple_model/CMakeLists.txt b/Libraries/oneDNN/simple_model/CMakeLists.txt index d54283bd9a..82baa34153 100644 --- a/Libraries/oneDNN/simple_model/CMakeLists.txt +++ b/Libraries/oneDNN/simple_model/CMakeLists.txt @@ -1,17 +1,12 @@ cmake_minimum_required(VERSION 2.8.11) if("${CMAKE_CXX_COMPILER}" STREQUAL "") - set(CMAKE_C_COMPILER "clang") - set(CMAKE_CXX_COMPILER "dpcpp") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") endif() -project (oneDNN_CNN) -if("$ENV{EXAMPLE_ROOT}" STREQUAL "") - message(" - use default examples") - file(COPY $ENV{DNNLROOT}/examples/cnn_inference_f32.cpp DESTINATION src) - file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) - if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) - file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) - endif() - add_subdirectory (${PROJECT_BINARY_DIR}/src out) -else() - add_subdirectory ($ENV{EXAMPLE_ROOT} out) +project (simple_model) +file(COPY $ENV{DNNLROOT}/examples/cnn_inference_f32.cpp DESTINATION src) +file(COPY $ENV{DNNLROOT}/examples/CMakeLists.txt DESTINATION src) +if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + file(COPY $ENV{DNNLROOT}/examples/template.vcxproj.user DESTINATION src) endif() +add_subdirectory (${PROJECT_BINARY_DIR}/src bin) diff --git 
a/Libraries/oneDNN/simple_model/License.txt b/Libraries/oneDNN/simple_model/License.txt index 8b5e331b54..e63c6e13dc 100644 --- a/Libraries/oneDNN/simple_model/License.txt +++ b/Libraries/oneDNN/simple_model/License.txt @@ -1,19 +1,7 @@ -Copyright (c) 2020, Intel Corporation. All rights reserved. +Copyright Intel Corporation -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Libraries/oneDNN/simple_model/README.md b/Libraries/oneDNN/simple_model/README.md index bef7a82ceb..0053b89137 100644 --- a/Libraries/oneDNN/simple_model/README.md +++ b/Libraries/oneDNN/simple_model/README.md @@ -7,305 +7,150 @@ in Intel oneAPI DevCloud environment. | Optimized for | Description | :--- | :--- -| OS | Linux Ubuntu 18.04; Windows 10 -| Hardware | Kaby Lake with GEN9 or newer -| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler , Intel C++ Compiler -| What you will learn | run a simple convolutional model on Intel CPU or Intel GPU +| OS | Linux* Ubuntu* 18.04; Windows 10 +| Hardware | Skylake with GEN9 or newer +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler Collection, Intel C++ Compiler +| What you will learn | Running a simple convolutional model on Intel CPU or Intel GPU | Time to complete | 15 minutes -## License - -This code sample is licensed under MIT license. +## Purpose -## What You Will Learn +This sample implements computational part of a convolutional neural network +based on [ImageNet Classification with Deep Convolutional Neural Networks by Alex Krizhevsky at al](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf). 
+The network consists of 15 layers including convolution, rectified linear +unit (ReLU), linear response normalization (LRN), and inner product. -* How to run a simple convolutional network on Intel CPU or Intel GPU. +With this sample you will learn: +* How to run a simple convolutional network on Intel CPU or Intel GPU * How to compile examples with Intel oneAPI DPC++ Compiler, Intel C++ Compiler, and GNU C++ Compiler * How to switch between OpenMP and TBB for CPU parallelization -* How tensors are implemented and submitted to primitives. -* How primitives are created. -* How primitives are sequentially submitted to the network, where the output -from primitives is passed as input to the next primitive. The latter specifies -a dependency between the primitive input and output data. -* Specific 'inference-only' configurations. -* Limiting the number of reorders performed that are detrimental to performance. +* How to describe tensors with oneDNN memory objects +* How to describe neural network layers with oneDNN primitives -## Pre-requisites +The sample executes on system's CPU by default and can be executed on Intel GPU +using a command line parameter `gpu`. -### Using Intel C++ Compiler +## Key Implementation Details -Using Intel C++ Compiler also requires the following component which is part of the [Intel oneAPI HPC Toolkit (HPC Kit)](https://software.intel.com/en-us/oneapi/hpc-kit) -* oneAPI Intel C++ Compiler +This sample uses example file `${DNNLROOT}/examples/cnn_inference_f32.cpp` +from oneDNN distribution. You can find this code in +[oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/cnn_inference_f32.cpp).
-### Using TBB for CPU parallelization +Detailed code walkthrough is available in [oneDNN developer guide](https://oneapi-src.github.io/oneDNN/v2/cnn_inference_f32_cpp.html) -Using Threading Building Blocks also requires the following component which is part of the [Intel oneAPI Base Toolkit (Base Kit)](https://software.intel.com/en-us/oneapi/oneapi-kit) -* Intel oneAPI Threading Building Blocks (oneTBB) - -### GPU and CPU - -The sample below require the following components which are part of the [Intel oneAPI Base Toolkit (Base Kit)](https://software.intel.com/en-us/oneapi/oneapi-kit) -* Intel oneAPI Deep Neural Network Library (oneDNN) -* Intel oneAPI DPC++ Compiler -* Intel oneAPI DPC++ Library (oneDPL) -* Intel oneAPI Threading Building Blocks (oneTBB) +## License -The sample also requires OpenCL driver. Please refer [System Requirements](https://software.intel.com/en-us/articles/intel-oneapi-base-toolkit-system-requirements) for OpenCL driver installation. +This code sample is licensed under MIT license. ## Building the sample for CPU and GPU -### CPU - -#### Using GNU C++ Compiler - -When compiled with GNU C++ Compiler this sample runs on Intel CPU and uses -GNU OpenMP runtime for parallelism. - -##### on a Linux* System - -Start with a clean console environment. +### On a Linux System +Perform the following steps: +1. Setup oneAPI development environment ``` -source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_gomp +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh ``` - -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. If setvars.sh complains "not found" for -compiler or oneDNN, please check your installation first. - +2. Build the program using `cmake` ``` -cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 -mkdir cpu_gomp -cd cpu_gomp -cmake .. -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -make cnn-inference-f32-cpp +mkdir build +cd build +cmake .. 
+make ``` - -> NOTE: The source file `cnn_inference_f32.cpp` will be copied from -> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_gomp` to `cpu_gomp/src` folder. -> You can rebuild the sample by typing `make` in `cpu_gomp` folder. - -#### Using Intel C++ Compiler - -When compiled with Intel C++ Compiler this sample runs on Intel CPU and -uses Intel OpenMP for CPU parallelism. - -##### on a Linux* System - -Start with a clean console environment. - +3. Run the program ``` -source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_iomp +./bin/cnn-inference-f32-cpp ``` -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. If setvars.sh complains "not found" for -compiler or oneDNN, please check your installation first. - +By default the sample uses oneAPI DPC++ Compiler and can execute on CPUs or +Intel GPUs. You can build the sample with CPU support with other compilers +and threading runtimes: +* GNU C++ Compiler and GNU OpenMP runtime ``` -cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 -mkdir cpu_iomp -cd cpu_iomp -cmake .. -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -make cnn-inference-f32-cpp +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_gomp +CC=gcc CXX=g++ cmake .. ``` - -> NOTE: The source file `cnn_inference_f32.cpp` will be copied from -> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_iomp` to `cpu_iomp/src` folder. -> You can rebuild the sample by typing `make` in `cpu_iomp` folder. - -#### Using TBB - -oneDNN supports both Intel OpenMP and TBB for CPU parallelization. -You can switch to TBB runtime using steps below. - -##### on a Linux* System - -Start with a clean console environment. - +* Intel C++ Compiler and Intel OpenMP runtime ``` -source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_tbb +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_iomp +CC=icc CXX=icpc cmake ..
``` - -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. If setvars.sh complains "not found" for -compiler or oneDNN, please check your installation first. - +* Intel C++ Compiler and TBB runtime ``` -cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 -mkdir cpu_tbb -cd cpu_tbb -cmake .. -make cnn-inference-f32-cpp +source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh --dnnl-configuration=cpu_tbb +CC=icc CXX=icpc cmake .. ``` -> NOTE: The source file `cnn_inference_f32.cpp` will be copied from -> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/cpu_tbb` to `cpu_tbb/src` folder. -You can rebuild the sample by typing `make` in `cpu_tbb` folder. - -#### On a Windows* System - -When compiled with Microsoft Visual C++ Compiler this sample runs on Intel CPU -and uses Microsoft OpenMP runtime for parallelism. - - -Start with Intel oneAPI command prompt for Microsoft Visual Studio. +### On a Windows* System Using Visual Studio* Version 2017 or Newer +Open "x64 Native Tools Command Prompt for VS2017" or +"x64 Native Tools Command Prompt for VS2019" and perform the following steps: +1. Setup oneAPI development environment ``` -C:\Program Files (x86)\intel\oneapi> oneDNN\latest\env\vars.bat --dnnl-configuration=cpu_vcomp +C:\Program Files (x86)\intel\oneapi\setvars.bat ``` - -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. - +2. Build the program using `cmake` ``` -cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 -mkdir cpu_vcomp -cd cpu_vcomp +mkdir build +cd build cmake -G "Visual Studio 16 2019" .. cmake --build . ``` -> NOTE: You can open the oneDNN_CNN.sln inside cpu_vcomp folder to edit source +> Note: You can open the `simple_model.sln` in build folder to edit source > code with Microsoft Visual Studio integrated development environment. 
-## CPU and GPU - -### Using DPC++ Compiler - -By using DPC++ compiler, this sample supports CNN FP32 both on Intel CPU and GPU. - -#### on a Linux* System - -Start with a clean console environment. +### Include Files +The include folder is located at ${DNNLROOT}\include on your development system. +3. Run the program ``` -source ${INTEL_ONEAPI_INSTALL_FOLDER}/setvars.sh +./bin/Debug/cnn-inference-f32-cpp.exe ``` -Specific oneDNN configuration may be selected with -`--dnnl-configuraition` option. Defailt configuration is `cpu_dpcpp_gpu_dpcpp`. +### Include Files -Make sure that both the enviroments of compiler and oneDNN are properly set up -before you process following steps. If setvars.sh complains "not found" for -compiler or oneDNN, please check your installation first. +The include folder is located at ${DNNLROOT}\include on your development system. -``` -cd oneapi-toolkit/oneDNN/oneDNN_CNN_INFERENCE_FP32 -mkdir dpcpp -cd dpcpp -cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -make cnn-inference-f32-cpp -``` +## Running the Sample -> NOTE: The source file `cnn_inference_f32.cpp` will be copied from -> `${INTEL_ONEAPI_INSTALL_FOLDER}/oneDNN/latest/dpcpp` to `dpcpp/src` folder. -You can rebuild the sample by typing `make` in `dpcpp` folder. +### Running Samples In DevCloud +If running a sample in the Intel DevCloud, remember that you must specify the compute node (CPU, GPU, FPGA) as well as whether to run in batch or interactive mode.
For more information see the Intel® oneAPI Base Toolkit Get Started Guide (https://devcloud.intel.com/oneapi/get-started/base-toolkit/) -## Running the sample +### Application Parameters -### on a Linux* System +You can specify target device for this sample using command line arguments: +* `cpu` (default) directs the application to run on system's CPU +* `gpu` directs the sample to run on Intel GPU -Run the program on CPU: +> Note: When executed with `gpu` parameter the +> sample will return an error if no Intel GPUs are found in the system. -``` -./out/cnn-inference-f32-cpp -``` - -Run the program on GPU: -``` -./out/cnn-inference-f32-cpp gpu -``` - -> NOTE: Zero Level runtime is enabled by default. Please make sure proper -> installation of zero level driver -> including level-zero-devel package following installation guide. -> If you still encounter runtime issue such as "could not create a primitive", -> Please apply workaround to set SYCL_BE=PI_OPENCL before running -> a DPC++ program. For applying the workaround in this sample, users can add -> `export SYCL_BE=PI_OPENCL` in CMakeLists.txt. After applying the worklaround, -> sample use OpenCL runtime instead. - -### On a Windows* System - -Run the program on CPU: - -``` -out\Debug\cnn-inference-f32-cpp.exe -``` +You can get additional information during execution of this sample by setting +environment variable `DNNL_VERBOSE=1`. ### Example of Output -#### on a Linux* System - -Enable oneDNN verbose log: - -``` -export DNNL_VERBOSE=1 -``` - -Run the program on CPU or GPU following [How to Run Session](#how-to-run). - -CPU Results: - ``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -... -/oneDNN VERBOSE LOGS/ -...
-dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:aBcd8b:f0 dst_f32::blocked:abcd:f0,,,1x256x6x6,0.032959 -dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:abcd:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic256ih6iw6oc4096,5.4458 -dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc4096,2.50317 -dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc1000,0.634033 -dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:ab:f0 dst_f32::blocked:ab:f0,,,1x1000,0.0290527 -Use time 33.22 +Use time: 28.84 ms per iteration. +Example passed on CPU. ``` -GPU Results: - +When executed with `DNNL_VERBOSE=1`: ``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -... -/DNNL VERBOSE LOGS/ +dnnl_verbose,info,oneDNN v1.95.0 (commit ae08a30fff7f76759fd4c5093c01707d0ee12c4c) +dnnl_verbose,info,cpu,runtime:DPC++ +dnnl_verbose,info,cpu,isa:Intel AVX2 +dnnl_verbose,info,gpu,runtime:DPC++ +dnnl_verbose,info,cpu,engine,0,backend:OpenCL,name:Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz,driver_version:2020.10.7 +dnnl_verbose,info,gpu,engine,0,backend:Level Zero,name:Intel(R) Gen12LP,driver_version:0.8.0 +dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32::blocked:Acdb8a:f0,,,96x3x11x11,0.24292 +dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcde:f0 dst_f32::blocked:aBCde8c8b:f0,,,2x128x48x5x5,0.26709 +dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32::blocked:ABcd8b8a:f0,,,384x256x3x3,1.16699 ... 
-dnnl_verbose,exec,gpu,reorder,ocl:simple:any,undef,src_f32::blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,,,1x256x6x6 -dnnl_verbose,exec,gpu,inner_product,ocl:gemm,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:abcd:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic256ih6iw6oc4096 -dnnl_verbose,exec,gpu,inner_product,ocl:gemm,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc4096 -dnnl_verbose,exec,gpu,inner_product,ocl:gemm,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc1000 -dnnl_verbose,exec,gpu,reorder,ocl:simple:any,undef,src_f32::blocked:ab:f0 dst_f32::blocked:ab:f0,,,1x1000 -Use time 106.29 -``` - -#### on a Windows* System - -Enable oneDNN verbose log: +Use time: 20.11 ms per iteration. +Example passed on CPU. ``` -set DNNL_VERBOSE=1 - -``` - -Run the program on CPU or GPU following [How to Run Session](#how-to-run). - -CPU Results: - -``` -dnnl_verbose,info,DNNL v1.90.1 (commit 9151ddc657e4c6775f17f3bcec46872e5fac47ee) -dnnl_verbose,info,Detected ISA is Intel AVX2 -... -/DNNL VERBOSE LOGS/ -... 
-dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:aBcd8b:f0 dst_f32::blocked:abcd:f0,,,1x256x6x6,0.032959 -dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:abcd:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic256ih6iw6oc4096,5.4458 -dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc4096,2.50317 -dnnl_verbose,exec,cpu,inner_product,gemm:jit,forward_inference,src_f32::blocked:ab:f0 wei_f32::blocked:ab:f0 bia_f32::blocked:a:f0 dst_f32::blocked:ab:f0,,,mb1ic4096oc1000,0.634033 -dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:ab:f0 dst_f32::blocked:ab:f0,,,1x1000,0.0290527 -Use time 33.22 -``` - -## Implementation Details -This sample uses example code from oneDNN distribution. You can find this code -in [oneDNN Github repository](https://github.com/oneapi-src/oneDNN/blob/dev-v2/examples/cnn_inference_f32.cpp). 
diff --git a/Libraries/oneDNN/simple_model/simple_model.ipynb b/Libraries/oneDNN/simple_model/simple_model.ipynb index 077562c61b..7a9e4333dc 100644 --- a/Libraries/oneDNN/simple_model/simple_model.ipynb +++ b/Libraries/oneDNN/simple_model/simple_model.ipynb @@ -1,4 +1,4 @@ -f{ +{ "cells": [ { "cell_type": "markdown", @@ -9,9 +9,7 @@ f{ "## Learning Objectives\n", "In this module the developer will:\n", "* Learn how to port a oneDNN sample from a CPU-only version to a CPU&GPU version by using DPC++\n", - "* Learn how to program a simple convolutional neural network by using oneDNN\n", - "* Learn how to collect VTune™ Amplifier data for CPU and GPU runs and compare performance results\n", - "\n" + "* Learn how to program a simple convolutional neural network by using oneDNN\n" ] }, { @@ -251,159 +249,7 @@ f{ "cell_type": "markdown", "metadata": {}, "source": [ - "## Step 4: Analyze performance with VTune Amplifier\n", - "Use the VTune™ Amplifier command line to analyze performance and display the summary" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### do CPU profiling first. \n", - "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile vtune_collect.sh\n", - "#!/bin/bash\n", - "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force\n", - "type=hotspots\n", - "\n", - "rm -r $(pwd)/vtune_data\n", - "\n", - "echo \"VTune Collect $type\"\n", - "vtune -collect $type -result-dir $(pwd)/vtune_data $(pwd)/cpu_gomp/out/cnn-inference-f32-cpp\n", - "\n", - "echo \"VTune Summary Report\"\n", - "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary.html\n", - "echo \"Done profiling\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", - "Collect VTune Amplifier data and generate report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display VTune Amplifier Summary\n", - "Display VTune Amplifier summary report generated in HTML format" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import IFrame\n", - "IFrame(src='summary.html', width=960, height=600)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### do GPU profiling \n", - "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The profiling type is changed from hotspots to gpu-hotspots in below script to do basic GPU profiling." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile vtune_collect.sh\n", - "#!/bin/bash\n", - "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force\n", - "type=gpu-hotspots\n", - "\n", - "rm -r $(pwd)/vtune_data_data\n", - "\n", - "echo \"VTune Collect $type\"\n", - "vtune -collect $type -result-dir $(pwd)/vtune_data_data $(pwd)/cpu_gomp/out/cnn-inference-f32-cpp\n", - "\n", - "echo \"VTune Summary Report\"\n", - "vtune -report summary -result-dir $(pwd)/vtune_data_data -format html -report-output $(pwd)/summary_gpu.html\n", - "echo \"Done profiling\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", - "Collect VTune Amplifier data and generate report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display VTune Amplifier Summary\n", - "Display VTune Amplifier summary report generated in HTML format" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the VTune Amplifier summary page, the GPU is stalled/idle all the time. this sample doesn't utilize GPU at all." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "from IPython.display import IFrame\n", - "IFrame(src='summary_gpu.html', width=960, height=600)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 5 : Modifying the cnn_inference_f32.cpp code to support both CPU and GPU\n", + "## Step 4 : Modifying the cnn_inference_f32.cpp code to support both CPU and GPU\n", "\n", "In this session, we will convert the above cnn_inference_f32.cpp to support both CPU and GPU and compile the sample with DPC++ instead of G++.\n", "\n", @@ -627,282 +473,6 @@ f{ "dnnl_verbose,exec,gpu,convolution,ocl:gen9:blocked,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:Acdb16a:f0 bia_f32::blocked:a:f0 dst_f32::blocked:aBcd16b:f0,,alg:convolution_direct,mb1_ic3oc96_ih227oh55kh11sh4dh0ph0_iw227ow55kw11sw4dw0pw0" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 6: Analyze performance with VTune Amplifier\n", - "Use the VTune Amplifier command line to analyze performace and display the summary" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### do CPU profiling first. \n", - "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile vtune_collect.sh\n", - "#!/bin/bash\n", - "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force\n", - "type=hotspots\n", - "\n", - "rm -r $(pwd)/vtune_data\n", - "\n", - "echo \"VTune Collect $type\"\n", - "vtune -collect $type -result-dir vtune_data $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", - "\n", - "echo \"VTune Summary Report\"\n", - "vtune -report summary -result-dir $(pwd)/vtune_data -format html -report-output $(pwd)/summary.html\n", - "echo \"Done profiling\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", - "Collect VTune Amplifier data and generate report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display VTune Amplifier Summary\n", - "Display VTune Amplifier summary report generated in HTML format" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import IFrame\n", - "IFrame(src='summary.html', width=960, height=600)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### do GPU profiling \n", - "The script vtune_collect.sh encapsulates the profiling command and flags that will generate the VTune Amplifier profiling results." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile vtune_collect.sh\n", - "#!/bin/bash\n", - "source $ONEAPI_INSTALL/setvars.sh --force\n", - "type=gpu-hotspots\n", - "\n", - "rm -r $(pwd)/vtune_data_gpu\n", - "\n", - "echo \"VTune Collect $type\"\n", - "vtune -collect $type -result-dir $(pwd)/vtune_data_gpu $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", - "\n", - "\n", - "echo \"VTune Summary Report\"\n", - "vtune -report summary -result-dir $(pwd)/vtune_data_gpu -format html -report-output $(pwd)/summary_gpu.html\n", - "echo \"Done profiling\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Run VTune Amplifier to Collect Hotspots and Generate Report\n", - "Collect VTune Amplifier data and generate report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! chmod 755 vtune_collect.sh; if [ -x \"$(command -v qsub)\" ]; then ./q vtune_collect.sh; else ./vtune_collect.sh; fi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Display VTune Amplifier Summary\n", - "Display VTune Amplifier summary report generated in HTML format" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import IFrame\n", - "IFrame(src='summary_gpu.html', width=960, height=600)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here are the supported profiling types from VTune Amplifier.\n", - "\n", - "* type=hotspots\n", - "* type=memory-consumption\n", - "* type=uarch-exploration\n", - "* type=memory-access\n", - "* type=threading\n", - "* type=hpc-performance\n", - "* type=system-overview\n", - "* type=graphics-rendering\n", - "* type=io\n", - "* type=fpga-interaction\n", - "* type=gpu-offload\n", - "* type=gpu-hotspots\n", - "* type=throttling\n", - "* type=platform-profiler\n", - 
"* type=cpugpu-concurrency\n", - "* type=tsx-exploration\n", - "* type=tsx-hotspots\n", - "* type=sgx-hotspots\n", - "\n", - "For details of VTune Amplifier usage, refer to https://software.intel.com/en-us/oneapi/vtune-profiler" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 6: Offload Analysis with Advisor\n", - "Use Advisor command line to do offload analysis and display the summary" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Advisor Command-Line for collecting and reporting \"offload\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile advisor_offload.sh\n", - "#!/bin/bash\n", - "source $ONEAPI_INSTALL/setvars.sh --force\n", - "rm -rf advisor_offload\n", - "advixe-python $APM/collect.py advisor_offload --config gen9 -- $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", - "advixe-python $APM/analyze.py advisor_offload --config gen9 --out-dir ./advisor_offload/report\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! chmod 755 advisor_offload.sh; if [ -x \"$(command -v qsub)\" ]; then ./q advisor_offload.sh; else ./advisor_offload.sh; fi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display Advisor \"offload\" report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import IFrame\n", - "IFrame(src='./advisor_offload/report/report.html', width=800, height=600)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Step 7: Advisor Roofline Analysis\n", - "This sections shows how to collect and generate a roofline report using Intel Advisor. 
Below is an Advisor-generated \"roofline\" report" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Advisor Command-Line for collecting and reporting \"roofline\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%writefile advisor_roofline.sh\n", - "#!/bin/bash\n", - "source $ONEAPI_INSTALL/setvars.sh --force\n", - "export ADVIXE_EXPERIMENTAL=gpu-profiling\n", - "advixe-cl –collect=survey --enable-gpu-profiling --project-dir=./advisor_roofline --search-dir src:r=. -- $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", - "advixe-cl –collect=tripcounts --stacks --flop --enable-gpu-profiling --project-dir=./advisor_roofline --search-dir src:r=. -- $(pwd)/dpcpp/out/cnn-inference-f32-cpp gpu\n", - "advixe-cl --report=roofline --gpu --project-dir=./advisor_roofline --report-output=./advisor_roofline/roofline.html" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! 
chmod 755 advisor_roofline.sh; if [ -x \"$(command -v qsub)\" ]; then ./q advisor_roofline.sh; else ./advisor_roofline.sh; fi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display Advisor \"roofline\" report" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import IFrame\n", - "IFrame(src='./advisor_roofline/roofline.html', width=800, height=600)" - ] - }, { "cell_type": "markdown", "metadata": {}, From 45221ec27a4883bdd766ea5d3c0c71817f37fe6f Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Thu, 20 Aug 2020 15:07:36 -0700 Subject: [PATCH 04/11] add two new onednn tutorials and move all notebooks together --- Libraries/oneDNN/simple_model/q | 32 - Libraries/oneDNN/tutorials/CMakeLists.txt | 12 + Libraries/oneDNN/tutorials/License.txt | 7 + Libraries/oneDNN/tutorials/README.md | 19 + .../codes_for_ipynb/cnn_inference_f32.cpp | 0 .../codes_for_ipynb/cnn_inference_f32.patch | 0 .../getting_started.ipynb | 0 Libraries/oneDNN/tutorials/images/bf16.JPG | Bin 0 -> 29853 bytes Libraries/oneDNN/tutorials/images/cpu.JPG | Bin 0 -> 29189 bytes Libraries/oneDNN/tutorials/images/cpu_jit.JPG | Bin 0 -> 27128 bytes Libraries/oneDNN/tutorials/images/gpu.JPG | Bin 0 -> 29101 bytes .../oneDNN/tutorials/images/gpu_kernel.JPG | Bin 0 -> 29464 bytes Libraries/oneDNN/tutorials/images/vnni.JPG | Bin 0 -> 24263 bytes ...N_Analyze_ISA_with_DispatcherControl.ipynb | 872 ++++++++++++++++++ ...oneDNN_Profiling_VerboseMode_JITDump.ipynb | 755 +++++++++++++++ .../oneDNN/tutorials/profiling/README.md | 25 + .../oneDNN/tutorials/profiling/__init__.py | 0 .../tutorials/profiling/profile_utils.py | 206 +++++ .../oneDNN/{getting_started => tutorials}/q | 13 +- Libraries/oneDNN/tutorials/requirements.txt | 5 + Libraries/oneDNN/tutorials/sample.json | 11 + .../simple_model.ipynb | 0 22 files changed, 1923 insertions(+), 34 deletions(-) delete mode 100755 Libraries/oneDNN/simple_model/q 
create mode 100644 Libraries/oneDNN/tutorials/CMakeLists.txt create mode 100644 Libraries/oneDNN/tutorials/License.txt create mode 100644 Libraries/oneDNN/tutorials/README.md rename Libraries/oneDNN/{simple_model => tutorials}/codes_for_ipynb/cnn_inference_f32.cpp (100%) rename Libraries/oneDNN/{simple_model => tutorials}/codes_for_ipynb/cnn_inference_f32.patch (100%) rename Libraries/oneDNN/{getting_started => tutorials}/getting_started.ipynb (100%) create mode 100644 Libraries/oneDNN/tutorials/images/bf16.JPG create mode 100644 Libraries/oneDNN/tutorials/images/cpu.JPG create mode 100644 Libraries/oneDNN/tutorials/images/cpu_jit.JPG create mode 100644 Libraries/oneDNN/tutorials/images/gpu.JPG create mode 100644 Libraries/oneDNN/tutorials/images/gpu_kernel.JPG create mode 100644 Libraries/oneDNN/tutorials/images/vnni.JPG create mode 100644 Libraries/oneDNN/tutorials/oneDNN_Analyze_ISA_with_DispatcherControl.ipynb create mode 100644 Libraries/oneDNN/tutorials/oneDNN_Profiling_VerboseMode_JITDump.ipynb create mode 100644 Libraries/oneDNN/tutorials/profiling/README.md create mode 100644 Libraries/oneDNN/tutorials/profiling/__init__.py create mode 100755 Libraries/oneDNN/tutorials/profiling/profile_utils.py rename Libraries/oneDNN/{getting_started => tutorials}/q (77%) create mode 100644 Libraries/oneDNN/tutorials/requirements.txt create mode 100644 Libraries/oneDNN/tutorials/sample.json rename Libraries/oneDNN/{simple_model => tutorials}/simple_model.ipynb (100%) diff --git a/Libraries/oneDNN/simple_model/q b/Libraries/oneDNN/simple_model/q deleted file mode 100755 index 8377675780..0000000000 --- a/Libraries/oneDNN/simple_model/q +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -#======================================== -# Script to submit job in Intel devcloud -# -# Version: 0.5 -#======================================== -if [ -z "$1" ]; then - echo "Missing script argument, Usage: ./q run.sh" -elif [ ! 
-f "$1" ]; then - echo "File $1 does not exist" -else - script=$1 - rm *.sh.* > /dev/null 2>&1 - #qsub - echo "Submitting job:" - qsub -l nodes=1:gpu:ppn=2 -d . $script - #qstat - qstat - #wait for output file to be generated and display - echo -ne "Waiting for Output." - until [ -f $script.o* ]; do - sleep 1 - echo -ne "." - ((timeout++)) - if [ $timeout == 60 ]; then - echo "TimeOut 60 seconds: Job is still queued for execution, check for output file later (*.sh.o)" - break - fi - done - cat $script.o* - cat $script.e* -fi diff --git a/Libraries/oneDNN/tutorials/CMakeLists.txt b/Libraries/oneDNN/tutorials/CMakeLists.txt new file mode 100644 index 0000000000..4c8993ce6b --- /dev/null +++ b/Libraries/oneDNN/tutorials/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 2.8.11) +if("${CMAKE_CXX_COMPILER}" STREQUAL "") + set(CMAKE_C_COMPILER "clang") + set(CMAKE_CXX_COMPILER "dpcpp") +endif() +project (oneDNN) +if("$ENV{EXAMPLE_ROOT}" STREQUAL "") + message(" - use default examples") + add_subdirectory ($ENV{DNNLROOT}/examples out) +else() + add_subdirectory ($ENV{EXAMPLE_ROOT} out) +endif() diff --git a/Libraries/oneDNN/tutorials/License.txt b/Libraries/oneDNN/tutorials/License.txt new file mode 100644 index 0000000000..e63c6e13dc --- /dev/null +++ b/Libraries/oneDNN/tutorials/License.txt @@ -0,0 +1,7 @@ +Copyright Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Libraries/oneDNN/tutorials/README.md b/Libraries/oneDNN/tutorials/README.md new file mode 100644 index 0000000000..6755ed53b8 --- /dev/null +++ b/Libraries/oneDNN/tutorials/README.md @@ -0,0 +1,19 @@ +# Intel oneAPI Deep Neural Network Library (oneDNN) + +The oneAPI Deep Neural Network Library (oneDNN) is an open-source performance library for deep learning applications. The library includes basic building blocks for neural networks optimized for Intel Architecture Processors and Intel Processor Graphics. oneDNN is intended for deep learning applications and framework developers interested in improving application performance on Intel CPUs and GPUs. + +GitHub: https://github.com/oneapi-src/oneDNN + +## License +The code samples are licensed under the MIT license. + +# oneDNN Tutorials + +| Type | Name | Description | +| --------- | ----------------------- | ------------------------------------------------------------ | +| Component | oneDNN_Profiling_VerboseMode_JITDump.ipynb | This Jupyter Notebook demonstrates how to use Verbose Mode and JIT Dump to profile oneDNN samples. | +| Component | oneDNN_Analyze_ISA_with_DispatcherControl.ipynb | This Jupyter Notebook demonstrates how to use CPU Dispatch Control to generate JIT code for different ISAs on the CPU and how to analyze the resulting JIT kernels across ISAs. | +> Notice: Please use Intel oneAPI DevCloud as the environment for the Jupyter Notebook samples.
\ +Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ +Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ +Once users are in the JupyterLab with downloaded jupyter notebook samples, they can start following the steps without further installion needed. diff --git a/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.cpp b/Libraries/oneDNN/tutorials/codes_for_ipynb/cnn_inference_f32.cpp similarity index 100% rename from Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.cpp rename to Libraries/oneDNN/tutorials/codes_for_ipynb/cnn_inference_f32.cpp diff --git a/Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.patch b/Libraries/oneDNN/tutorials/codes_for_ipynb/cnn_inference_f32.patch similarity index 100% rename from Libraries/oneDNN/simple_model/codes_for_ipynb/cnn_inference_f32.patch rename to Libraries/oneDNN/tutorials/codes_for_ipynb/cnn_inference_f32.patch diff --git a/Libraries/oneDNN/getting_started/getting_started.ipynb b/Libraries/oneDNN/tutorials/getting_started.ipynb similarity index 100% rename from Libraries/oneDNN/getting_started/getting_started.ipynb rename to Libraries/oneDNN/tutorials/getting_started.ipynb diff --git a/Libraries/oneDNN/tutorials/images/bf16.JPG b/Libraries/oneDNN/tutorials/images/bf16.JPG new file mode 100644 index 0000000000000000000000000000000000000000..cb82831853c27438a5ec900c18c7db86e7725577 GIT binary patch literal 29853 zcmeFZ1z23kwl>@Z2n0w7flhD;?he5rA$UmRF2UVhn~*?of?I&#PU9}Y-7R=w!gNBqjt-dG<5Li`B-6Pv&S zSVP)JgrD=3VWB_e;aOQhps=uC^1m<(Q?~HqbU;*U{5sr`OSD)uU%- z*3+if)?;O+*JEYXd&bVp$zs5)L-P0L^>lx3-OAp^?5DQ%bQ$%{^v(4x>}&xvm>EeJ z|JLchs!af*pBw#6_<=D3Ww?cG^tJ8uUjm8$`}EQ?J)>t~k^je(GqAEU@iP9Z953T{ zF#f5de{Iiy7h3$hdb-@Y-}`Q9^S9>Z^sWDUvu$R~|4(%KZ-WHx{VfC34v5~pd-r%5 
z|GDA+h{R8K0LcTs_p_V=-z5J3;Ps=CKLqj*xPHL(hYAl>1c6@8Ce-=SSV>|X}(tihlGrbat{Ry6%~sH9~+7{Q&dtFStAK2#9x)knf?O0tLz*g6_b< z!{0%GM?^#bjxOQcfzLq*7>JLaGC#kIDWi=mJJE zC)hZ+%`>z z!s62M%Iezs#{R+K(ecUY+4;ryalwJ$|2C|jNA|06VF2U0gMa{!fb@M_aCaPm1doA$ z_>}qXqvtY6+SZsPEN_vqUW6tWHQpm-mE8mD*bJgPCS#i?-~T?epGNlk2IlprM)vc- zei_#!2n`+%m^^q4kO1gH#GmT(oxd>MrQ2uSQjsnfo5VH@F10l zr#$g$w~K0biJ9@#5xOJZ26+`HP;6;=bAQUD`X-sYb<0IwR<&23y|siUX5{AZQ7Gvu zmu}#NG*UL3ueVTfwyrnAB4q*6^6P>-*xOgnp1d3Cn%lmgJ4*r~KRTE$HQWm4KkEs0 z;84OL@B_b?YrA2H8*94k7?P^HJ}(R=Vi4?*G*>~jY{Y3(A_@Yz*n?M}B|Ws~BZbtL zjg0jk^O#Gq?OS6#a;9{-hv37cQmIHp#pgP<>QxJ|hh5jBE{l_ugua*&GNrGz%|-Ki z^y=et1VV@jXd?^uc%Qtr|0Ft)fw+8JzqZuMoY5SHW4_fP>&WdKL&F_fR18*HK?hdd?i&kYR_};#dGqY>qcMIR$b1h z85W0*p`=hBBx1P0V|IOBs6A33of&!k@a{GvjBtQF|I=VC0xc7TB015{LqYiPZ9&5K zVPpX@6Uv4-a&fpXrD`UWtiN=&LDHQ=ntMGqb}m+hhElx|^N&h2d#%u^)Vzb9<4|7C zASLHiRG(D9Zb41dc^dR##!A(tvTDt^OmXCWO16)m`Vn{qx(*=a%VOn2B^e`IT^+L_XO`X$y?s*&}l5Ni#{d-kd;q4{{% z%Z^n4rIJ>bw%0Rjt;&Iif$s$I>|}&NSpR=dffC6y>Av_>yOP2nYyQW34rJA%Wi`h1 zFpC0<8S}uLS#>$F-m>zg$BkQJ`2hmR6BB19=BV3BC0{!dW%Gu7m?=QWAK*c7yJ{oS zcNO|Sm{~LwUCpuHkR9I3FPv*3F{-!JitJxhNZxaZMhuEfalX-r{8|&h*dK%E$ZNwo zS&{V_4KE4F*ikBrH_)Z< z$XXRk8j05Z8!F!A%nRG>jgO+)L`S(s7GRwST?C8@PSW7AT>bf80m#VY$ry(yN4IH- zV+X0GC(3AwC)Xt-lFIt2mm1IZ3tD}LP_viK9f*FE%Wc+SPQ7v!XSQi-v#PHuo^2CE z4r@%Ql`eMsb(nqAO`dg$+obyZz+tKpSL)Nr;pF*Vzopf1UMB@kh5HgEe)dsL(G}NV zN<&$tIIOmYC#g(L^}M;$`!FfedU%R`OhyoYL=$01dt_`~uZEetS%W+@`t%;HUNmap zQW!+tRz~xRd%$vB6Ep1=v@gIM-&CxN_~7|#eoetEZ+m&^yYe0cg&$-~>H@jD_07yv z;OaF#jJS+Uas-(s8_($!%`H2JFz(_}GPH(lNNJ^bqEP)vKQQ=zR0e2*ZYna2L!%q& zy$}f;D{mCqO&YM>1azy+-j5HV@7&LU;`+(I_n@$sXVFpQ|L54NEpeF<8?zW zdLTL&ekHs238yc$;ijrh?Of4r4u|&{&h_}D{)LF>Aq*iUd69-7bbcClrsD7x>o%lY zkY)wlm0W>x=!v=Fy5L%dtkMCm+|7Xaq>KP}&pz88=fr$;ZR+@vJD#vp*@xC3hp9XJ zZ)ym-DTZ%B!3))<=mC>vUva%Zjjh%^ofV*&8M=CKVRsHESh6Usk1fujuXBEwRDnia2C3;bZt6h;9a9RXbuH&3;I8H0J zWE@+1kma@o31C-b zV?>kvMtU-jzUH4!R)o;C^~u}bg5Y{JhRe(al*%tfVx8|#^OQS>a%v~N<{3{EXiktR 
z9W6i9)u@(NJPTHQzCxF`42SRC3_@lfJaZH@=jYOo7dco>T-Ud?&8nNagJ~4*y0r9w ziAVZwizeBiVcZ6ly`)muAg-kMqrjHbY)e7v@>$A(UGU_%Vo9W~y(l#kk4i8Wk&USr z6wxVeS31tMxo)!WWF{l*dPbim`(8QgK^qdzm(zR=NHev$C2zG~@Ek+~bIW31uUHHl zpm*h*Y;aSpj+X2#141mkg%8Pv%_7L|z!6ju->|dWYThZXf7gsLyD11YF_Ai{sb}`_ zc(HgL*>nRQ8h{hVlK$UG;DkKWNrQK$s~Z*8Qw99e?JJg_H;g&LYG@utEO&z)j7D9% zFQvHQ*^he^_d^y!Jz=E#PugOa4L>xOH<7I|P~az}jK`a}jHTxF8H~WlD|N3=77q#} zVk$+&Hrxp@KNNx8H~J$zC`&ezaM(=m53VzHW=U|~KPA2D7f5%WE!!7uiK|~yi!6D6 z3qq`349V^+;P#(SBuTqSlJV)>b8?rrxU7SIdjzp&@9~lJ(^TS9`C4>s!cCQju0mG& z{I`LFi-I2+xSL0upC=)L0y`wT1*y8z-btgim&Uq*$BOKwSGih^RlI29$k#lu+y|CV z=ogwIMMQA~JCX{v2As`F@jK(vp&y~$Tpm_Xep3|xtS%YHfFkQppLd_=jLF+2?&)XmB*%2oGc{W|pdUE3&j?LdCc|)+D?W-N1mOh-X5OtaUd#~{ zH*sOJr70Y8NnO97ZLt&ZyJ(A1|?ht=jj`QWkgUD_rOl+ev!-nXk z$RfoAh=V}D5g%Tw77m%s$t;dkz`%3|m%wvP4fiaK4M!JR#vB?Mz_ZR+Dd3^)lmEgZi+2*N-7P_w%ZSAvidcAzA&TFViH=lO~>S z(cFUQ)UHsM!denD{HY4$&TFYs{6YUn)I#7l@*hQCPy;-;zd7kqnFfB^vph?4Q9L~X zmJnUdqGYF3$^HA4VNLUriz|M?I8c=Hq4e_V#J+jM(%AXP2MU9Q!VZINuM2{!frY&y9`-+Du&U8mCCsBN8351Y6aIaHl8+Wun3@y4C>n&vG zo7_RxEeQ7ujG>&oUu?^LRxPv{-6Zlj+9KfU>mwCE9dX_;*Fj-$n>(?LFc#>)N(wW) zXcA(p@OQHngFccBhn~h0-$b&ULE;zuXWOZBE+^c#5`ihU-Shk{K`D6GtjRo5%g%Yp zb3WE`b+Y3m01;BZ2FpZz-hHf2NhKM_&~riBoJc$DCTT+0rBMQR_~nZG7UX*huFzsp zo>O11Ft*jF6{f-Wk<;w`*6+(Pc|o4|SrK)QiM6SFhwnOfqInSvad2T>vx+$zJ*6?a z1+kW;_|p#X3I0}p{@^|MfKfNMIP;TS3%`?CHlXTLj4xBcBX5gp9MCWJT%e?hNn#@c zWp=r)&6ey%vGC5e6k|Aqk*!ls6q^d;W!tjHiLH#32~Yh7Siwf-sSm8a60uit?jWzL zja6?Kcw0V*&YLYg{g{*N>dyV^`hG@8fMV9_z8Y;5SBc~2+kJsF3cEZ28)<_0emYLqO)iVmB3z5* z@u@XBat9rqGMjMh;axht8F^kht&hR{MwjR2h2%4hnK~A8(;MunF*Eb+p_3WJv`sQ< zzU-)H(&-Kz@ohI%8(i@Ipv3MrZsCKLfv$~WwbJFuC$#J% z*5U~Cj|mk%>WdLhuMYS(Lw$s6SxwU!@|VRCdy3ii)e!IFe^?Eqvf$)Isz^84AiVE7 z_edz8B6(?gVoMUN>>;KDzxsmp(1P!*?a9E>9ij(kIj2hFyL3ARY-{gXI{ZwE#7Z7k zHd{~{?3o&`-K~g%xZZ0utL`UXK zuU%2f%tV*&;m9!MbMG5mAG%4d_B;$@V%ikoVU}OdtHZ)kMKg-yota)#uq1pj%)~jy 
zf^%)2Y)f{{Hc@rRqSrKiqQI(3i#%oXE|8fFBT2O+>&$2T^+?L_RX}cT_Cqn<@&M8(Zeely4G8fQl?2#xnMFFk)5yc*W;^DzvKWXA8X~f9=#Hh)vqSh}x8Hk@+FH@;BTAx8(?_{^{V~R#g z`J8LlVW@M^H@|?FY9z~|{0L&l-jiw^QS!*jd?i`pQl2u0z7j#(6JNjc+PZ>k_f^st zxFbH?A)a^@2D?aF{AdRqqcntvj-r(=`#zh31RRk$s++3n&rZ*RiNHx-#v9(LnG<5oCIw$O#QDjC)nD|SAA z;RCWArYO#HyXQz0PS95zzP>jDaYdjtk{J_sT%@mE77VTY!o}w*ji_8}yny4zoN1b* zTk4ORAcAVF9rr{DqnrCB%GHd>)oLaIt)$xiyNR>-OOmmbrjZu^p-z{AsQ^2LNfFK+ zr?gQwyzCh#rPqUYGn!))W~)AqQLYrxJw_QSN+7XHya@R55D5*_MkVgr;g0H{+WSzm z^2xWCgo`x6`&YN1rx!b)AJqH5ln%{d6voC$2Yk=8!TYNr^tIG_+7+}M=_EHhyn9L8 z&(7q#Bv7_yy-{NP{S(+7I^E0{YZ&iMVj&tfghCyW`S_+L>pjNU<7ZrD!!BiG(_^n8 zD14wT8obNPgx42pHS2N4%jSJsE~75{n~IT2OCCW{%Mpc4yOcGzAVy+96T*-EZU@Gw zw+g+E;?0ieVz0hpH)WKAAb5q;S0 za^X4-bE0h(8N)8^Rdx6XSI#wkbizNnCf%cbk47Q!657aLL7f@UIutHr!Mu6locK-{ zw3-gyPSE~s%K8`cfAEKeIn2p3rI%qROFj`-g{_Gi4NVbo{xxiwuk8(z5gmGNL4;1T zb4{w+7?I)9F*?|F0+akr^-lHmRU#p>`2)y0HdgEzjxs8sDTh~HV`DsG<)WPe&UGd(611zU zT(8xKrmq9gKcxGlWy>o1^YkM*!5n&@$%ZF}O0>g@3N~t~9;H3B5*5-6|F(;H{{@Dw z%PnY~IV3b>KV%=4*LnEbRC|-pMPT4OZ;pPsp2UAR=lXTAKNT#8|IJTaGEMP8@<$(} zWF)pykc^m$(k>H`oZy=|yLU%yAb{F|uw;31s14ubaEDnTvTidui$J*f-g!;=2ylbK z{B`@WOwhtoeAZMvdwKB#ga6@U7wgHY(!p{G7ep0iUlJR)3(Ts|v!Z@%G}D#l-%}^} zM(i`v;44g#AuJWF%8g_n(Rs+9K377S3{JL8H$K#y?I865Pq&Qs*0by@wEG{A*F*rm z6l%-IPQn1-8Un3QoLi8oiJ|CldT&Cmj-p&knBx_a#G(H|5qnh<$=A!R5c(6Wn<5$4 zeoOLO5CVYCB7ej``1bF8ugUa6$PY$FO%nPvTJ6;-F}Ye&4y8#fOkACxR#Tf?8GUk) z)_{7GEQ}>jM1v(SJN+C^hHbD-d0wax=Rz($jtbN5fXwuI^$pUdaLiWwpbTCNQgm!8 zq2Y9QNYeTyL`ku449U2$am}a%m3c%)_|DJr9XB>6nf(G#yR#AHQDGXrMR!4$y3P(TVoL{+`)c&{#yIETD|0K?8d{e z$sh(jXJjR$>Ez&m5?HMhytdtLPwaDgAB5=k$ zw<=XU+BAnVr|1*g))R|*qEn)`jq>0LC?U;$wanh3o}fBfAwYWkm~4c+A}H01Gru+_ zg5%kv^q4R!Bz4^~Q;v9@1KP648t&vE<(4WG5tLwV-7=8#EhxZEJ9}M|xGl-atwyxs z$-|PCqAoJho$MFe?N*Cm2Z4thVP}b259-$aUDaYUDyt6019YCXFo|$8sULTZ>oY`W z_Oo=e2bG3VHPtkwyUmZ4K;q)0CvRW7^a!tEvZ<;^S@zTv znJK=NdkP}Pv@W-Bzs}5Z za&jUPe>Qf48`(05Y?cy}N*#{f2SE)65St||a~Ej3uuq-7F5BKF4eZ0?H&%dma9ssI 
z9XN{e;#89?W)=|_>#a6dKmO7VR$YBPrnq#UQogvjSUQT~a}V%zNmhnl`|=3%c6uY>3MLmj%QyoES-?rn&c(Q(@c)#`B4Z7&GB&&!g7o)v&L zcLnk9Z1Pj_(}#7@p1pjmL=7jIy%1D@3C|!cTv@ciQAjviwa?z=bay{W2IG;?WfN=3 zX>R|#2G)gn+F`(lvI^5kBC>vz66)=Kt+6asgg>eVlO+t5` zIPiPb>+Xv*2>9_nx6XR~nxF`Gb%$z=HX2hYj6&g#P{JNr3 zY@7C+x9?^8R}#d_bDQoEh*rAR;Lag$l(&-2oxU$iJ{^~H>ib9eF4SF_EXK)QFc#y_ zfYKucecw=LZ_^X*m}fVI)B@_;!P1dobY67UQey417mw3RYc#5BNWS%gwS<51^o3i% z)9hHtr2J6a%;|yF3fV*Q(ifaQpbd8u2`X4p#HTs73IVNDkpvWz1a_nRj{sAl0L;zb z$eafBZP{+o%q_@Jy<*2epoZ*vtl&kYa_U_j zvJi?|3#qTR=q4n^|NLCMb0!HUn~uUHzn)8844D7W&%Xi{u58|1eI0kiqWzTRDh%CH z^|M8J4-KqLh;j#uKb^eO7yya5G9KAm|8t^(tczFKW`3MF# z;n2@Kx3X?)n}n2`s0JG;I(3OD!_VMnU$wK*w~{-R>U{V(A*PpTpP;7~{PZdPr#?+XGv-!=Hb`RMai@ z;v@vimGx;ch+Ag!#vw+a+A!SF5oF@mecQ%uw=UI1C!MT#pY0k7d(rxDWmL&-nY8z# z-4~&=!)uyIqX~v7*)P{8jYN=JkjATWen-E}OZ<$Iov_Qvmq#+fwn^{)Ws9j=RODL4 zI>bDhZDxkMePC1cAb)d<_H!_sKT~Lyf{Z7k)k2Yk|8hu+yTSA3!yfYOTw^Kd(yU~6 z4RVT@?mg@#e|s#Yx71ubCC6QM^Aacz6(#k6DIjV3Esb1WOP!NItLgW3fg0*aIdCM9 zNc-65%{yBrnT^-9X1uE?MG1Vv%rNBF5vg(pAr45y7#(biPu{apu(;>2Z+p6kMFjw`Cfe2sks* zkDIu)+=3)l3hX6o^8Co3LaS?0GPR7ozE$Q*K9RkVxdlDD5pC0T>U+K4Bv@;6CwXaoTj_Sb$?*%igK-K zolDTU-DNo+>9>`V6LSZ529YKWiY20Lm_RrCHv7SjUv_;+fvO3{D#t|C<%y6=|}sAgMdPpaO6@?$WsN0!D#h)}gK86=o&Jj7O8bh5Nl`&CV7d!w>(yr>PI z3gUFKzx_!I?tSwBpI|dQXjLdJVv*LUfG(dUJq=pK zr>AYd|K!=6{tfi)(DY{1N2~Web6a*-noPYdPyVsXE}Hz$8HvL_sC z&=a+}RB?s$_Cl$jeUk{$9-lzyweC7g$iOX#=I{{NR;EFBsN2M5yuxt6me z;M#HneKlV{IUl%o{Kcdej5BG3k)I+j6`-YC{F7!kQqwny}D8U(#9&A0jn9 z&OxD=)^AFhzRf&%P{X?dc-io;$$swfA6$V|nHg^FeJGy%RkxsHg)W#*4t)R=!;L5d zQ8|yY>g>Z4#gnSi@n5`26yW4cp2H3uKM6>C`rNdm9ub7``1K!Fq3n0=S4Q@aw zA)qzKGz{-XbaL4OUtTj8YFv(Y~+B`0h(H1B)a|NMEsq{bp4$2?_WXqzhZs zlJuGnYz7-r9WVeioxVS=ZJ%GuyE?FH4mey)Y!~Hg;pnbjPzcoJO@z#kj4AZ{ro76} zGk>~XRY@dsBW&BVwH-|!lFb%*QptePC|#OcA#>fmqqX9u`7EHQ?0WLvbN5|!Vuoebd)!yJn|si7@O3n zG40^#L63*586~+xAW@SiZ@c`M{WiUwul^_nqxGF~7)cgM8;8KAXlCYcTe=xQ z+qJ0FNaiTxBBru$WPy2mM0SG}+1ms74)~Q#&G)(@eM@qcZ~IR`{Gko>L?k=?77~Dxp=6Wr6D?#z=YcoyUaCk)bT19W^d^ 
zGVv;8mE!Is7(3_khh$0w_uB;*?N|>!EBta(_7)18Z7rCkQ|)OmuBfT_04eMWb%htk zV~c51i-N{jaV*r-@i%Q?6R?}kXBU~}@>Np|uD?}si>dfO`4W>zw*H)NN z_!vKjggQ6sz_qcse8WAGB3a%!a+UOZ!?8;y(ThQ3wV1BVu~$rkol4VSFA<0m_WJ{s z&ubPG<|eP8S)W$BptJ|DG}C41qu!Gc%ng=Ro*nF)vgSaK4bhRk5k^g$tp*MQz&o%) zWCz&EKM{g2xHND#z4Fe`(?ERu8^a6v2z<|EACxRvzAvrd??F@BapMUdCC>%m(ZlMhEpVagtwd_+LlCc0_s=sz&nZouZ`GE^|8zOjZ?Ys{3Q9 z6>c`H5anQHjaKU5^nh$A)h#Ig7DT$RUu#I|G*3O%r?u43GgY;ZAJm%nLf|Am{01D( zH*!i}?W}SQ$BX`XTSi99@Lji647nK}TkXa5WfERMPm$BO%Q4=;`l%TJv16hW;0}Nj zA$YzDc&3%*=^#a)x>yJBx7+K;SF%r3^kg0!dGmIvIdH%VcV+tZt=TWc3IYE@I5e)n zu>rdg`z9kkdws|RwXhNS5Yx1KrCi<>#55Q5X8K&Cg8NZs53GpP6eEAQZE#*5pJOM@ zRpD$Rk=#%tP>|eqcrshVc*RkRjs#JwE+jU-+}&G1%FET&K`rdr;|}R%uRyqw<0Z_q zQQ`i$o>(fIS@JHblrrX@sM$Y)DLaEglDH1(Q+1CgS(S1V z_RwxYMtg*jlv*?3tC)~=4X~THa5qP7e`z{{iM7XbVJt<1x4&b*2Uxm)I^G1gU%lMF zn>xSEF}nzfP-{~OEQxuyh-g(weB5md1c!Qq1LhMnrA$ya^QX4?$2tuerkz6(An|i6 z46NH~^8XS5G=VRQS+$lU4hv^g?}YLsN{Y1~!*ba%gBaE>UfhB%?!h7`Z$Tk`0&@a= zwOf`G{M|lJ_O&uMSOB`fd-?FsY#=imaiZoQ+8co6Xj=U(s`0;N(FveDaMREiLg$8U zW!x+ol|0sI&*sYKdnR?r%>JdxUIF00r88d>ziGSVB;Fjmtjd;_c7(}PT9-MK)K*m= zr*Bym*+fSR9iYWiri8T{$jbPv^VeH;A0E-$VG~dpAbU@H%Z&Pc0ryXz1&b^U5x=|a z?*ziPI}uKEDx_v@Mu? 
z9*Tz(Kgf;VentB;d!5)kAJYc|cAubo&%ycw>n=U?n+H4gF`vH1g@6gLs^d+ID#(WlQ~Q)z8fUWSE`yd{dov=p zAQJpMZxPYnkSzAmfqxcrL@D`2kvjgU1Lzt>y=?R~5L8nOO>o!FqKwU$&OA-FzQ8@_ zw+`?_N=;hTba>{JWY|n}Un~0Wg87q|`gaRw68@v!BpMDk`$<;+n#Uom&;q@X#Mn-O zSJrB>&7*nSujykHT7V~XoXG+&6hs`FsCC!wnp)5^OfuHu%AzZNBWk_{!4wA-+3&E+ z!)G8#9A_<+E$53wFE|~SOkxbh*<7KCmDa@GmGEuELF`u@8P3x#PU?+5UByQ;2LYPD zNZ6&c*p2N;-L6X_AuT)1?_I`L%IV8!7WZ5daSJfCIvCWR`5KCwd7T6hGpUB%q8qS| z|FN;0H}-+7I3qoJV(zeOCTREj{CEAv8Ynt)^pu8p9 z0_zpmCjw0&TbGav^IMR6$X+g%OfSRyb^QCA7Y!%wnb#aZTyFcB%rCV8Epnw#3zS$T zbtv8Rm)?Rh=1#u?f+_HGZFw@HY&9QFEHKYeZb1_#Nk>yRmqhcTztpDSPqh+lb<^7f zeX4cIKYSApozuOd`sd1ugz<-Vh$bo_OTgn5Klh&K=gJhRQsXq5FQ47J@`nD>_tc*& zqiMymm8^ss%nN+{cR&OiJEH$zgae=L{3~jt_i4%%=9iL0F-zB@1+&09^`Wk4;<`E7 zf_^M$s<(Z(tE!oHAd*xAP65nQ;UANJzz3KU&8^_Q+@-Fe(#$sK1)hN;CM>h?2k9}j zS`v1UDi2>~RbpNI>(jId_H%gDH&6Y~8IC2i`ZB%9v@gcT$_~c;^KUvAdL__wyDNb^ zciD|%kEdA~J<{BwH}nPpW|d^}pEE0o9Oe1vlmjRr37TnD{hrSR@mPUgt2nu#Yx@rj zBIASonf0aQom!Po|`MaE(=*jO*s6*c2OO<}I;6?ANAT9SII+jj3?ZhG7 zXQw6)Rt|HQav8A^=QHWv;We2!m<;5<1p!LQOTRUh5bfaYMs_FBP=T+mb?OB&6bZvcy!SHXN)fBCbaC*NH_v+e?H%el9Fr^%lxgZOtVncsG`!cX21 z2rr+}jD}p58d_dC0USYC$BCPWwDH15m#BK?JPev|KHjJ3m`%Cjd~g;6kkino|H20T znHl_nycVaMR5?e(Lw1k|Z`&ge`y+QJXj?ZCWlMSV8*A&k_fF;fms)d&tB%xpDaWL6AuSft&MuD3?r3B zmBIwl)dOCa1rXKDT6?Ll3V6H7+teyRO7vHp6-wO5>9MMkqK|d&k%XzA8}$ZP;VmWIAeP5~Ci4tB z@7v<-*m<*`pL4z?3^A4|@k*;ow*q>5=P&JC?JZ~_*l+b~bMA{OCjmzWfod0`FRO^=10Hrl73Oa6(=k+UG`*ZaHw%7uc%#tsHvH{aUBwp{qLXJIWYW~t z)fdG11h)kG^KBM#F1cMSdN#VFD5U`Bw3pS-#}vQz=KeRX3ElSvfDMOHiInC9E13Y@ zR|2J?EA&GcJuHpVhbM**CxX|8T2XEqcMpnXWMhQ8RGuG>!IPqUC8=4DQnt7?CzKPo zQ>79DbTGy4Ke(Cy8y)-Yy5#2lR%1#0BzZpOAlV8MPp3$nf1~%Xqs^z@uA6gYxY3FEscmLo;9E!Lq+_j#L7SX};hMaRHio|(>va6q=>Th1RY zfJEi4HZ%d^*->(M;50p08fcb+x0*_~ zs$}S~WHh-t{wLR`s5OF;;x57(A=eXA>K)n^H?4U;H?EwApUcu(Vzh>@!*^(>b9~BD zr%dDYq8v>lwIj`Jfahq3KNPG>b_{(wS2>&krxqAha6iZ-PnbO765U!HQM7mM%%)AP z;@JRt&h#%%zJECTe&e0lJ_tqf)H#2b!e8+#ioU}){8L$D?O{XQG$kT~igM)&i?CtLa z|L_qv+G4t~j3~**W7bkv(;78HwK2rlx6)*cZ%g4jkBO7a#)7aE$n~}J0ZGpIXV1xh 
z$Scy(8IHhGN=siHbbN&$o=pyZ7AG7`PPb*1gG4r7>_pT=`+34WXIn_?6~E;L5NCIM zT;OsXNi~;^8RySl%l`TK{5Cm#O0&cqTe0pgyXl~?6H9n_4X2v<44~g+iT{wm|B=P< zx3ch0T4wRe_X*>)U;oYcQhFky+4n|-hQ>vUC1jYYO% zdxbT^oG=>&CbHrZvxzR`(Ax3Pk99{b#~p#-gOO~Su$9neAc6_-8Pc-6_@B^$zfmWe zCB~Uae~W_%ZR7+?$V?9s4HWn-sIep3nY-8du+;WbG9vr-EZFCq;QZ8yO+_hlDDZPr zc-2YLrX}cpb_t1Xr5l`UC;Toc)r?q9{D;jYPa5NTLpqrV_(GBk9O|8bkdzmlogn75c!HBw?BpBJKlSQVcF1}oNgx(8SXr-1=I2r|9u+yNO4=S#fVP{ zz~eLE*r$KR*830C0Z-w>mHfT*0 zGR0@fMJyPFR@Os|8A@~aHkc%;@j^ViI4ND9UT zR0cFBMd{(l-2lwLrKupEwHS;QXhM}z9Rz5UKS`Qt#*#8-O`=IqkqM2__3_cO(%+JUIox4=jR>v1aeuXI}l=@Sz7AIZmQARaxQBVS$ou%&E(`z$xG+l1;K-q z60~rmYaqK^t7>Tyxcq*)$rAzY{0{@Jq^@4Mi|3T4shrAQN7=U5xRUbbxV;u`%fc>+6?@f_-%6|5<_vB7Bd%bp zla&N&%`O=MPoInhXD5=u(?D^L))%Np&QykRL!;Uhuw>Mte!sz0q$|Cw6S2M@SsTu^ z6%Z$to_nW`ADg@?R;R*mIW+hVNf=~aV<%v*dc(bVlNA{g7V)X*%5=ak34Ohqj)#su z6y1@O&Lg98W0AU9Y@N<|0UgA^L#R+&F*C13kbqbm7(%w&0 z7Kw7hWE;FZ3gd3T!FfPUgvYgNbNA4Mv9N-@tsadWM)o6!=Oo?<=Z-k(y)G^$G z=2`H|vk}4(U7cwplM+wpc&Q{dkNsKUv9nck!_1pW# zjhF5b`C9A=`}slkCB8wAH^^l%X8bzoC;A38-|6lp?(f;-oh1{rNNAleR7;}!RD+Z6 zb9dV{!(z^eJ{CCZQj6yI%qWw#Cs)MHmp5W|xPxsqC}Q_6)km!QL1lrrAgoLzB8d|` zDUPnj7Za1s$VAhE=&OpkVNT})Z*jPHlbekNP>-fGVA)-#!2<6IvM+2kA`^o0-;>bN zI(>V7#r#=oS#D6BHJ3XHlR4s31;EHk{&aNGUuoL^v$~KG-URx4NurATke7ScXVJJ& z&wOSv*em#8eN*$QLic7{HvEeRVfc+Y7&^mQ5bR&3Xll>}9Lz*oejp^YJlCeW5Px+G z@;08|x&`F|UjBxLYm-f*oRecH;MLIRd*#Nlw|5JQVkOFp;2 zQBg2YZdL)k!IL{q>UXrdFbbE5BD~ix2JS{o-h#sOl$H5D`HuTY<9xkegH>+b_q@H# zwtN54!m^I(ef0kOx+fFQG`zpYPl#P+QZDz};YRzs8M)~j*3xX}D-9x3!VpUyw<3nk zv@waTE0pdc7^~`6`h1->d;Fq(^&{r;4*mK#o%3g;HPwqv+`27m=XzPSsh+o8i;t*z zK`h+1VNjI0ygJh6F;%)DNdx`=GhG!yhw^Q85!bW6y>336g|u7%&$u-SEfI`f#SqrZ H{{JQbS(G?E literal 0 HcmV?d00001 diff --git a/Libraries/oneDNN/tutorials/images/cpu.JPG b/Libraries/oneDNN/tutorials/images/cpu.JPG new file mode 100644 index 0000000000000000000000000000000000000000..ecb4065003ef58410f396690ad46ca2c5ff43af6 GIT binary patch literal 29189 zcmeHv1zcQ9((fT?2!TLw4-(uNWUv4kG`Iy1?hsspBqT@(5;VbG26vYL!7aE$aQEOa 
zyvgp~-Tmy{&E30uzxR8Y{5q%SOrP$qe^*ykSJmNW;^r%GPewvo0zg1O0N%s@0XGZx zb|G#SrT`!(2QUHvfCeBT2mrU>H4OL{nE(p_30_Bl*VPjezh41D0Jzt`NeZCC@4?#u z!ter+Zo>aV`(FF``H8?!1b!m$6M>%y{6ygYM+8(I4J?=`pV&EDm;eCsd&ECk6S#1z zA>%#LU+dKo;eV^xvDc1li8FgGt4#1H&;_W(c@a0Cnh3xFA*1fBqPfHS-fw?^RpjyvS$PEOCk zY;3lUtOmw*h9;~=b~bEo2G7|zSlQVCK@qp-21ZsUPLzfwW)`+WbbED8bd(mxLUbA+ zId-|{;wI)6QXUQ_DjxExMjlp1{Kj-5!gtXH-N0@(&uvVc3@F`ftZf~^ZbDRl>KzQP zf4|K}B`oM*YzkI>Eb*5X@NYs?f0>J`t1GK37pt9v85;*bKR+8gCmSax3%mu3qr0t> zfg6jhBlTZ8cx>WmT-@_UB{hIY#Vc}v|{bk8n**Q1`+5S^ILALK; z{M|%<-*j*#C2@ZEYd^cjok;iG<(#a|PZzob_(szAec1 z_XGb|B>r><&Ux_X{>rEDCn^68pPz&L5Xe8_`U%$$A@Du%tTDd#>K|SNyo^<_`MSZR5Ud7+vvD=?%*<#5|T3h@aN_e zfQ@!57ZHSrKn2{wMnJ?yxcLlFz+;if@aX0D0Onsl2)7WCkWo<4Zlm9UH>kJ=+(JM^ zyoH2_jEn>iT_SkFe+Q7Tk#Qbyh@#wAGC-xW$K`w#o{B~-R?>{8Jh)H8W$5608y%m3 zkcgP}AssyfBR7bLmyaJT{um-5DJ3nVqN=8@@l;dG$k@cx%-q7#(aG7x)y>`G^&3C` zfWWsw5s^{RF&{q0#-*iaWM*aOKyypW$}1|Xs%vUnT0gh7cXW1j4-Jouj*U-DPAx1h zEw8Mut#51|93Fi;J~=%*zxYlU0)Y5uvi?fhf1(Q;PS-6YBt#_C?{p#Da)lQ}Y$W6d z94I)VN~i|*_o+Bvq2Y>!rQwN5S>!m-=A5M_tU%kpWivHVLyg(!@lHf=!zv#XSc}sNRe+|IQiyBT(QfJwpCH zi~ZE>92Dv{Muy(18Y%PECeAg6Mj}EC{p;ON!Hxq9x*^6}!h}P|8&r`>e!a!ZG~rJM z`A~CaGDu%2qN3UtxcN>?k5)7nN^PX;2^CEQ+b=Ft+sw?fI?TFJI%jPW&j&749tf9V zR?sM#mvM=kmpstKC0Bc^v$SqiN{+aH156v$@2hyE~^}gmKpH-lOPMl%2 zUbxq=Yu1tE`IJ2w>OaH}Oa8#D08PO(*R#|kCnwPa5jj(yIQF-@McPkWYpUWQl7$lk-M zn9=k{fq6|9$^ielq>5guCeuP917)e7O$k|W0ZaAVJT>X^@fvhXYXne&;QlQCTQHEY zA1kZ4>mxff&RZ4U{sLn;&XQHW)5Ur)ty)feF(wfO;#dCxL!hZU`(U-^-3{A z#M+O=WD?=u(&hN%!yST$xqT8}6gYLa(&fp~4KPl70}yvspB0&NomerD47($qYs3s{ zcOXmKYNg+AieGFvy1zUj_Ijj7WC$vIAfRJ+>@DN-389cDXw)F5s8qi!z$nKju#w8U zN+~Skkt)m5z&&2=87~67{y~jxiyMHEg60Jo))gK22DrKbcvs~0$=zOg=&fz0jPpat zyOR%7R>F*+3W0M}3g_;@*M=wsZ3p673deaE$@}+2GVPs|oft$){kE4TzM(ZT>lIzh zf(cG>u&`J;Sn86#nn*6`%K6u1 zZCccJ@A6Bg2rgKDu<|i#?Q2vV1PNFjvj^%#6$c`X;BMWK&h$ zHgfSUuT~()ue+v$UY^hkVxfWf?p+y&MFD*B8I);o1L+SfX*IR}{j=FM;f5g+vK*iA z*7Ru3K01tDpmi7t_=su^r-KUzArhF{Wc>w&8yC#vrm4?M4h{>wO)t=o?CyEW)HkwV 
zM%>L8;$2&{+b`tVJz2bPiQ6@ek>IBd5!I_8C4G=zVWw(9TYYX4VR5H>d%J@sax4~t zH}!o1e>0OyBlnJ#zO`94pOqJ9XlJVaBK%NyinBGpXo_lM6{}vPy&B8}j!IS1l8S}M zI?8tZ_)YTGTReL^Z~*^4+iTPY=WcgWc zI_=v}xWMLGz2p*^Nb5nA%Tb~IXGV7leVJ8HjRO+XOBRf)R{!d67aH+$+#hhtBLxIX zx=i`|Jh<6y^^WC21C<&}LC*8{H^Emcg_%tF-cLXH;e|o)Hrp8)J2#|hjp3`K2F**7x%EzLRV1EICRqC359l&H&Z$dL^-$^u$)y}zA; zH-?l({cx3Z+nfItBn)S|c;g(bw7>7M$cah%l7J#u?sILqM4tnd2T}JtkYSYRz2F(z zy|ZOpT@h~bwq2Z|Pfm|$!3Tf#%w-*TQkn1V_Vya`f+sq6Y73vi*@V)`pmR6-%Xol3 z79(5dVj}OY9b6@yc7uP#-2X$Lc(FxRm9;2wr@T*ijnEP&hJ$qy-w)YL-W@-_ltWao zdKZIya43PrY}E#u_1M#EvWl8G6Ux-k z-|aNw{Ng+Irm6p7tlycPw1r6zXy*fp-V}3~OJ6TP-ft<%fkvrDe~gpUOcq_Q9nGqF zR;4bFM!l*ss*qZMzR!kv=DXE;5iz4(!jVK1Cdgj&d2C_NWjDE`>Y-_Ak_LMlq2ThOQy}Kg37S$E?1WYB~-*Z3K{re z58KUm?K3!1M_ww}p45E-qI(b!WO9I?CWccPPAZYqc-*g1<=1)%>SKX}ey%pR0KwI; zDE@bxIMh%5JQeI0W*@QB?j$TeJ-P^LPRmkK(aD#4m_LxV&9-yQ#J5{Og9D-Lq{`QL z=f>3LU=H?ee!e^@^Ze@#fN+ereIOP2xb$&zxW6dbR&x<#FuImh<@IYaI0^h8{}m+M ze-){xa)8yAY+DW1eUn@r3_FgxKH7kJF{K~e06|Tq%#E$4^BXFKw<$hVEG@?vK3$HC zuo`Y&xB*s{8ztoky|UoK+#_4+2PTKmvlSN=F`H6R!vV45WZLm0FqEWtLY#E?uzZxo zyTjF<I3Kn5M|o%x~iEi6EfrCFE!`s@aX!Bl_==-J6jnlEdF1eGTE3aG1D zFqW(scYl|x5}ChcA%o}@j6~JrDH)x6Ke_N1^YX= zaxJ+mYoo?LFl#PbU!xkR(Wf2mub#v z`pj8Kn552hmh@e?zJP}F{?g|rgGN|Sou?@O^(5VH)eWE@w9u}!76zUr9Q7L=+~QV( zxMdUmz!vxw=Le58KK+z5nwp|CU2zJgcE=ngn8)AnRf(hHAzSgT5A%312lMBWE;^dA z`FGA8K+BV63;hZhV^rH$YLJi*S#EhNKQ_RxEaDqWfh_XyQz8H3pMj{vPcA zPxbK#>Dy}D9;s}t1s(xk7YHZeSrMJ#(rvjZ#_ci_rxT4YGwo953B2U(jjx9b$8G>Q z-xV~fr>*u|p;oKz&TiC;G@R^03g|usF9=%X+&1S(W2SUaE8f3s+D`H*aQm3AM_wLf zQCFs;ryZCrIW5m4wYQ!IS7(Z8|AYgh=>FO39Iv{z$lOY0fQrK3r@bNOV#PZDVU156 z4nF9pZ)V5Qoq33~tcGSh{iWaXvXi3LjDHY)kKE;t)cK)D#fABob1$x zX)3{Z%iB5z40RCQcaMlR`jkGX`-&XP3>6Emy*eLdaMq4R!K*LEl;}shvg#>_U)b27 za6c!nz(~f^&}3CxgGmh6+viwhsH(I}=PGaROMaI6{^(09qK{?xhSEaP8Aark_Ie)T zvqH5*56Ou~Jm-B`<#K1bdGg$?5<=>+&qgH-du4H?H|1#YmPhiyp+~MlaZVOP>e;xy zb@OkuI!=6ok@H9;-TW{;Zt=sSo7Hyk56Z6{`Zq8IYoa1|l6+)TS9-X6OQO^~1veUV zE-HgUr%Y-pf}Ac(Q|!M{gJ@rLEca1C{8k 
zeZDKarM8a`HOX!np6MeFT++*Tg~G&M;b$MX1lZ*9#DXr~wNY}55=QsiN1&jJKN+gG zSh%*`6tH1a4qen#=?xKMpKDBckC0!-g1^aj&#iD{2mRg1TZ2f%(~npQ-iD>z3U@d0 zwIqG1dG6xIc^cSCpcrl>z*G^|zx@XHkt4-DHt&uFl9!km^R*o%MfZ#3&Pv<^tqX9E znR`W;HDWcYd_$o6HPaBS2j7n*Isep_{Gs3hb?IY-=rMK=d?8B{zGgl7wsxHOarzU#_Bf4}vFW6r#wK9+6| zTMQ?S6__t=_+ZgP;~%bYnmE_1D&^h%6}yGP^hd^XG-@PtMutV0dvh*Hb){HufF!u8 zR?kj?pG7CLjdV%&RKgsAJn{?+X$XON?!o+Y>8e<#*R9oz&_yD!Odjl@&834VAZAFk zhxSWE5cT!0XBq{??5Zz%0`_ZN>;#kjx9YKvw1lC0Z);XZEVbqlNk;gpjaZjQht$td%zY*%|s=heDP_!iB-_f!ndfI*4)|Hpd2+delFRpV>dn@9kigz8KO7(dI7_qO4J{^_LWbOO%1# z$LXKbHtiUvRzc1K*hxOb1&+?7XePB$RBJItnyoe`+1OQz4=U&!!xjy}=(bs9jd|wp zqx5boZ@tIFvdeTIr3IIsF3&?J<#M^>v+)|DW8gs|sM{v~J9L?GH0t*YB!c%d<^ z^+kM?62#aQZXo{!aMhBj0nSzW*p3`rbmd^R0`_^+`qPV zX+pO148xh#=2$crR@w`{Mt%|PdHt1WXt@0ZGbbm%P20TPTSfoTc#&^1NJM=3z&(+) zzpeN$AWWVMipvZUHb2|-(z=k(&z^Wts8+KKN2x3?`+)qoRgnY@p^oHtafsYQQPDdA?*#CSE! zzo!l=3Y1OR7{h{UYl*(_)HYVuM-BwIlDTX}WPly>e74(eS<`KC{EZcA%ArQ#N~bvn z>$B|wV>|!Ye(d_l%-Zo1cX&%gH+g&N?)GGR=f^LgypkiKtM(saYUG&4A8MA?ZHplRB4Y_zHdYnz$NI97!L0%p{xLcl2}zWJ&H%YI7fvh|E1>=)`! zOP(Uic3f72g8KVTKPE%JTMv$U9x6IHC+o&e4+NTF$RKgsGm=$t~@q0Ml zSrI{pjA~G&Ioq`|`4F(o$Rhk8hZmcpj2-@ZC@H)?02^wwj$)}+mY8f2oO%P;1!3f> zXG-g3L>!wMYpDgZH8L5iuZ1TaGgrDCwNS;-8tc70$rxQ~8lFRyc-`fW!#mLHZHgF7 z;W3n~U=nzpWMZ}xO^KfUFn?MQ96P5Z%GaHtkyUjWbZZJ5cq5U3YI`tnIzDZ`q?G7E+t()Nvg3`#iga=2CS5a?C)MdGk zx|J~pdrj1MHuZkIC6B5VzwL_PVDMZ>W-Qjl1R;=mLppNP3d=kUzlxmg+);ARCruwJ zc?q_uE29{81yK%vuzhkS9M3q4+mJ43Ib#*8@Kq_S`v!K^`9_BU9!9lEPk>~TC?6CWk6>BO!^Vrx0^oV1yh;eNW}#!AT_&u8>=YaD&ws z^At>B1+>lQ;`w*MuukF3oQeek^{mHg^zW(=0z}yKJrjXua@M)mBAc8yxsU>5EdOy*kDpwgKxR>w*+6B2fhknEzIf(ArQ zFN0%{((jX=yWQ5XXncHYSt|zV5DD1Q34)*q^5w60fTX*r2Xfk4aak>J$~*ieAoMb*Fvybor<8IO2K z1DlVCRD!SICQAhT(f#;LzD*Q%AL1_aLY>!CQeJlumC3+g=?w`e_JR(!%s|;01dKwo zA6Cvx`rhU?+R#Yz6xq{VG?ZXG+a~h?XY}`byRaOC954B729j#NO<9T>*;U0vx|R?Y zk!Mktero^*1{vWt}Qwz8d0xz+25XGi5@88hepgf$w3iqZh%5}SoG_hFrFfk zXNEg6I3K#_$7hU~?}JyUT*y+Jj`}qLLFR%^@oWFF62!0AXfjr@s z^*)q6Gc0x6)bD*h(sIBt(RiL0b*G3iX#me$e1? 
z^$R><4Z91e#;hIX5 zGP21cOz_MB`%22k=0(I>R}|criyIS=Gz}-0nz+xoZIHyjH6XMR=8PBptk0Q}TL05icIVtId5a|6tQ*oU`2-2h=u*OvWUvL#KmK8#`c z<31LN;av_BHWDKCy39LE{8CY>sj8TGK6~K`6F0!DU3a7vW(rJA@HkoHEK#ZD*~S(3 zVxyF~8W}EDO1AM34yqnQ*3s0lR zbUrCuoHY^P?U0k((*o6w5eI8w(gPt6=C);MekqQ&oa^*&q7y z&WgzFZ#*p1)^*sNI#F4aIumRSiqwEfIwh($#{D21I3bD(3YrFXeQEyL;Ftg zEOk2~$49(1IS?oj-p0k`EP~BQ8L+l-M+6!Nzb(Gvs)5B)pHhyQ5d- zfzCa7utsKMUW)c~8}Bstib8ysu=TV1GYkWVxl3X|cTEM`OP00_D` zcC(<%04~A`Z|MXXlWa}Ih0-Fw^T)AW4vzV%9RTP`Lg{rODsnC)-)hw&-g>^J)fA{K zMXUfOq;tdJloTTb`h2-Y#%)7OC5lK;B!E1IPU^9j`2Wf2izRSSkRklwfPDzA(Cca{ z`~_ayv$Trwvy|V;&v>tTu>=}%lW4aQ5XT?KSJcFHtmGrDrbdjArb&htvbC$<5&4WC zuQ~)`k?7gB4HPD@y@wV$378I_6BJP) zzqqk~x$0DX4m-!TYnm<0sVh_7pHPIc+>`Wx1v^mx9WdO5tOA0MtvrP z09C$N1hq(aRHNh#s%Fw}w;A`8Y${Wp4;l++-v9;54>JO;t*u37M3yv1G#=YWFO|o# zVtd_vx|ibM$`R~S&UKZm8poiG-I+aeaJ7XWv=M5iYiBB(dEb-Llv2>sr;-7t(C6ni zP3?cAif}P50KszvAF2DjUy_s5?OQ9Wi+JLdHAUaqe7LN?X%W!6AA@o6#x8V7>aL-Z z9t5JC*C1W#lzW_3An)1umTd^0RatW#ZW@f0Xj{Eoki4cW)R;hU`axHuHvNhzIj!U( zU+>T-;GERiLZ<3~W@U5v2Eeo;NiEN`78(f_LIZ*OQW1Kv+VpB_=`W%&3-vv2fMt1S z-;BiT1Yvu-qwryt{=kr^aLFr#5ZP;JW|6cC?3!Y~P&r6nvE_L62KdzUl??k$`o6>+ z*o^+FGo8k8)XGz{DNd10P`#*SldJs&`YL^@$pmKk^bJrITUFW4=AWj2vIu57lcpTF zh{alDkbKMXqM3J%7=Zs*5Gier)wKwx0GlXd>caY{k z7CHn~`1WXARl^&`Vip?S0ISqD0Ij5$r!>KSGu^ z)r2RYe*AC)yi7NP;Yc?xqII73@wvs>ogcq}(Op^L1Pv;|`)rkjH|77{6zbJ!_70>r zVr>H(H@Po~*TVIzn9`Q)s7*Y>a%|sEH@sT9(exESh#F_=uzit5{j`xBMtNe|{5f>CN}%f?v><7ExFbU8G*c1fPQ5z!$w zXpK;|D+I8SAl2W`0I#Nb%urk|`}irQ6&SH^ zSqplvz#a>h-8=m}u;;U?18z8SPJoG}+lcMJzV)fC7vLL7!*#=WXW#NM`SHrNXP5Ax z$kjjvXfD+{u1-I|F1fw3;E4=9 z79x9PN`nfQ`mn^Jt!1`tiVHYD7JWPt?8q}->^S-n#vP^RV_gB)T!SK;SNd1= zKCV~TOdD*731_?ta3e{q(Y7eV^B%|7SrJ$bdUv4#v+|04MUcM+>HRh3hQ15>w3@!h zcD3HoOByiZ#Pax_5#Pm`qQK0UMe~Bzp5?VYh1GOixp<^Js3MOn$oC_{AmJY|Ol?>tnvqE9qaw=LDp)=NTHQeik^ z>@gGa`^0n~j)6WBYI0H1#fJ7E;BbejwX!JBO$pETn1nE9p}uFGCp_Axz8Y}+f~}X5 zo0F=DtVRg`V~Gsr|1ml4HT-~p*zQbIUw4o>TMr{U5Ug~m43|WmtiWQUb6k0QB;nZ6 zgg5IcSkh23$T|j_Ivh2ww_sUls6NV+I>`YlS9h^Iw*H%pyPBrJ+fAQplP}q03L{Dc 
z%_!o(HmIS}7JO9gG1V0bp#Uk#xN%2+dhv$_KY!60zg+d7oGFViwRo5xSBanJ6vf@M zbtcco`v!XxFDQ^rcWPrqakgFMvgUJk?9Mo8ja(Nz^H5H?_&!|!O(Q>2L$PY)qJ8yrltj1r9!~WXux3^V#M_7h0 zhjOkVUXo@Mkw!>9Ga(v>ct?@}EPdl|nW+#3vQ*AVZvdGKms)e{0Rh5eDY&WlcD(laO)JNtUxOLw)@%qC8$sNyU=LQ3#jdokw(eudW8E(!deLO8e^toJrJLCUYCKOf8 zE7k&IU!|yUnc`!-Ls*{ag?E9Y6=f^rfr`HJ`0Fe6A-RBeaVrGavnif8Kt;b#rh%q~ zV-+fs^yh_jcr;mvJ?+19@cf(a6)4=D;K^K+B~c@qXQz3Asj$w(+Hn6XEb^r3UZ3~Y z;Z%0F&y=7KwHlr~jhVVmvGqyLpkauSsGlM(mVta)(4OEwMMfdw;%@ZGz@5xRR=uGo z2kUUP%(%Ci1vMatX3l8JozexTo^(PZYD=6}xgEEb=_l&hRlk6XQ>G>2hh87+4k&)uT3qlP{mySf|}_`Ht2aRzoZr|EQQk9Me*4K=HC!eTu+vk<+t!oTM>Al zb&dsvyLFE9TQCYH@DqG@PPH zNnc!p;()HTX*!9>{ejNRlU0V75GR6`pycWv zXC4Xr<4&AG^!cIh|Crq$H#Vj{owHN$zP_|rO=%_$*lr{|GrF|WBly4&uPsKpb{1a@ zQ`-ZcJ$ueX(BgriN49^3Zyz>ip+4VKyhega${lgrpFI|}jhw_iwS75c)xH3mO4Iiy!3gW+4>S^m8TF+iVAdHQ2| z^%q(8r|YrLn>n22t*XI;Q=8F%)%mrdY9CZLQ@i_H;nVa~NDs2?ubA{9Q(A2%W_T5~ zZ$fF&Q`Enuy7u-ntLdqAzRgJs+7$fsJ97wP%)$Jdr30RzIjvXj!0a8Pq6*J*K~8P? z{oYJtcp7xlXN}S5m3dsjd$!mW4XkW)^f!PMgu>0oqd4!*X6OvaR-=o$6K*>Fm<51H zqF&|>!mF`@)V`v85S?s`>6q-z$h1-%H^gA@j+YDh>7&b@u>Cem-KWN@2)zV@!ryUWREi?qi*+?j3}`;;twI!Jn5<(&-|5!D9;iK=*P z_n<7Xkc3ywc2FAi>x=_AtX)yX01I_OrqGEj+3Z&KouQ3ifXWPn>|$}nz0YKMzaxJ$ zd~p=hASB?Tb?aM^89)zUAbe4I;4k2&PJfTaoIN-FCByMFVNM=7NGmj>uqB8-kMlq5 z_Wv;x@i$12Jg7zGRKc5=4<*bcQtYB+LYt$wC>{NCT>B;ieieu>hbB9HoLNQ0_jKC4 zWOFTCYTJ`(6TivQC}Z4v@gitnf#{5CuVvitJ@*)^x3vB6#N{}xjAw27XlkI!sYy*Y z{MEa+V}EZg{Qh;_Fkivjj*C{|_721(NFrR}zOa@!xjHHnT4WdAPjjf%2SQ(yo)12K z?Cshye}cECuC9LJmEkgG&#c(ap4$LrW@ag$=G(DOdzu<-paywbI3WrlhgkNfAHqS; z{{$C6|I)J?W5aPsW4ak_!Llr~G6tT3I(-U{AW8cTfMr`R#HBmIr0}eRpY-E(h&|r1 zQwhx=w?kSjdn*$>+g@@h*`Eb97V748Cwnep!dc&5r-DKIX;L(X-1TIVHiUmL-64WO zUBwrz8F%W_?!SbEz~8dla3i}*T7q{w5LAuhj>B$UaZIjk>OD}1-;=a{@uKQXO@dWH z92`w8r~A-GIn+4oPh$E0r8$7HU5ROI{&f9jI#QA^W^kJf>lhGgSl z5%IL*t2Ae!4%Bt4O~&4_E0OFo-`?p>MdsHOOh&8vsrhEovZ~I5pQ|lcl#cN5rp~Gs z8dFmQW;9XiO1Ekxdy|!PG?7TLU$0w5w`H4p*e944VZ-?h^847o%DFMmaB&nKAp^AL z>r?otMS2C5iHK644<{D9x(dr$L@!VLJ!Kd}rlw%<3( 
zzwrgzzo`=7(KAsZ)SN<+HW3V`S#zQGB;@GHwG|2`3j1rTnzYCuE9E7A(K+Vx4?6q( z(1puL{B-MXuES0gwMPF^tIhZxCaX)+s(QbNZ~f8vIzg**v}fW-&AWmkbP?B{0hih9 zFTB)q~ z%gnj^d8Rj>)kHnp{&F$IUT!)?cg@fDSAkpkzjlP^<`il-$?9=}EpVn>$M<;Y5N zAGOCxdiNM^Z^IpV(B9u)8vl6gPouu1VuWSt&IFG6bZ&KLk?;X(3kg%huz5thCiwgb zXZFU1UL@p6T4qfKqYT9zVVkjfqJs6u@zXpUSm#z0Oy^^_3Agk$YMF!7M%aZ!@p`O8 zFg-=`ZCxra1v}0b1RT7!ZYyoOv=TN>3A?8?P&_>of==6dF)j*}Z&mn@Q7q|5WTskr zh)0iDXT3(vg92l$oZ5-@(A!tc1OK?e#ia0L|0FM;o!B{e81YIb8}T6Bo+oD2gdQmY zAyO|g{fcaa6}_>@A*O32?e6Crz|Zx9t#t~ysv}BT38}wWSpS=@NS`$f9>7?dpb2$s6v`ZcA*$C~XQ)@G@Bo|aWKZ#R1M6D+pk5+H zv-YBod!AT2tmgi;Ie!O)qbF*QE1_cnB5!-Y30%^ zq3$xue>ySzK|Y8-_;0EFn;q5pJ1^mn@RyLtKa#E$05t&$GrVqNpy zSWi0S-}P6Ar_#qF3YbU;COxrH0nFwN#BciEjBQd>#urE3hgnitscI5 zTSq!t7_$@8;&poUdy%<^F%%1z28Nb5I?-VUq8xBA+jGM~F2`ORI*1sd(S>ze(_n?V`&BT!scnd3FQ?tS#%_c8ER&i-dl=>vAYy z1KyN$k1Bt%4`zPpJk<+`_*rgI93t1jW7l$5d42T+W_gxV3Vvyk(mXD4wkEvs!`Ait zxQNweq|1t^`ZfpmTR*rc>o&V`R0TUesC>^v8tzBq*+HGMnlHX&gYNav3t~G-S@kS} zIh<39@EZx*Y-D&Sf1%!VOI&7CD;Z9o^@p7c=OJBbs~R8oO64>7J5U^yt~nG5`=~7^FqIrfGhdyWEo&~7)Je6Z;N34b4T{nqp;O_)(8~SY2OTE zmmOW)I9UAMwaLut+ia8G6XLgkcYL!+iM1LCbO9ARj*8=Zh1@>ddlge1CvJ0(nRi3# zEEuJ_I`lJz<*+^rQ;pKa?8$g>K1gRtb1zM|3)iW{9CG7EsDWO|4B0pd6o~b)uHmEt*6(V?AHSUySSAee;Dw-gBUqbYP9i>J#(7C{>IdbE^Ip>6&9flZ> zyAczIgoUi=emAI<~N+*%$)C>Ip@qdvs{i{&I0a;3yTQ@FfcFxpV0pRmxw!C zLiSL706cnY8Zyat#7Y|wSIH-e6L;F23y zTbuJTF_~I1YU!G3>oMw>nK0RFnKQ94GBW}A!S?1_I!1cd6xw?4pr!&e+f{Wm6i{6O z8VH9Zv!wY;Jp-tyqotm_qm+V}Za^UA#v{tE^ANr3V%bFs6tW3*#qG_!oi#KObF!^F(W#LCKmreLsg zFtygQXE3#*`b!6|^sIC&q2|_5GgFFh9cpQt*;oru+SovKdG)pQIka_kIT*CH*mN0K zSah`*v~<~672GyspmQMmM*yH2%}rx;jjH#(E}trq)(yG+3A@ znf^TK-)l@XM1LFfXW>Wlh^FCvX{o1Wt@jFD1ivjW12ZQB8?(Y+mz{)^BO$?#6F5(~3^#iW&Lg2fIe@NH= zGPwTk>*|@J9b7xKCwuu7@DhNHg>?<{X zeNu9ohooeW$nFzSGEzRGeM0|~o`iyljfswxhK`=@TPGOU*RSK;z#+iJC7^pi^nmWW zKbKzs_}8z*U@~K3Py(*tV_@QATs8qf=v*WgI(zvof%%sY#udz~Sl6(x-@w5|6O`Nm zT*1J^ymA#23+pO6b&27E{ypF-KGxlbEYGjqlhwkev>;%03rf6B^`f92DA&J3&8BVX zeglW_J`pj=qsKI~Pw3b=IJvlacwfE}5*85^6PH&|RDvkKQPI)W(|>0GHMFv}v9+^z 
zaQyI-$45^uZ=c|h(6I1`$f)R~3J^ya-AxsJ}tQCbt9Bw(Q3tq-IAvLVjcIPn`WS#@v4u zXMbbt?|e-FZen7f7Y`F300x{0`_Lv_`EM%Mnars2ON_jm{Cu8Ka(9+d-~JGHpu%9; z^5$WnQ!Di@yG2Sts6L%2KlvoN(+H?(HMI-{bE3Y>FLxvNi>~(dF64){LpWbSQPGeY zKiDR(`=P&$(Mt6yqdK5z;O#rAJ{jzCSv8n1lhzQ_%$P z(%5=2ez#9};`FI^39T<4LM=o&LX(i4+wx(+oX%L5AB?v}aaF{D zidfsA(SfQWsk(A`p8UosVrk}2A{-;Ql43z)8F?`s{Fj$#k`)mVGZ$Y zN6I((A3gE_Zw5!_(liC$Z6863oEEsfsdiyvqluPTiP~m|ypQ)1qyq1r!M3BNQPO9x z@XlamixQ_V;`OBuQyIICPU!3J&2f3(OI-mgT+3-jiZlhCuOckAaR!x7)T1QWlZV+R zGvFk?b4!Ks>n5`y`xjx!X2^h-ZZ>HYM`AZ}2|0b&pbuJapTuPfnRC_=?n6aYeVrl$ z^ZG>gayzBT?QOR)Y>4^;cgiV7`kUm!2Si?3D7-5U>X%ha7&6`p<*4bB!yLR#*Q*hh z1>G=>c0#Z8Yb_xH8I1ptR31nk))iKJb3OjmMgTqc7+h8fNGd>6~6XmHmLUai*(CDPQ1eh00fN>C+ zf$1Pm`R?qffq+j#hf@-*)1X01^8MHIS+7`)J`G(kbIw&NdVL*VGzN84*ZLshcymho z_6+qEmv>gIk1hds&V*o}%wE$uz2Jn=EpM5b!bxq0n?ks<3plWVopd4bOkeF9k_z|R#itgeyX+G&V)jm+%GW?(?|%k-edwY#F&&snCyD$5yJev2TFvGSqM zg7tharS3|_1$iBW8=jDf@H^9BW}?P0PFC5Zg|ccbfk@03!-q+e=W#k`_ipN*LSETy%^s=mz=uZWyh4 zMhhze!ECO;7W=X}_gCb-jVQa7QS5{hn7@O~@Z?cZ@u*yJAxpbH51hA_llcg`)=P6Q zvW0w;L37r6`Vq=4bG--oVrisr-ye_KDSDxudf{_4Eu7LSbZVjZTB_Jk1p8Ej4{Zh; z4sFu=-&9uk1?B}HjRP5Fk?m&j(f1cfmRGX;QnBp|5j@ZLgvR4e5p`rxS4u^H(8iHS z4x4*2d`9PCAL*x-bz0C5;__*gDyt%+L^EYUZvH)+ z>=)2tlp$t-Yl06d@r)iCh#ezz!VC}LH&LaHO{IyspG9~B;|RgbmjIIEwVXGaYOAAS z7J6(#bPqnttJ^y(Wbz5f&Um!+d?*^B8MKDr&GvdJos3B^i00f+c+0~527Gcb38|1u zF%*fW8Ia4yDOmb|7OrBB-y~tbNJ8NP$nC&UNs&QzZ%noPrr-eQI<4(DL)j#;nimIE7qdZt&)1-2On`UgFu%6jcWa+HKR_?xHYA42#>zb=RxYArJ$5B->L`U zG}MDe%j=_IQ1uPxTAF+OuksEz&@1xR5^Vtg5N8r6pugOHEc;0t#jMYPCgz#&-D5}7 zN))V$tbTjLsKWpoo9cr~e=eO|wUiH$cV7g%bcWl_gv>U6cfNtq5PRIY8>I&OHHI&= zX^iE$c1}Tt2OfIXH-BQyZ=@_^#Iw8m(B??J{iDq?Tmt1XKWN&& z%G(s>g}mA2x?KQgPk8Vk;FghiPNqck;~{z}zvCpASJ(Bhq@FL}Pls1`B6UdS`;_8sSeE6K%{^oevGme)6oCThu1T}HOrE$3fq zJj`!g0>XVSIOf8%TMk`#Pw1;4Po7Fvh;r26^!RO+^5&O&%fz#O&D@^?dLWqYHqNP& znclOp`L( zmnK!;Jor1mz%KsPnZ>9;NaOb+y7l?I+L;T!(!JYeaKX@Xx|aA3IGpY2qj)m~(q*$U zgL?)!)B!NRQOjGXA~)-1KxPjRHw z9UlZF!#LA3Bpoc9(T44=VF>}Ghx2Qdpk(RP7#f%B-@DGgmHtV8TVF?^ux;UIcRmsm 
z0*vk2Ye)(tp?e^`{@P9I#-Wo7jzLIo+)X4d%1P5lyLDriEEt|4u)ac+Dwgzy@@?BZ zU1K6}pGbUqgk&o)^UIt-iGbb9j3l3RzJ}i(zEpd*ZI^L2M%Z|!YGybG+j+V2+} zoY8-o>a5V`(dT(w3HK7D;GOYWP>$yunW45{agnqP+e1}O52VFVEfx%)m#A|-N3ZHH&EqyO<)rLv3RG9Ie`DAV+AY$-Y#+gpj zYJXC?ofx`+AEa@J0~g>E=-)qfF@p;CS~C#Hs;7Um{y!Tx`6cb|j83AVB*5W~_}%Us zm_wsIT=&{-tf==`pw!x3(pDPTNIT`-oZRA!uTiR9&bF=3Pewx^o&lNN@?qA3R&3e5 zkRWTc5iiH1{jFg9*7q^$+u$xc*9y+z2tHfeDRxk$^8xp^2A29Iput6FqjI6r+23mm zLLb)!Zsi#NklC&@)%%hudt;sY(2=|yy|-#zo_&W(|0iuKBZ}VPHc3VNA{qYljb#zK zmw;@)`N{p%$4PnZp`qFfz4*wH3>(e+8lt36NQpuQG5tpQ#JhSHKQDT>)6_h2u3lKA zp&3GTgjmIdtfALU>}O^E=k~!L)dsP4d0t$jJ#r|R-v%Nt&r*CHw}G17p96Wqr1SY9eIzAc zO8U3bOs~9Z-G@i(+J+0;eKS4PISnepG~Us@qF+o1WHrPP=t%b;auogEe($PEjg-mu zf^Lys1IcPitnT^7@=ojV;1PjSo!d2^+uz;ZjJuhyZP^i{x*om1d+gLhQxqmO7%KG< zt=-=q{TlFYznddN=I<~jQqDV3DE8(FId7gPZ8SPV1BHm-Ca7+U1r#SWc9iLr3sSL* zu-V~-d5%n-ja}oS8{Q38+D|goYx)yxu1f*DBZk7e`KMIhngn?tsVV6Jo7?XhPCg19 zo`?xdhV970YXvIyk_GE4(vPLfYzNCmfxZ$ob=5H5c>O@~vMUI^GrB^JQrLn~|C@%x zMR|iT_Y?U+nJ@?`m6;_^=HWYkRW}QZ3!rkHDtwPgRxx>q3ios@D&{=AS%8T`D?(q| zq$0JQsx`MEs)QkP&yihHh?7S=JN~few74 zyKtdhomOouQKRG<5iAmcfyZ~Dg*Fd78~-H0wAwW{Krh9F0AgfK{SR6nUQ1Oy{;*|9 zeF?BvG^FLpn%x<$^OckseymOsu-p|ytNCg!<<$D-m#fxf?L0YabPn?Tj*}drBdCnj zRhwgyc!M(Ps}a;Ed2}y}b?ypVjWKfs3R?_BP@<{uBYIJmp{~%2kNy6`mI`j`J|?=y z@bkm7HM26c%?Z6nF3fZ;QS(Z(yuK)oh;CTeC4lOMPZ|f|cv(lFVD(apD$3Tmzjj-j zMSf#^eq?rXAig2A26Q}DAoMCiX`=#)6EB%E6cid{Sx5oeWG1N4Mau<$Phjf{O`A&q zy{`~mCaj#x3C{l09dmmQnlN6gv`zNW477|8d;Dr$q40QXQqAF-{n_^Ma<%DM28rZ` zq>u;lvxSaO#rb57+gWu-W0~@=!aRE^;fUz&(Uh`azK5au_eR+D zj@VdzhA$e-j#2WL01OT~gCJO^7&`M+^`=@K>oB0rdOA%d zWA9XW&|I}<#1)WG8T$#r-=o^h6xGwO{$y|yygY@yESG{WazrJIQZi#ssbnjBKUjbS)Q;TIIXeuF znKN2)m#t|Kw`5FYcNwg!*l~7(4jV?XVS(yf0JHSK==>H!ys60M{wT zU?{dL*!@Kr*j}f^@m8bPW~!}vy-aXzwKbe(pqMRNy?Ks|t&uS3QU4p@F_dA6RiJZju+d@*(AiyD@Xn`KYqw(k9R-NUvrmTQ06!*Km5HD8KB#yxAte}iwq;znNnt8XQ<%?Apm*9KGNi=(Na{JoseIF}J6>cf zc{+PdG{4~_w&W$?T9HQfSyZ-=4A&7h#^*bHCcuY7g$WMV1?o8jyW<2*uXjDnVdnNXP z>$@rYOThgr5%l~~vla@AoxGOP{eDR~xWsrOo)?z@Ce%=S8X-uayV6BZ{v7%y;z1QL 
zgW?mHu4EwtvL}E`K;Ws_>6S-fk>PZwhJs&jRjk%6Ux->IhZz-F1w{O5zTr?==wM0=7-F8UL6wlmJiQzXs_S$1E? zAJwzhgwv!FyR^QxcTCneFkp&-1+zTqBnUjx(LZ4@iPaQuJsImgFS-P*%Y)0<_fG?l z8EH>yIX0hPTtgz(aOazuLRDF7yTNBWAr~l)r;a8=$@HKl3y5k{cyfa68T3QbNDU%n zM@xFmA!NdHuC8mEhg`XqU*cHk(T4#&t{P4+YQqPRbFjytx;;s?+c%JKgpY{^) zQ*`fzedi^>1xIuIxM(8ybo4k@wxf{fOi}+r`VtUy?-U(CT&{%8JWi3sXE|+Gy}**HvCY1;LxG)VBg^eG;9gyJQ@jVg%^G2tX@b}q=q z^YV4U;MUb@LtIy7<`?4RU*a0q)xl?vSYf@zpeS_GXC4mExdzXgoK~)17WHWlH@?^V z{PR6^Zf9TS52nQt9ilcC#|k^b|z+I1NFguce6kgRw1oKd4>%( z)dOaEzKu8&GlFeXxCvO0MS#s}3mtCN@fJTlmY-6C7zCqKs$+f4sw!7@AdQRDF(7h8 ze*lwn_SO4Pfm3Rz2o*KwcBZk@bS2G?e>5XGcXuSGoWFPTvOv zYM*QI1ln2?fK$odR1gh64B!IeTCni4PKxm>OL1Fw5hdiw#v>`=)&+N=MxhXUwEmd67;nBsrcS4r&Ur#*G&MpD-*%t&bvo+A(-X$Oa zj#8rkfD|x#i4Lk#L7GDyW#XqU0aw>n@YSI&_ z0`Fj_8-tRKafJnF2~hI=0JUjy1?{|^xv$Nu6+e9mXxo%*qCZOYb5T~h1i0r>3euL1 zvm1+2VbpUhK7M^*d@gANx~&Xru+sT3IWmNj`baLG1-B!!RTtD}oAXn&uaw5qV_<@7 z2JK6tV;QYP5WaZqHOrMm;H3t{O_4T_pJNGsQ^4wt4EnJSpDMm%OXUiGm8k?5pMN zpJ*Zb>-BKz!5pQ=3;gv>&DUWFU%6^Xcd}>)$s6(DgnJ)8Sq)Uq5Y^ur_1I1sFxjFG zev%ljd?=`VMQ}y%Lw>^TY}cUlr=67@eUu-VJ0ADQ?;&Rf5o%E&{awm08h=jilmlbGy;@p_4oD+CM7-O@H$IOU%G9z&o& zBXur{`h$>B_aC>(d5|6aGf=sCH&XZ-ef#=^E7#X$(fsn(sTh6Ng!okr1`W%IV;pdjjZlJly!K_}7F7XI+`Uk_KX7V+y`T^e%`Tz^}m5oL!)YGnj@J7~#8x zbuvQE-5N?PDt3aG08ctAEj6caK>3|KOZuO1Lkm3zxF(9sSkZ>>vpGT@ECtJTz;ary z?*td27*eU0qHW*0V_2n5HZ(q* z(+X^;m9KEiHBEJwt|0}2Wi?gtgFa}o*&jBZ^0iF(DSvTz{oaYC+gyh#Dd=F5O=&Pw zq6U2!5&|y88~#!I_j~&V?vK9TsI8y%=OiS8(K8%$J5QxekeW=M+XvAr4K@8_1D-rFh% zZgDJM0*I{ZN>Q)WlNL91Z%QZ4E*~^o%~5qr#ut!k<=v>!8bmR zQ)%Z2T+di9KqaTZ^iUIvK^A1A+o7j%(tCNl)x@pgQZ(zzjvX~b!|IA?&5_CVeK!n# zf|w`y(D}Xim1-Q13)P;8JA>Dp^{OJg zKY#23u?BmtL97W`8^K?Tmh28~e8R3L3J))#zhT`iHP*!ae;F#;JCl7sz#Wdu1zyI-M`QNWoUzwzBYe#&on{hvc~0{+4+G=fK3y zf+ieZCHD7@AXLVYWJIt4nE2sZJu=R{mU5?NI2=2w%=y)!)<8k7^u+Vz!44!$v02#6 zBtBVCJ1L%B`f4vhmSP{mZ37xM2g>*ZuJU|OhVU6NHvz`z8GGODin*1W-j&l|j|z3o zE&5zJ_TLf#Zo8MmJY2~xns*8(dtyw@EU0uX@8_RO+5tDTGI!-iGmlGcd{26wWNeb% z|Cal)j-&lG$)A6C2V=I5@QmFWj{dR?R5guZ+p-li`RKxCd3gWGk6!@f?aX7L{pE&5 
z5}~%>KLd zY+l3{4Kg!)RN6GQoXNbPZtIwBo{2&cmTa&T0G=pFWUqOO*=QutU{Y0$F+&wb=DDtl zSUnXxCYLhvSANZAH0MZF@=Cdn5&#qikYySq)gDf3rQ55D91{zjcicbjE*@a9_^*G9Yw#kX% z)>c=q{Sx!g-?l?U*i1Y^sW^B|LSK~7+CQ?|_|z(>Sy0hQE#8DivRinQ52oV!F+hkx zTjF|MzL(0gtOB-c9x)hFcrcxAhTIA2j-y9jLf0dS0q?HHxB56o;~%7Ug{TPD4O~?F zXVI<;>D&HO!sVA{8l1Qqq_4R|gEpsx`Nb%jEAv;tHENoTaBQ(T6R3WyXhG@Qfl@)_ zM#(soGBnq`y@P7Vco<@Jnw8`@0J2qf@zR z>E*xFs68YGzFHOSu2l8)zJ8T)3G~=0fbN`N&`6s`iVYPc-YKd2;Cdd7E~sJS`h_BO z=||;Zp{~_3Fe~%bbNg&|`6ZLdFk$>aIu&4Yp%48_QxEr2J5C3lb=l6;dU|0yQ8xlc zf>~y4{tFIUAD+0`l|jR90}3J{f?`>@?>;U7GZ9nQ)|(uglsFBg15XGK3bl7x*r1 z@I9L2ue9mS1R-4oFFdAtfC{=HVh_xo`aTd>A48zOGh{UB`oNi2lAK_e!)ti^fj;-w zAhZ!D83}u$5EfJ)*g9A*w;XqV-*U7u5QD}u$KxW&4ZfuIUgRWk^&syAy&OLgP;~@$ zA-SU?qw{h_veC|b&hj7T(a%@g%+hNDRfrRCZzmERQ)6zQl57}chAIq4)!-a39{|No zyFoODDkdW#E{{^r7zheJ-)V%`D8f_D-p%?l5h};d3fX@+vcl-?JMlR_Rqtau6UY!q zOU9*oa;yrX50o)#%Y8oRDzZ{x3Y8|sHQ}g4M}9edX!EQ_|1h4MncFQ`o3F43^E71n z1sfXO5?q(BmQW>9fqtH)^EooZaONr7S&(qbz=}Kn=s=8risv{HMy%*)>`?JhM*<7C z^NqC-lPB!fpor*!q#mI}3vmEdo9`t6t`Q;x+<1ZSUM$_iqMTKp6_5&nFQE0!n~q{A@Hz+`J zZ=6rAaZ{l;8i%He#v9c&Lb!>;UCDcXtmSR+`^dV6%>99*X^>Lf*X-w?3ta?+9K!PR z1ot@wm!I}N)EFutk*E{75Q*v+UukPr7tde2UQZC#e8Y4slOxQXmH**vEVv>3?TaMUQc!iQC(Y`rItKwHlPDvb zYBpObQ^hn@#J0hQHjDKS6ThE15OW6-IkOKszk*vC5DuIY-%iF^o`k6&Z`n0E#(Tm^ zX5`9r>k(f%@(-@nre#HkN9bjbmy{-(hBVDI%yZ|dlpFLaCJUU^@#T6H(>e8@K@xXe zfNQRdsOJKur|?@L{4SDJE{J`B^)6GBcg0bO72ZifQaJf7^j)z$^0vW(%eelDzJVN1 zH1{acp+2^p|4Lc^qTi*9kai%b&u;`D8%~n zYZ1l;H=TO}qiWM@3?Zo!xI{(_Z;(76ogn!3vPU|ox?OyHoo5+WGSG*z&|+D2&+cD; z9pn3B(EsEYX`wvpPr%;VRI;V=jLb!}je#v?v?+6Fv-V^t_K8*fcpUDW*-w>up}m|T z7qP}X4Sw8b^ls5NOcq#rNrl(UU*6QNEzc3Kt3P4#N~V%6mVeA0Cj7vaqf!1^T)tp3 zc9Xn+vNuG1WVwp~l7{*)JCbP7&FINGB{bUUO624;8n;HGco=4`HN>D>_6dfYNB_}! 
z8P6N^#IWhhE_2&r{r zjY#hjKvzB(I#U^7EX??kSR*Hx|6DQFJt>?f;YtRQf<&VvVI?*LP5C_!?4s|*(vmbE9m}OMf!Wb2b8-6h!+e~Fs~67^b)U; zSt+d~jZS8?H%~Km?fIL{P6s(v>_^3QyzIg2s*3t;$;W1v<{ZI z;0swv-;&Z0Df!sT63T8CYto+u;r#82?-+D@ljqE&>4W7(k zaRP6^!<*6DXezZNcOxytqis*%Bo;DLM$t5>e84MOg*=oNv2WAtsr2WYt*x;7 z;9`N50N!CcYlf1@hqujT2R8Jh)~Fr2d8i!Jo*KRh1YnHR^opZ6k|wlDh|{Vp#>lc@ zw~Y!1ow<@%aqdeGB0MLVR8|9So6Xts8=i@L{1}?Fh@$#p9q1`YJ;E88Lvp?MtT=x0 zsuKj3ct%*{X&gQERgFuN@nJDu>*vwT_kb@%4GBj%*&4sF#QszYPg5@Oq5f zS+lO1GydRLu>R0vX-O1#phnoIyD#m8EWg_#eo#xx1Eu#@PyHmeb3nXD5FPz6tV2F$t*AC z5o!IdoukYwbxh^)S0hm!%ziA@q+%JL@dq|6z$|A=vn2|dWE=o_LpVB;eC1yv5&tuv zn~-?k_g@%^QWk}u?Cwf4lG;Z;F}*=U^PYQKZQx9$07^c#TG$LDt>nl#!f$ew|Hv2V z!7X^!hn*m+CUW+)3Ys){2`IAe^j(Gf`4k9II_Ld#O|EIaP9t<3X>Y zPh5xhIq#D=J}vz6MZB4mPuaM~su(%y16ZZa18E`+S5E@HlJ>SgK~EI8?EOSMZt5 zRt}dmG+ODgOi;Uv$=1u)U&nP?o+#hGiDFV5W=uuCY}I+RQ1m+cb|MA~zwv1yfq!E{ zi@C)}jgJpt!Q||T!+CD1X772t*&%zV*safcG7%JK$>dXOOsE%mmw%KTW|h7=RqLDi1~<}f+UTw!&KS3%&XIJDrf=iXq3hG(gDSDyD7bO% zCL%z6Ov$fSLZ@Jw*Aarg3*ZoY;bAie2clZ&%_dW)L(JY5b&}FKWDSJ32#b=A2C0+B pBD$r;kqQxLYa|fcU2V}5Y`TZZ=%8j1;4~`#J;C+gMhm+f|9^lYQdVjcf3b1P|^$xVw9R1VRTVK=9z&5ZocSyIXK4xJ&bQ zW>(&BcXnrHH@pA$U2h()qU)(HJ9nL`bE?iMxShOR0PabON{9k*aBx64><74Aytgaj zYGw=o($WAu004IYL^wVG9u`A~y+{U{0f?|T94xMqnDp%vzz2YP{aZu;3M>tl2M~k> zfM^T$>&~~>kLL#hKM?qVzz+m|An*f$|BVPJIp~=&kiE2VGBW}Iq;Q0vwFz99){qg7 z_?LK9WW>*L1fC}V5E=Q`_+K@PJ0HHG_d~Pz-^fEhX#Rn~4+MT7@B@J#2>cZRHWoG( zUKTE1b`~-=HeOCnUJfqc*U|t$32*@P05gCAAOl_kHh>c>4%0?p=N)%ROdTC3~j(TLSR+iQdysiS|KiAF+i+@XI zCKu$lH#Fu|cp>_y9I!V5@;~*(#l?lmg`LU9-h`Qzhlhumg^ihwjS-fE(ZS8yQO}jp z+JWLv6}&KVFt9hXbu_cFCi_;Qp1zHfqX4;+lbIo}v7RxfzM&x}qrM)yAtNiRp&p~2 zAv-IhAv?PvHz(^8He*(O@;^0iXz-WTZJq2be{S2*fZ52>$jZpt(E)}AD>FIsFP;8g zZNeb>OQXLEKa56L9^U8nMtY7$FJM9N+wd~7a51v8DE)cJnRuSC@H78SK7QtJVEm7c ze&3$|F0=&s4Gnk=zV+S4{+H$zjqLut(Y7=b{2%J{FGRx9eu=V@d*g2 zsA*{F=s7sKxS#OwK7S!1Dkd%=si>r^qN=8@VPI%vY+`C=?%?R;?BeR??*IBtK;YXT zaAef`=$P0Kaq(&C8JStxIk|ZyrDf$6l~vU>%`L5M?H!$6-NPfJW8)K(Q`3t}%PXsE 
z>l>R}2Zu+;C#PrU7nk4Yf&&nKA?q)c{S95%FuLFo5fKnkzR?8-?*a=5*oa7vS&{Dx z%cJPo;gGZW-obqqky_k@O2MwMkEd@xg!X`vW0C6M8)-jN_WKC)`g`-M4x0S)@nEfv;VprEc&_AFEW#vGONhMe`t z+&Sg-mJbA!Iu(zr!slB0Owm7g<`$UO|KMrx5wj+d^la?m^OaZwYjC^ngALgc#mZ|f zrGxW#V&T^{Nkz+SVcQ~u_C3X!BFfSV&9c>Amh%$tZvl-Y>QLPid(bUV^LYgC!zFE+ zFA}c&)BksZic02^6{+~MiL?EV(9gR<;c2IY(mUouItKaIMrE%{%pOSdVTRE)r%Z6R zn>XU{i{u6f+cTyMwcLLtwme?qq#ycr(Ok+4%e8vcvS2hJKS#eq`w&iRE&Tx0;_m&l z?r4Ryi7;h7Sk+fcM{BNPJ3q)+u!P5ET~0-&bi=U&g|uuyr~Z7znvH>D3jdTH+y8=a zeNT|tVuIj`!&Vb>Ms#mo3j+@`BSq15akDT`#A$i{zK%)%XvX^0E9OJISf=v4ml9F( zH8ur(P)<>BsBxbXdBuw&2mX|8>WIcK_T~)h5L^XX+B7imEku~a39b}2ijo(I<(soa ztm@8Y2~g|Zm?z<)b$+QhsC;Y1y0T?|aI?dG1nCiar}Mb7BY8+``DzQ})v6cLSlN&^N)E-w)ih*{+Z z;qWOc+y+80RN1{jF~AKn?|8pXdMjA?29!En=e0yk>5FmI?Vs0ow0^*uR#20U`qTWB z$&jOJ)TR#sbj$qG?U)(v#*K5%jD%ybJet(7F<9Kaj(vZKxOK=D9iBfe zy)a5Da!f(zpQ-)7>ND>w_O!+mgL7vS-JDImAJktbcHlle<4B0EYwDfsu%ABbKH4qG z7ym3V>t@vb#Tko6yJ2-S`BOHj3eejLD9DUZ&$Mj8CX0CDHOlNnxCIH-nA6m|EpB^p z)LTFW6W5yRTR^g0 zmvM8t{4~mzdSuto*LU6E9@eN!k=XuTJF0vn*1f01?rsMF))FX$?L@>ETvly~f9Ck7R) z7}e$9MU`-`L_ZF$E6mAblOocuJ8L7wD=3>gou+?R>)RB5({>9G9u?uZ?x`G5l-YOx zR6cM|)U~4#;(#|mKS%B|+)P!_CvDNHk=$r&G+L;mW1pK6?d?~xkygFuP)!*f4jVx9 ze~0NSv$tI0l{>3Ob}9XY=N=Ih$?Z3`wRy#Sp^(o0SS5VjVSXq{pUzDjoTuRh4GNsBI^5Nry z;;`R8POpQ>mJcXW|>uU8kL zjyi$rHBNSm*366?A|(pUl~#v{9a(n0k1H50pyZ?cBHk0cuQRJJnBD2We_XyiB zLdtI8h&f{TR4s9DRb7^Btl65O9fCiqvFoKN;@0@8fubXq!QHxIktwi;DBG8&_^e30 zu(L}-o8-kJEg!7g^l!f#EB&*%w{-2cT;<7WZe@kml_M^wymik#KA}U0%fICTH9lIG z5B+t}&47ars+*ggj|)}bEQDd0sFZbEZ8X;kdnzz>U{C6&y1J^y8jpl74fPBjV}sFA zTzRd)oc}Zz@errmZb+6S`thlL^5j2%TWLWORVazLyc%FEit&dR8P+IQb2F{)I)*e;)evI=!V@ zo4aO=9IrWh{qMESrs!sxS!lFX1ZMVmxh^WOex`{L^lNZBpm}RuCs82b;n;z=#If^k zQbn1jyNuWN*ektuGCS8?mA4|5adjs^!q76(!$Vn;wvc^d^52mWf26FZLLCl6F{{|~ z=%3t>>qi#XLq=L3fDF7BZ5Fj0ZUIKIqVYQNf=OL1v&tduX>o%XuI_O2MeYX%8j-n= zoCLGes^^hi^GIeKP2w+lg)kpRk>PxQ--v_dnPgj^;id!c)EI#9~7UalRH z=vqAClm>U}LP0R-O~u^QS5bQGRoEKyR~7t^;P=P#zK54Vp?-BAqA|rW&ym95-B2!2 z{z{Hy!C{oaA`@7l1Z8@z5fik_Ixw5a%Fcg|W 
zwJLK&{uDFQ4bFuVi+fEgurb)}LUQ%6Fr z^u9;@rZ<^uYMs~;3U=`oV>R8qlM|t2i}!`RQEMheftfqYF14_3mhi#^@qgjmfZoGH zwaQvhX}3L(^?eG1N}6@6-HW3qYV8sbDG#Met^HvVXo_UylVK##z0bQI@lIZtILt~n zm+_^^PRwbrj@BEHeY}5%A_um4I~59qQb8w%pmO&UUv`hbRu1HQ zp1Wj_v5_uQ9`%sh0K>mK+h-s`yKk)5**S7nX2u}sZt*Ds=aFvENww3B_tL3Kcd0&W zGM<=ZcR@jclV0vo|DK_tya-;qdlO6)!~MqteLf$Oe55;^hV!@-Rv#0=6=ri~YG9I^ zeG?8HEkvBr(k0|*zdq1Gg9r)jBN8$2RhVa6#Ea_@Dmw3o>g?M4L_aWXVKe-nxu+gy zG3+h6Lqml9EkJQSxND{hDwX~gHEJb0PjTj2Fb#CU_S z%zgYF(u!D&7R#|s8T zT;n0bOwe4Nc5x99Q3-2Kemp8vYE`ZVA*&qHi(oVa>-X{D$5YL+a9u-^kKu~-4jxL3 zQdfw7wONv?Z=P1ygQGS=j8?3TdLtCAbql;6k>KLue%h;Y>0BA~45Mk3^(Rr+_K^z} zde`R)YIQ`7)we(p=eDfV(Jg=)@Z!RCWwq^jwkzi8e7kDoEWhp~II_#ww`7|mo2k|L zs#k~*n5;ZFjdgO3IcRYsl%;rX<3#!NIeYMR-qd>P*XfV@^TFPe;WJwa@mp)Vu6nHP z@NYyv4{%1CEi&KZJ~fVYj|s{$Q>At-R8TPdfMldh7Zw|^9X0&+nzV3p{8ZJMb$Y(M3YwG<@hYFN6Jls;@ zl~wx3+ueysZt|c(OFaS(a>?y{7PV426cQSss%BQv{&9R7e}ApaTfqP1#?DFVMieAv zF;kTqz~{Qg1kSCEoFsN}>@eT>%JHa^-;Jvfsyn6A46+NjsHC$z*gQf5Hh*ox{`0gr zj$A}$21q%!6sy3ik63)?)l1C9W~`fBdqIWXW-M-~Dck-!jG8(Oqdx3c6E^ zRjyzhA9Twnp8wPa+K9Jxaul688l!5b_1U=#HlcSx^T$>qoZ2K`4%u7ZqMmZD806ab zBsuYbMEh5_Xw&MJzsq)_kY*{g#xVu0ypFNLqGdJg%;()P=ZvcFwcPkEQ0}QtiCv*r zMxqAn%PRP2^k+bD(y=hzlLH@$;HqNp{AWi%aVcocwm3Lt_1VM=1YkgNsN&Pzz$yZP zXFeg)Hs&$gQAZN9mr7&1dTe`@n!C3EJL>4ht9Y0=YmHP9g)Q=mzY0*%%X9St>rgKy zl~o@%5+C_X)wuDLKqszY&TWfwOZqA-=;rpN3g_mPq&1}s|4{KU67FQi{pLt!Q8^Lb z2FHc(p?}a?#7Axt63-ol^M8m7@JO@;t?|W8uWhN%Mh^4n%hr2vw-c-Gd?EE~=4#QF zi0SyLstC7GW4N|>OYgC%gA1$+pZNYF9sVpOzS}4L*YY4+$^`oQ*lGyfNRqmJlXg&9 z^b|&U{wy~sy^DcHM0k0)0QmKRz zMy$dPpYZyP|8_S0v3L1dnV9iCj%-xI7=y1*NpKDnm9Bh(4ShA~%>DR14kd0x>^L2i zYc0NNY^^7+z5CA!jAn zg>JK2H(a21kW{@FdXlT;B!%2i87!(Kg`Usg(Lq4cFvi{p`jqrFl_Z2Ee(u1FY6_z( z8lLUS0KWJZsIVrFU^1?|h+oy?@(}ArMG#=usCe+u`7ClyW-!i#j5o{qm4LtnCvkg} zz}G|0W(`U@+HekY&-SKH#HA8*8T!_3{SRYBj3)#dE)n4mkRODh)p^!x60IDJR~%mu zN{JkqsAq1iiZ9QALl2vZtZ_Vi(amks3-@$e!2XgWG$mDTG^U>v^$XP3BVCBj))JN8 z`)aeRz!uSWzc%qW`MJFv=dsmi5nfTEbxbCb4%c7c^MZ;*3Gj|gpYRY{%-6Ac+@vn% 
zgMwGeT!nl)j*FN{F|r&~HNVwa#IW{jmHzIuwFWZXTfpR;`lhKxJb=t=QolyyC@i?L zLB`k26zlRr3LCmB)ShNNWL##@Cr{B9)U^iF>u_d%cLV?IyyXA*?j!o~M|g;)I>$Ww zbq?R<4ZVj2Q03tp^=(JiI{|s!v#L0sn|9r6qA>j!?-%)Xk_i)3`0XrO#Xm2W%6I;; z&<@6#euYLy9LX_Hkylh`UKbydA%pV4h@?2|pwRq79&Q52nouH))dEqiI!j&N$!~qCF#m832|;dF#!Clww}5xfzor zquFN@t7KOWiQvt8-ciL_nkT7v17V6au2{2!i<@4gGU_0uJseF;JX@+YUqA6d?{h22 z%Fi^9G>c86xLea&g7VVe57O>RG>DR@eO{~HSbV^goyo4*-Kf{9ioQkO9WoN;n*MY5>7?6O{}*Ko5_pm*&y<b>@D~}LBWU9ZfFa|+X4K+3yy_} z=m78FAKGl`FdoDl^&Vi9XSss!onPJpm-*8c>y1G!x@HGs>pX|Wq^l_h1r|b%+s&ZS z>Mqz2cDkp2*WDbRz%Nn`v%oz4A&n{I#oXKFpjEHp;P}zXaw%KXi;P__!T0BB;s!JL z9l5(^C&&0N>lM$ow))*hZC((Yrh!j-{r*l8^B(6RD8IW>QsZC;g|~`#a}l8@J1EkL z@^*#xZM-{H2K8x-cXQh7EZenrBCnD9wwZJ-ZBxDNLaH0ltH7T`viKohO){GE{qXVV28pS@e(B4%EybKF(kfGOJDB}g*(e7ElAylaM17k5DG~@n z7zS2Ig_#6IT%4t~IATJG%ZC?2o$5q^v(C1=b#K92Sp$5Or=LG|zB09`*tEFmc~2a5 zK!bLBI1#%Gwp~-@8J^Y*Nz3Nqe(Zw(0ZSv?Y#48DO;n(?50t1}%790@qzt}D%f$2I zI0r)yjWISJavYac$_I-2cui*Z;dDUS)TNm(JbYeEC1|U*ShN@{*Anot8VbFoxE$BA z6T#-wR}h*Z$e-qzH=1Br3r3u=ef<@G*)|(3SAxBz-7Xn9lSkn-;Cw=G5MD)@uVds%eJ(xL%f1LOsdu1`B}8lDHDX^8ji~Hv+mBbftZzP;`^P< zMpP5ThjH_ioc$~#!I{($b1=Hcioj?{BSbYyBg*@yAZ0JowIabRi#GxN(7<#ly0+0P zt`kl(N)RUhx?0nQ+W6hAhYECx?3MybB=n6mqZ3Jr{I@`g?bU!x%+s@cp{g%lqQhMD z!|d3DA3He}q)k>@US1QN6(Dax^ zr@V=qx-QBw>p5T5!?(9UvVoODO+YrvH6>Tp(u$4T;yaSpp`bKEhX8o7m+rZJ##@9& z_3fo*Ry;>~M#e^X!PSh9~db^u?i1S0^ z^2Tk|XD7vP0n;P=#yEzEiaVA*ukbRnH7IL1dszaGC>SDGtrP7o<1-PZFm9sDc-_h) zl_IDkq8{~+`0ovHM1iFl2be1H7Ura;x~THXA=wF|X(ja^a47DKHKA5nFBi_|@dUm6BSc4cu1mksPUstLJz&3MVdy>_zlA{OJW?T5-enL;+T?ya1+nbl2##8>QDexE7IY}t%9^X!WM7o^Lifn< zt^t}*uZGAi&|tRo1<5nR$De5QgJ3{r=(L&7SZm)g9{(XfxK1QtVrbllQHb(+IX8SU zIrf|??rDse7JH-sYVIviTG(-2+jXH7lri_@Dz9;mNX5Fqy>t1!1A}U7Hgo;^90=Bq zX2fVmm~n2@^UjO)nPaVSS!@X#N+q_J-5&C>vxY;O6wl>m`_+7E!EufrPX&Kkha#!h z86lm9PTDqDFkiUXs5JI|&W5QQcpb`Q;CXHOp+}GLuUPwDGC-c*0%5KL*T^N%!L-_> z!G-IlYZ=h`EdZPm)5~&;mD=@h+`9#q(;(wEfggkdce;bIm;se&uX;MVCJ86eL;tK< zs+vLiyFDN)(V?$4A48W=+Ba6Ta+?tCPJq$lVCP&xf6kBKo~$Vft?2T%kBUxKm@mJu 
z4nWuhNM(Yo5S`_6O5`d&lidt%zBw+W6QusaSa6v%{$|zm^el3^%btmCAsuh7)aIG$ zM7`Fhb6@^26RzBhn=si?a<-{i)XG@=PV}lr62v8@2o-5nAwnIoF75qN^)x%qycsXY zxyh}1XN{gSArFgHZ>oPGI8g2?+n?60?;AaHeuz^SV2Mmuy2r#L#EnwP0k?^(it+LK zY5mwEi3GVJmG&N$;PSfWWe-CGzgkiD6b(zEdqrzeQdQ6v1gSCW;V;wcUxl~`imRh+ zNM8=3xcur$gx7Sf@~NSIP;n<_LRF4I@giI!*Tf9Xdt<@gc6JcOO~lR{Gv-QeJabg1 z|FFBu@!aaQi`|E*Oa&>fk)5Xl<<9T_b`J@sDIe7}VoeF%F`VdeAR2fKn@qO=@1={b zRRd{*jkxB8YA>5lflY3q`3;sBO6eomK?Tp3Kek2ub<;B*!#)1Ja+J|zzs!nKvw+hJ zgOd{ohlmJvYZY;zgHFIhB;LKaO&lI2`N_~b_vdUAix-M>>4*w!DooS{(i0U4jj&^s zBRV(BQBe+x9Htj-hgR3bqo0*ca5JU&QsJo=W;kNlpgrEDvUNCDG?0F_5TpK5ec4i6 zypTR_oRAb_vYPh!d3h>n()?_pmJuE4hg*Q&$MraevcCLn$WcDR7oh|^ZAZ65?;*U_ zS7!h?nGxyChz!wPa%@#)38n15XkMWvX*qAoqW_J&_Q_k^PvY^iqWwaW1=E?Mlj37a z>wfS0EJE%(+IHd6`b?RpToJ0Xu4&N-lxt#%gITCK$_2{QrXJHIWA|Xqoscd6Y8kj+Kw>1Dz2eYMB&+FO z>~~ms!DV0Eg2u%E-o}|AmqJj*GqrRQZ7cng_(dqtD6H_|1E%6}4s@_#$OPl%J5I>8 zr^(VSut1b1%->-zI*`1e*q`DyI@5Ivcuq*#CfSzb%zaMrHq^~mxa@QDG@usFA;V4tALcNs{#Cy6I z;X>h+FKZJ?-CD7=_2cpD7hFCj)0&XxYWu6q@N|UF0|f27s+Ok9b7BO-GtvrIJKV3j z(lL!dm0pjok`o@pJTl1FVZ6F{dx3$LD1nL6Gf<5{<|_D z`O4Q-yZ*x$PNsy zBI<_-*<)Us#$I!u7^FpZ=1RqQLfmeFm)l4oIiv@B1lM$%R_G>YcJwz$O3=BH#`S!d zUufw|EB98f37EwYC?wtST=F^G6-y^F!5Ti-65KmSPIkQowiOJBL65IRtOZvr=QCMc zW0+10d=`9e%!^K3xRVQ`t|l%L7;XXTEC^wcI}KEHj>_N`XqL9Q1q35~=9{3nfvtni zWj5Bb$+R_S2pM{vs||67KkVn(c=z+yNX%y()j3ut(9_OhJP76KD5Zt zcPzDLz{Jq=aN1_Y@x3Y+zl5vMY9b00@M#Y_m#`69hk3KYDj*!Q_0~VLyMnPAfgnpy zOt-GfHBLrab@~#h<(efdE#)95L95nZ?-~ zis1Io?t~ijo&2TG_en}~%~YLv%MZ$jvqpEbJh&^#RWT^;Jzw8VT+hz5+JpkNAv+6W zDVW!ttk_)h^_}&_dqFo*6LdpKqKJgA%~n5y8xPzu+(cD8{d{4p`bH@|EwGv zOy@l7a!(*o!#QdHW=G~-lCbU-Tfc?DRe@WwaliXPND9Q*9LjP)6HsuXX{EelU4Ew^ zWUOGYxr-DLz92J5$kioZ(E@h0BrsIE zFHtdkEnqs4cMD9mpnuWf|B`&|cvdJnAv`{#J#_5hQ$b82*`U6Bw4*_#q=JI{tW~HI zJ}&dTRAs1)@10|CSKgOx?z=T0=Ojhbmy2*+M4=~no(4-Xk6JQNVb9bE%^d}^$Z#P6mcHKJTT%IEl7F$Jh%9gys3_M~_4 zeW@=P?xZgN^lZ5Ab31?-&YP%er{i+~51pLdEEV&~F*|dPz2AW|J;h(JZXN?52{GMS zd&PAA=!*HPi_cVydt0rWMco$H9$`ev;w-YXvs24CmZq1KLS1EoGOQ0zqOWUvxcp5i 
zgG0L6him`dirRZXdi_o#Kxp0HhyEf0W}W9Aofw(kpIf{d+P(`*3XnYsIpOTnb##9s z@L`%Zt-60d?celO%7rB3uVL6d5>G!@T1riOy|atly>)6{#TAJ?vrgpwetn!F3BEp~ z0Oe(;P;)VDey&2MG!@NRWLix7%(jYUf_GB{bi_D~uv-k8Hiom^XE>zhL2ose&MkU> zEWV|cn*yr#*&C)iPGn=<*9r~Sgsmks*A^sTfoPIXVY7jRo_Rx2Iw1)tt(7KF^=Cp0 zG}q=_6T34XgT3GiYUoGJ7;`&r0XW?i%*2$LSSaEx&|5d%w(a<#Xix#eK#rIx67vxQ=gG*;=H#fTI%-!$Bd-x&UdKe}_*Lu$eqH`uMwn>%oN^KNZHgy{-2z^O z{8K{fumkXU(Kr#^(-@i>SVo+iFn_4(S>qA$8qe0(ixCN$Txu}gEnv3Pn6Up0+TP^D z43lrYtV&LN>ei1#AfMSp$ZRu~ov|eAMX=c0I8B(F;JZFa%!wTs8@+!h;l$Y2`dG)f zeQ;q%e(y^zSD0-rWa$kT$2M(>Z71L-%6nSq2M+w8PqMc!!jOHZLigQ<$rrWy-kgv0 z?fH>&Q?=e&gke%;)qW;sQ9TM(?Kd<1-e=|J^0HTAI3Dx+!Z#8A<<*WSdSF%&JfG)!$iu%hpLp|UWq)DQZ> z@*<=^m%Znszu*&f;IoFGjw>&An#Fw4i1<&cGikhLg01BbMz-cJu?yR)NdxFAKdaDO z&4fYuEednj-`{Ln_YyxD@r;=IX0Zweh21f$G*P({`>?Fc8t1Eb_#R)O&Q4BjVNzGN zreyR)`?;duA{Sn5giO;!Fzs<6%=2;O7Fg8TJe-^t_VM!aqS|@(u(~SZ<=a>5XwP2p zVveO2oNLUeSx9(~IhZ(^qO~|!e&)?>UtPiW_nj%XmVZi=NQ%YLqfMr+Ij18=KF2xh zqA$GGL1FOZNr&EaAT>n<9BjCV!E_6;KquK7INO5)8A97Nu~<)YW_I=DiT0T zujU;3m6HkO(dDDETG-yVo&fJ~r3L57pY&=-yTMN$;0ygrcD(Bi_k!2n<$b!T8$Vrs z^Bbuic>0eIjsJcj`cLP*1T7A`dBcsU-UQqNgME%;K?@t+1B)2Ef*;-q#r0WDq!LFg zDvevh`eFEu5B`+^w?cT4&?e^<+vXnpq`j-ND9d8KD2Qv`_+ac*lz*7=`WCoReq$9n z`{-<%M-m2+?92Z&*}u1by3WiMC7pt&xYM=i&rd__+sW7)mN;*LjyLe`6q{Zo%rq}b zg3+yfg^saR=^|*N(a4~92#2_r$FfM@tk@rge|3lVJKSk>=ng^$dJd@S2l4MW;~c_P z>gnx1Z+5kd8ph}a(k(-J*{jK00Iff-P9~TGqD;d64{ghM)|?F7WebXvAtJy+lymH^Ll>Z5iD$YxgQK>Xj<@@WSvr z7RSq0MKLp?OcXhWnh^TfkwS1wxj*6|@;*-A3sD5MZSk0IbP7{aO zZnL{$z$uDASt^kw%{wc$f-as1r;|?PMX#X3*p+1}mZQ;w8b^(UfJB2*l&OwFL0(-U zThf1SQ${aY8`nO@WQ*+Wk6g)M?LA>yiZ?Am=DTC}T;)#pZf`Nnks&4p zw{lvIt(v5j=OhHO2C=3{iqgX%KT2h*9pW zW+%%~r!byM-O(3~H@`AJ?@1e2IFNa~b6wFJiW4Re`!xM7OlareqH%tj{bKZ3-Wf#B zT@fG+H58v|e*`)UtmpbN{ACo|3egT>LVuZ)6Bj80N8|GWhb$V0rM*%Sate_dt{%DE z+j=}vAY1;a`Zq|wF*^KzI@D(`d`Vd`eW)7BG9vv(Fb{xM7d_b4m2_p7f%fbNq*#jL zi&WPiN%ikZn&A^Yc6cBrpw9K=j+^kLeo@9!L zbhv8z)F;?oH6?AvoVJm(EFU@IN6X`a_p!8z#yja@r`+<3J)J%I_-QF_dILkuE%;aL;eddL913JeM?x=e 
zyKBKpOeaONCenE{t1L(@j0yLlD2rWasw>eLL|0?%nw>g7t0`aB`@x+Isr*QS1%{1m zN`MDl9A+pSZkVw;-79uGxXCw(TiLy+xLh8IP@IUE%iIlI7I-OAFsmzz8PUL#WyD6U z!@u$hWduChaG-rtQIuBj(26$UdCZo$FQWKDM}W}T&_^uMNTmR-NR>NaHnl`J$%4Yv zaK8*g>@RiQyUmwpi&p*l4oiGjaWL);ci$!v>$haiROr@VYmqvo#GVOdzpt{MwTAZOQoW1(lRQ$E@)>VKe>w z2ugo%J|;Jz=)3aVM$R5KEsT$@K+X4R3#n?TRR-i|E>-p9XPPTchnoHG622jQBcbzJ zIXVo@ek}?%0~9HLOJ|_3NY9?$nzm{SNJks6a3Ba(OL*cWAckiQ2Ku3&htFm!Cr6xS z$x-`d`~9CNUc_s9p}Z`sws_~k5vt;A)dTMqeE-=ETgMRujTV7$y_pbi?)3u~LPsm4 z2G30No>CebM4}T;{dwUL%aGFPsl57lHqE5g_b5CBG!3%48bfKXc)B=sv85?yQtrMF z>Uz$7`dSRlb*{2V#!mT~bpeA}IFv4{672PihtkfL8>_B$KVvzy&B;*?*--;I7CD+J z0q?zq342`_))QlAhK>V$XFf9U(?7p*arUYdS zp`{&v1b$K_`-A7B_OhAWGW)85WeMfgBga*#aR#RTyT;8#Ms75rNY+D7@hg(=4O&vh zedSmQ?$4xzoAkqy_p=p3tIpUMCnQdp;>fqNL3a4WYVC91{NxuuC{@%0-RHb10V1<> zH5Od+pP?kTK&FcmaVz$mrUYV74G(TU!YpANDE}dVRjs2gD~GJ^xqo7|K#_eF>g#$^~h!QI9J`d7vg*K$DFID#o79F6iciT_O@1jDOa5)?viw;z^d5$e%kXvDCRTn3TAI^Ri z%pX-{G@Y6~-j8M`Q621sxlkdL{U=fT=XGaRRL1PCJt2E`DBh$j=J1qwuUQxC(T!a> z&j^bKRuqLFY-eE&n#q6E98cs;y8pF460VME^U+nW$9SL1GydY@rSaUaG`6t&TD{MH zLz4b(1*dL%X&}I-jH7G}V$gfqavzDP#N^Vr{y`jv1V1xw9s#970hX<~5s8uhW?!?U z_=O~RlizMJm!EqHRUf#CH)wcpDv+O0_#ocdq>_LR^f3)ghxVvkr4`sn$u;_1rnRriKx}`-W$6LVSLIqFHsW)_oQBzRVO&NC6Ej#0nqVjL&$8?w1 zu51ar!_}C7x3=sS5U%90yagUP<1Qs8MX*P{c8i<6X50@SE*to&*?Hh(FZfi9Hs!|s zm{3Fx4AZ&s;bUsk@$d{?qY@IUVg90cFTT4d6|=J;9;*wNNnMVREJglt>@8mBo!o;Wg*FHPPp>w#NTTTgZrB`?eE`VF6dVX0Y#@Qmy7ZHV#VHH06EC^}yooGEt&^$LKh>uO|g*0EgXLUc%HXcA@M~ zg;}+9Ez#P^GUSzI&YCd5H4;;Qv+>`t$^W%Fv?@cciBZ5D%}R*%uZ`(gCnhHpMW~}x zj6`LaHZC+yyS)Str8KL09bk+uEv$2JfbhBa96b1Cl2-Z|%&zN$l09C>=e{>`K6-!3nQ`h+OzDSWOAnm+ix zog#E>B54(x9u2(_0sJh~P-92bD;+v^mL*esjSp(3TicqQlv|BkfBRRnD2 z*I@ptNBVDf;rHJhE<{;Ec||dyXZ8H$)7T9`BVsDjW$hE(Ogt)ME$W5cu=neJnr2@J zhSDQ23Dkb_m!GIT-&?#EeC%7~c%oL0NL+Q_gVcS0yH;YyI{C{kHCU}5mJR$3^zGq% zLhwcam^O#&Zx^xe_ns+YVw-c99rkZ(6raLOB`B1~Mat(c``bRT`P;?!8r|>UJDqWI zzY=`prlXTv)De3%q?!C>tzHiJ2(X|$1!W}(791Vmhm`^VEr1R;_ziO`{(mlazu!3} 
z=>0*gZ^5;^xi);0XZp$q%!c-w8aj^nQhGK!7qOn{h7ga``TDHXyJFMKo|melp_{0i9U&1n%jJdO5CG-cVYS{8b+zF7Dy>1IRR z5+mx^IPB9Ya+_4}$r}m_W!rSIjtB0G8Y(}ibX`)y9~f2T-_%u08ZDMKV_&ZUIkXC5 zk9qRgDx4y-nykhlD+eB5TVPgCTR03UD5%2Zf&%_;+<^Eyli{!BmqV68;Y`gibq}a^ zuDEn6?QT7My5J@4f%O{tP;FzCsa(I(<#~Aa?&C|B{9vLA)?5va@obDmtboetBdY$7 zgu^{NUHB%KXs1aOBE^F+FB}ChN)QxVgw-PjOPt(0Yp$#5lJJC(-UyGe@3Df? z+|ocSCnNclM3t%2T{AMLCl`Sz2YJdTB)RF67F_KZqSHL#!SFqumn36rI*-r!mH45g zDHKH!bT@E?X7UyWgW2HVvY5d4L#j^1>`7-fFDee*84Td z_M2(47A6iJEZ||3xXA@Xg2G#%r4f~RuR@5rQdAeOS$xs(}lr*&1n|wkgnyuieM% z9Yw*K$zCrt-Jd^;iOo>kIL)`o*36(q7L@P0Og?c2gB(ICPQDTFU!QjWU~0lcP7_RW zKkGGCggOc&3Afo2?loQAg$!?$mX`%aJUYxi_^J~n@-i*6I)h$@^sb=oxF=TOMsWLD zU&dRX9<>G|q4DZd710*>>_PBlU2rgoh{JTFZyAP(U3Kw`h?=f@(;WwR!8xaVkAtCx zo{J*x6~+5TpPAFX1PG*mFdz-%Y%}*pK0uC+vdix56k5&jgy=6))$lHBBvzORhzKl2 zvIk+q{^VF0>&qO@-;`=}-NEa3GQ&RgS(r88j-MEl>W{2B3AKs>Sy3LSWtFTO2s8-s z$9)tFbI0daI){8t+ZB(ipjCzIV&`^?UuiH-<(+7uJWy-zz)aIziNvEe#n9t5mR^-?SE>sv24=6fxo6mA$iEJHM!%1X4krUi7X>+ly5{y+ zq0iB@Tp9=736m}hC1Fv+`fwxbKAZ-gF^I01n}ki?e{#L!`$@omRVMX~^7CbERr3a< zAny1C4dQ6Er|u`ToVi)(G3KZ2Ry@51Ri|aP2@KigKG_){YdmMRkK=Vti@Zd|u{q%! 
z(7t9q;4HK=hdI$qJYc*5ZJ^N;F-`BjuLY72$4aM2Fp(3O9Vdfy8qCWFO`a^ub=?lnt~UzTQbKZvxp1{~6Le1lh?>U9CWa^?L>G@IGknO`t>u8QGHfOAY3`rnWx0 z!w|(y{QP~j=^3n9%?{G5eA!9Cl{)>6uz0I$o&xRVooUBIldSs{jDm#c$=jqG$mqAg zfI&sWYc!OnLi8G$A_R4YH5BcZg6Lap5jW``xCo6g+_%6R$8jN)V?NJET_)8OgFB?L zoEx~xd3M_9nQwEMvbd5KiSJ4^m5%%*M%pfCeZ6gnj*do~FToi6$Od+%K=<}v9oqj* I`_A-FW|PJp06f(Lh(#$AHDTWAObcX$6g zbI#1%JCj-OoSAd(yZ?G=R?*ef)m623ZL7U^eRp5(z5yPJ35f~;;Nai@A7OuhyM>3l zf=(ue0Dz<?ticSeEr10PYnFTz)uYP#K2Ds{J)3+1zQ~xx~CGB_9g}Z0MbW`H2lbM;5kqPjlHUK~YU<=Rzm;mSiPXQ7DOMpGB4zosJ{*DL4 z#&&jAoD2*Ww$F9+Ep-i^>sgvJIO$k1Fg<5v0C4j;S?TD34eXxk8W@>a@KEm6H&Z?} z(dVI5W|d@=v=T5dHW6{LF_3qWQqXe&>v8B)^6{dhayxN4nOm6~*y%iVGB>lZ<#gg9 z`OD~>u=@9A1`=Lw8+}7gxi><;_JDojA^CMJj*gDc9a)}R+88k~ad2=jFfubRGtKcQ)X}xHx8otPw>Qz}G}JL<)z#N$rPtMA z(Whr(($}Ha(Pv?z*JokTf62@Rcc>oFLZ8JHVb*xACc z!Nfqq@XMnAs5xOM`rD*GL_Z8hSRYOS8v`9XgEz3k`+a-q8QJJr7#04y<zyfCAI>Id3yBUB002vAC9ugw*J*0d0k&*ACK6(JNF;Ib+578ds z0r8*U0r4Ia5K}%QAR;Gvj7RdEgq(_+mW~$xDFX`w4KpPTEzS2q;E?a%M@2!!dGG*- zh7gaC=6ApDJ_8=zhj&MiM1Ug!z(0aRcm#LX4gkSokw~!U<@W&QuP-=w1Vp5J$oElD zAHX_PJ_NwSAt1mbA|N3l!a|pD?y&Cxh>wu4o-y&?!SN zPsXBa>V7PoLzk0`}+9@1O`P!Mn%WOeu|4v&&bTm&dJToFDtL8tg5c5t!r&-@96C6?&%#J z8=sh*`Z5h&SX^3OSzTM-*gQBqIzBl)JHNR64i_8%;TN#}2H8Kr^#}$RJR%|jBJy{* z;NTr$1>q4Q(le%eSp2fcI@Z`E%wG3#UWccZwxE!*$n67lZAMUW$zCjwAAASxFOdB< zz`Xx0$o>Y{-{FD+&=BBYn}_fSzz4YG!_iF=1ibwD|KGv@j;tWyPbms=vn_!BBVoKd zfcfWJ`)KDS^n2|o6{c2bp4Qc#G|D2T^xGl%w1IW!kcAqSt zFl<)35|0Qvxw^fb5?v&)KF8NNZ-ktRATJ141>6?MoN>~&#V*~eYt*T>%w^^Fd{X(I zk0+$yM%*)m82ao)&A(DsOW<*50p!9mtL+W|(7Tb;u0Eg8w|Z{Z(^bcKo1IeWZ=;2u zzwY7=Y@Kma+duwt6kOujkhyeN)W=8QcZNc0I!p2Dm51)jssrF{dHc%-fcb}Ta=Spc>lE>OCfzY=zD+@m@;5>F`W*O%@qq2I zWZ`9S^tcC^QJy zjsk&Niy)2++z$%kzP6^iXIEwah-d9H?G^Pp&#-IZoVNnGT)8_>F;fO>>!`CQ+iN=2 zGscehg3rPi>}l_>5Y4gp@r~B)&@P7gR}#{VHYOy0V7>$Rfj{RK0aZy+bWHlXFViP) zM^6ycG&&eb`AB_~XjeW+3LUnzseYA&L%vn*R=!X+%D7gYJl-#z3)s^obX!8i%-vGU z@5}4sCW~#|b3kQx>xuLnTxdbht_im{&C*6|Q+v zU4@(+Lj*$=;u#Ov?Q}jWtg#-3*z@8Z?B*@xVX@j;8)7nYd~@faO$XMAEEf1mtAGSX 
zRFa7fwBi=kWyyov`VrQAYYd9&)+KquIC35v!W1ae@1nQgOP@dNFUM7AJh)7g+e2BB zid6iv6ZCMKn?20jOmN_q{gsn+k*x>&c`9;T&bj@e$962~1;hDNlql%{sVMDZ2M#)E zijpLC6YRQB{M+L`=wQ7kM4NRoiz9r9e_A*dm?Q}7t6k5GC_|-6_wxPW^uLqEejNkX z%yPG4rlPhqE-em2fV#sKBDoYGEXlEb{3*fcuPU>td^f5wNU!_ zWy8bI4{b071f5d6dC7;em{q_QW2(oZm~&xwfFi9Sus!-tzJ^+NS;y1fxY_t-nrbfD?c^&6viebp2&0F!r&e;||%E@%1--yq$k z{V02K*_DVqp5UQXKWE<^U@~8e{a~dl&?2NU&)AHVkH#qWjJ0!$UMzNmI0=n2tdSnZ z8-*Cdf0k4zhO{(sH0*akGlR!eN-q{Eb83p^f=nuQ1EUn8<4%a93)H}BE?O#LWp9>B z*WJ-IAqm&ii!<{NHXqQTX?+db)1mYOEot2W(zaq;ZsTKPu#lBN%xh7+e)Z2}MGnET zcJtvGf;dY4?6QJ5fIqh|n`Bh`uEezpy(?#82go2g#(q>$?Xh(v>B0fm#)ZBc;)0)P&OFXI!#?u;4W%Zr8`{OUP>Iy*IAOAoglR?7Zg#>n>xZ;An7U*dfNG_dntN| z#vR>Qc~A@Ez<$cNQZQW9RTYCg;Fxr3OPhbAF1o5Hr^DIcsZ*wgH>R}d_WO(QWu^3u zW|9dUSv5i!I6t(Eeq=5E-Zp^LJ!$v`@S;(uvG;gKjYnJqG?tKb$&|7xi2mc zA%0p^5hK{b69&DtH*HK;&D)V$1zKTr5mI7P++rV>qrHQuGGF}}WTS)}%q@xO_d>W% z7vF(EC+uqA7FcO(4a};DoMjKC8ZT$srd{*IKTp=i29HWc&}VQxlQ<@BRvXN`kR5p) zTM<)_7dutH`EvBDCWo_udB14>v8;x0Y000l)*p|lT2=0Rt9O{~l6)fbKIgegK`C3z zvP|nyH!f3({D#^`^{f&u4F#rMKT41t43$hNaxO|=mTq%DjbKQ2;Hd=3uqo|^uHzY^c zeTfy6leuYSdTFK{GY}PSBYY;Er2&D%R&SfzQVg+Flq{s%3n)< zk+bMs60O#^(#CC2t-R-JKuLe30pzJ?ID(KBJf4a)eLSW)Tk33}+b`_`(O`~csofdP z&g+VXnHX}$f68I~ADsa98&V=rA=Dt1>vJNdkC-f+jZ~%qY}Ugd6zrN87KPs{JA^3In+n(*+Aco=b`W z8!6$5ZBrzcfF8HEQ{tr&M>0uot3MDddwhuWx(t7XOOnJhV)all2(MFfCOyba zE^|M#q{PlTT4z=UVLiQoKNqSoCk;m2JepY=2)36v9$GerAHY#~8Ed7YD(UD<(`E_( zge<&{#w$V+ASuZFB%{Wc!q=yZ#7~K+Wl#Dyo3sd7PDUMk*h%Udw}Vw~Giro{poW_} zz(yShJaN4XN8_y|g*XFbNH zC@vhz44-_1@t(P)o3xSMx*5ZMeuo}=+m_d;vNiZjYreOh<^uZBZgp`G<+(D9P-vN@ zsp}^yK@?Z2x6rzv(|JX4nUF z3&ih^Eo{GZh-BEMGqit~EnYJx%yH6T)8pxGknbe5Ip6+xDq>TbO9LuurHU7z>Fv9X z0ASkDN+OQbLykRlRmosX6A93+!Ege_6-$epTLb-rmL`i0%MHDSBXBCq52aQSvAV3k zww$J7*tR)pF-_P%FjvrgT|!Wcs~;(^n}xu<^Kj=gDRE;y(epEvu-9SQu^QtPpTMzf zw*rF_LeH$$aUdm~ywMw^t!nU_^Ux2O&<{H^O~a%YVTD=2g-ZK%vK?$AaUP~>`So9ujM!$YF z`jtAk1EXD`#eYpBaV_!Wv^c;?`;Dg2m5PZ&o6MlcSozxZrfVa%WOL9iZ6B4P_;C7) zrVaUnGe?)jcidx|wKb*Y;^IC^wG(E=FRE(7*P?n>*!2;ROP&RGRf<^b7SG73R@N6W 
zy{LcSOt0sy8`R;|qjRl^44j9as|%D>*LshMSNYLGPSyjxC?_M%TI+DL=&-aml&Uc; z*xHAP#W49_5o&$#bYE;VSe&d>AE4vcvt$+kbNIXR?zAVvj_%f~k(-aL+@#0t<}3R2 zax^QzY-t2KUOlZw=i>TDJ6e%>=X!4@GXqaFnV~!Bc)`7KGsk%p`?rzSME$lK`<$9o zs%?~)pE(gO;sHlILPs5!#f^p8VRj$y07ip4Rm__qs5^j%J9u)0SkfNWvggH%NAkmN zZ!fbet&J#BOb-OXXdQY{&d#&S=VhWGUl+l%_uPDW8p1rc;b894Tm{E4@>lEB_MrRc zp2~@mc^kOZa`&%GMmtevHjYzKu6b=28o6#8GwuMxg2rPcflqx&d|wL%0TnO+M0v-* z(a_dAFHn_y(B`PzFSMfZl;_JaPC{ADsQEOp;DLNQvckc$(!e96WO=|H0OYx%K`#1Y z$eOE<7T1wO*EK%0hHQ7&^<&U`1sRy${O~u1{wrhuq3?Omyw)Pk^mubd<0b2#;=vih z35gaFfPrC{?0ufLV_Wj-Gg)=l%`C}}5ryV%dGo7H#k04&M&4C(MTHZWasddl>p zC9++U7A9f0GfE8veAWhyQ- zCp|aRW#md22DEaN%^xCkoiRIktQse(!XCKga`y-~@81E`PHIvXb;`9vVg%LOd{{D`v9IBPIZp5k* zZ&-Ss@!3}1`stL(r~>kaGI$kgGM((uB8Vdg_{CWGw>|6#5$BNwcK`z?wr{k$>fR(0 z_D|OMvfM7TVh>^!jw(NBauBR|Jzzg$?>d3JDCR6a_JW7X z$%3`nk#$g|Q!V^*;v65O-vGsk)COJio`woEaj((5V&C$1;`3FhPUiZAYb;9D_B1@m zB&S3ZYeW#|^LLx?AIpiSFBAfO5m#}>;p#A3d(6`;wny9HL2L@rInbx-Lb2j5;}PA^ zn$W^!Calzs>rhnVhq2YOkN*iV%W)wJ>5T|U(i@z|T$?20H`P-TYY3k#T@D}PD?9Ld z9QdW%OXF(KM%KDbD)4Rlbm+=Od_RY}#ETux)Yt9_s;a=i5crX__|Mw10Hv+LDW(9i zTClGE#cFSk&g}T)Bnch&lCAxNjgtcX)y7~7t+l<05eKFuapU&@8ebXs(AJnB+I3PO z(4h1k3@hzG|3!spUoTNak>{~)@AaRytR>Uh#F3dW<3t0;Pn4eA%a&bL!d4d2igY?k zC~)4zwI(nZts#E9HWtcLKms;Ni|A~f+eFa|-BEo_*o}OpIG$DU2sFH$BiY+819w%yJ_|>ua6IKGM zY?Z8rYEjT`hFEI#uwlM2bQ^z0>pn>YUzvoC`T0e_F4J);x)@FMX-I6!vU}P5z#Smj z!*ut$T(+q+C#&M9Xs14_?s24kzJu4+5DuI}Q0sn95XpgI^i2lXgub!-ZTl$a%TGCJ z1fpHT-zM`-+2k|`oc5kN#}r)nPpLX!{RQUH{EJte601ODdbEbDz1UNkfs+C{@-qr|(Yl zU2`uDzDyAt_nxMoTDeH6_w?m?q#LHMhj1}guwD}9X5xCn>KQ)78L|#j6cB~(f0QDB z{~SzcR9%FCN?j&ONaj3B=e#a@2S6>nOv*HfwXG#W#Xz?i@6_3FlNVN`BPM~g+*415 zcR`y}i0=co%j8#i9B|ywG}I-H7vlAl+yTb;VlR}FLu(Amlfhrg#!$CkljNgC$RY^I%C&a-NQ%_upNVyyCmY*E5fN(mL5eU!P^ z@3!v~9%(fS0<4k|N=wIXiO+Sk?p4;aF);?Q8ejLY**0LH+yPu_#_Vlv7k7v^gu~sS z-JzS4=p6FBVXK21-rn9^ZMW}f>UE=dN{=PsRHYV{MkSdCxj5E{y2QSUj)c&x9brlPaT z1ObHrcXgS z2p7s|N1xQstgnu_noILrvSjyW1gmwDRUD%;!aNzDBg zRY?@xw?{#r7~Xnh;zNa$Uv=B$qyg8I21*$g%8q%spDVtPIrD&AhrFCLoRts>$SEbr 
zFPNL{&Qdd?@$`2DA_1_6v9$~&1WlU>6L|8S)Nq>ali?_Yx6l;@443911z(-knVp$_ zrdiCDRnB|Ss3zz_vvz7*fPVOls5f!kU;6#q!osc^+ORG8{xFK_M+OQWG<_?OWup-_ z!^s)TM^)~Q<6fi>!YKHP=S*zd4{5E7RWZH9v}rvIMTB^8Qd+Y@N1c_K+WTo`$?P<9 zJCujDLL>~1m0w1n)~a8%)_hWA{4!F8*mYNV&Iv6#CIv&Y z1*a#(*E078&H{FdUW~a~qMfamRgjxUS)gV5oCT~4(b7l<#EThELf?7sbac#rX}=$& zuWHqWXas6?o$!2YtTq);am8@VMIBPP#T|n!<(3do(&ETA*WeROEU3HgiCJmR3+*8D zYyBcnwD&kd_r#r2VDU202e<9zA@LGkC^tsEC%Rm$D}$6&{2e z)m2&#y_Zyb#`k*a>dD^vdx+iQz)k!PqZN*Dr)*B>b`$$jxgWp1$)nPhLG!5ILw{<_ z#m9WiLVffxUm7GP%ad8ja}Nug+$=EMQuL+w4-Jjb7}F;&G|BoBsT>1Yi^4xz#;!=U z)hoS&Q=mc5=Y-RW!A78&JFWk5; zWqX%Z$Rma0?jBHx^9hR?9h>7Z4~;7C^X$#J>~1+J!0M+%VQgT$LP#UtVhMReZPM#L z+Op7h@wlU_g0rPum>MuW%pH4O??31AsCxlvKr5@ccze=aZbD%pF45sZ%e84m{1_qL zGX>J(rFvFbItu$iS@*b7IpG|i6gJJAoACG})76@|n*tYvWMSX)O~K}uo-OF^QafM5 zsb76nWCtpgQj)D!V08lZR&hT%boEK&?vt*Nb|2fuALXbH%t@_cFE%7#6p7A{@~!DT zFJrB-B-$A$%m)o<(~uH7zMSHrErxpekQu*slnv@XKfBWkv$OM1O2z{M50 z{g6bL&D+})FRa%@NjaZhIlc(w{d)0v8Mp*Wvf~sCW-_rVJgB<%^qk9=)QyD}PC_+M zryeoHG0FB=E+8MS7CU*&$TZBYatzdhXeF3p1~GUFI4kK1s`dO-2d!0YWzG4hU|D$v z-HSoIh$8cQ!Q%mL=@PFfJnK8O@aacDhyCtq$?oZ?f`^$fUU8m5gpJLQNl)t{(&9#Els$#b8n$V7iwlomPDQV3|b+EC+bERk8$Cg0hBLr1_MRgqWUZ7 zoRY21e1;88qIu6+9vyc*v^14eiCs?JzUs z7V)j}*SIvy-{j0Hj>{Qvwz^7x55LGOEyL--y2Mp@J7j(nHJXBblfJf-&T_T$$+L;C zehlPm*>VTCpSDkQc1=290ge5#bY;;N=V&20b8}{YmWnA=T_awgQcOA&OM|R&(DEdw zu(Y(buJn|(WAjji8=ZM0Hlp%;Y=Ppd_HIs3jqlFkYRw0>kvqTw+dC(u@#<5wRcWea z=Y*xL+bK1`X~{v;D+4q8Cj*p8xF}fEpNEhuotnM3Q?C?WQZI7hj`OxJZuSz$6Yh!~ ztrD$$bkDoR7^SzH$8>riQyJW4?`}@$D7O{|jM4}7zT?5|>cTg;O(SYx(k!&)i$$O5 z2r#e|G&_o4-|Cti(bgDwL4@4?a1*!AAqPedxzO|CF){?00JJ z3_n-y4KYhTbqvg*f|Ssi_7+aJtZOK|cI zvrL5tk~*0j{>Rw(J&(FuEiae03i|7UmCH-p_AcTl&3wo0Q~mY>+IB7~e7-PIsj+7D zJ9Naq@ZpMETGIC?JbT^s#V(EF>&Hj$RNu))xS)?G!&F z^Q4I@d=8K+Wm(JZq#aKa#}uA#7j?nwmE2YuG%k?aI1@j5O(Gjs&p>&UzdF!w7W>#k zhUb-a&_-U8&O#=(Ckc)T^e|;M;fDS^B=X_XAPJ+(V|JkC-VUiQgiTu4$BLENB!-Kv za;F@|k)D?)x^u0qjCat^K zUM$M9gI6yRtR6@~v1mP{g#5zOsI!oT+IkkL!?JQW)PI*@LLokmHHirs1D0K5RuD4Q 
z)n~?iyGWU!msxaD$9Oz-#Bm2GRWF67TBcuDhaCuq#|-QV%se0ZRbf~l?Z+ch|UT`#`c++Q=i;AF|c*L)rR z!i7%Y23!sku_AV^ymLl3C*Ko^Bo`HK2@F9>xuAAi(1|tG<&okoWX)z|+HIu4nvg-< z_~oJHkzudNWG>$Xt!JZ3GDT45#PNnj1eLkchFg~fj*Fq!B3CIaOLu`!k#9+5j&x#P z7FX$P^7K%hz?aTf3hxv-eli7pwKO7Hl-4D_Vg8wHB|hiznQZk!t@&Eh`106SvT_sV z5D<-e;?i*;^@=;^p5*9DXk12fu)ivQ_;|JbCJjB-v8vXbun`#1s*6HT5&8btgQ3wf zyP1t`BTd1g9k-t6+|}lpJrOQzeA%>VR;%=qwW2RhFHCap0M+|q)UR>f&0;m}I0>?X zCNITZ-NmSr5LT)D_ny7xwdvwcv|FqrLRR!UeP)UFnu$8<583`h#)%4*LYfQR5HV<7Ts z=VG$JTW_*@;SPYAJ2PUweeouuT-k-l|hy+G_X$^S{l)&&gd)T|D7s?rO{c)!qT(j>Eim<+fUtBHq~4 zJoT_2RtD`itbLwrYM;_kSnu>Xm4$M;cXU=1K*01XkU5xB?^)bS{8KNzf-K*Oo2bW! zH(xZzMW#=;WSRrHd4R{K59d`HFKKZrm9?2lSb{x(ZKm<=<8oW^`cR&Uue)i^I8{?7 zD8{GWIphbvpNV)1#9P}Ckq!p4UxX6I{ zVIr^y_ieXB30*3o5qlvzJRjm1hRl)^?!>Pf4x5p`m%jd-@jYRquaI zYRbu({g5bzb0K*MFRd+cHN}%5kgGnYxdD}oKH4xG)kxT)2o;$pMJ;|c%Lxm^_B)lg z#l$#NtA(azPm9`Pb*BzyK5Q$uuCbjgRy+6SuW!P|jSoqzX39Q*7s-kic5?uJb^^D> z@)?h&$c&NS4Eeu79tgOGRPt)M9!Lx~@Hy%%K<+V)W&17OXL69xXEqRWytLsOMt0R( z^hDP-g`bg}prfp}e@^Nr_`Gc@D5$Fw-wyf$nw&+aX5OHT4(GfAz+xJ7I)Sk52b<@f zd$z@>d4$^n2O2Lm#hr9WSn#gqc^O5Q!c5DpQ%%Lqb?mK)^N_MCPkNH*GWy%u!}O47 zY|`aL|-_W#5ehTf~gY%a2>}m|rq3`kt9BaB z_FfUEP=0XZ!_7EFu}>hYKSI>v|71C5eYi9gu6z%Y^Qk!VaY8U7XEIy>m2~aJ~YxGs(K2GdXHm8i;e}0 z=ZOqGz$Hp9b2zh<(dnxIEp_)>7kqm*_DQN3_ReuZ)Kmxfq+gQ5ox9qdmH2?hDJBg~ zB~^!p6kF0C?D8)(7+%ANCmwuJ&X|fg?`xNQ9K2KEQvh}tp|kCyi-9^O46Z;k&Wvc_ zxWhW%al+*OfH|FGiovA4q&yO{Jz z{-IPA=GBspJ9|v$3abdS>=Kr4!{#F#sW`_<`zj(gHYBRbt1G^qI(b`guv5k-2XSQ0x`lsV#dD)s4!tih=P*Z#)k@g+RH0S( zwiq6m5BZmwV9}b;|JFrs9n%ki7`gH?ew#M>+$@0`KtMnEmAnneVsYRLCHi^GR2ZZtIngEvV2h;fYM80u6ulBjz)%*V*AWKr@x{tIMfW zDSF7lq@<7+lym={{PSt}%_s^!+4Fk{z$XY>(&_JDPdr6Hfc_vHih3EeH}*X0%(rVD zyb_1q7Y<7%K@+o!jJ!W@9e+h(f#Xh zBu!9i8-gB93O~3QXDweK&~seeVW!h}PkZ3b%pz&pL8(7|91`#B!1#2@%#=?oK3r%5 z;DDC}d9@G!1B&_0Z4I7Ff^nK+tvkY)1hgQxVC52JqyvYp9bPQaakpU!yc9G1!Sn{z zCSs-BFf^F!INl1huYgSfo~hH6oS}qhUJ?5U52A(wNBl^ezBBGZ^ci1^Y9FSd8xGKDbE$<3S!q&c;N7yv4T z8_qIH_bBaQ;RGtYzlz5HoA1ia*Uw9?9(zBvVYDFZyZGiQwyzF$tgj25dlnn+oqIyj 
zlsa03)h1e18LXolOa*KN77fzr7Xh)p;>c4j(1==b+coUtW#wV}RqcDno|F=G6GYBQ*A+u=n{Vrmq1%~7*xeoC@mcjSy|0!C zQ}Ms;+JMF`WXYXWusLFMZwF&P)p-t2JQLOBu}*6!rncC6!8_QMI>__1;;t-Jen-o~ z_JTE08py}lH5zA~A^ulHNX@}^TLbFbxUyOr;gnHR4`<3>hpcN@Q77Xa5SYdrCFS)Y z%JcNK;fKX~FnvZmbS}IDpqX6m`BOwnxHEGlP(PPQKd}IcuT+(5#T9DQ9T+QuUjg_8dVq#ootKi?sgq?7 zvQbDn$Xj2j;mt0g4h<6vuW6a*_SjqECM?{?9b73oDR6fCQY^djg&a*Z*sU*g`Lw8g z|AFdOtlwIj4ET}8SCr8m2zNQ&XArZ|>eJw1WI@LRhPG2R-<`q%2g%?qr2Za@PWHHH z6@xlBKf;NC2Gfb0owqS6ZdiFPpR@Z`iYL;hxzXa680e#$ygUJG)26qp ztU=@)$19lk3c(&&<9n6VSyg^4*z4BQXf;ow#B0K`xrUKe&STT6;fdae=pIFo8qO?- zU2Sqf8ttGCX_omW`>>$QxuHz2>kVe3?W1#x6E{JylTKLAhu2pqTz+rr%Y$9?8cm(} z#Op?`UjmaZ^ZUz0+G+LQ){>^ArK0!3q#{Y3mqih|_p2xRa+&xSk(y&5p{eJEWP>^% zb17?v)Goan9G7tYbjImVjANHSIopJP2~C+SEg~pL`XmZrm<7d zq`7-=-j;KGMrdTNXYk{EHqoGtGg#|4b{6bvRB~&#SiCKn^P9V{0brJP#9O|iS%3gcn^-^w4q_Ctj4ov=XLGQa=nOZ$$YF4Jg&Z?GO|KO{5p^Z>T6!2Ge9^x)kjc_H z4#(@W#nK6|Lm4qTDa-~-J}3eve8P2p=lbCO$o^Vq`}MDHd-wA-dOsx}oxM-4cg0-O z;{SGo-oU@84(0|iaIKJ*g;pL1%hE9xTIVXtzC$)$&j3|e>24aQ`omIsMxB4N0Qx^H z9GPip4-@mDldkcS#B3{fcbHq~e2Cj_uWV3B3o@ee#IpX|Der5^{hkr-S5rzGAH>U4 zt=N@z`J!zn!=POI(Y$T!@!!L6Z~mT>^3Pdsf9`kH4MeGY0S%&-#&CyT^RzBh+0kEp zDbkCFWX`1*hPHPeRdT&=KXaN6@6EXbXtS;>bnA#a$x$hgJZ;TgN8+tduT>1g$edkX zh7oT5(?4phv=2INLDE=G4zA3A1cG$E>3XFdoWIg*)<8p5tepB_7FkG8S#oDOME<;T zfsaPEc$%_z&AioX_|)=fLrYbIIgGY+XDmLCSRbaQMgQEC)_-vuG5-N0DzoTk;9d&4 z8{-QG%%YA~-Q8_uw@?KKP?7Ugn>0$2qw3>o(F88}$ndsm9j*O)oXV1TH`&hTbn5z4XnDF749UQkW2DRD44A4Q{TnZz08}GcX%n=#=kxCb}EZ zy?qn;u;9IOT!?8dF8#BKC{eF}Cd;XSgqIpGGL0BwtWTfwjv)@Bf6GSy5oz&YNdr(d& zL8nCmr3@kosUT@~=EP&K$4JUbgUPHzrO(1_Wt<$_COrs7M@I+lS;v9utBb(O=Ik~Z z-4NAr2lemf_n&%e_b;km;Y5>WOm%Fzyg5J=$A|Fr3}_Q{4|eH!|aJ! 
zrsdaOWNE!UjYfCrT$591QdCTJDP5@uBR_BI!U=;=sM0k9<16nQnp zDXI9t6{X@$E_&}Esv@ahZzKNO7Yu&$1%+&BL}kVb`Sc!wVA$<9k`zIJ(^|+ao~WV0 zxE&HR|5@^^jr_(YZU3}bsS@I&7DPKpL3ek{vNFOFpI=Pf+|>ZBdF3jz0vcV)H*m6D zUHe`}P<1^^n{k0lOq_G(98Yl?f&fk8!#2q9S$=fC3(C9#jF}OBEKdGSR|t)|t8;OV zig~(;zTUfBW=WSR?{Pr8+d7j26y?-M%n< z_J6SY{!nxF*SYTT1#mNEpGWk!dERFTi&4ZgxCVE)Des@j?=OeYiA^avFDg7Cg(b4n zfIA~PB`M!y~E-QMFHZ3t5uK0(LY zNF6s%ra;ak1a;OLAQKHqB^fJx8T$UD6c{1?hYFi!AT-4>PlFDZk$aTsGb$B&&_we> zGqPzid5vPl`^+a7Io7_QrE_K11?l$%pjgL6irKG%Wy#3nQ#mv0{3h^iU@SrWU?joM zzy_-7!cdx;%nC2B1xrMeag@bGX-Nm85|dzeP;h$wQ1bd0 z`ubnak(aySpjHIC(5v1Bd!#a2E$ObrRgnL(l@kbhC66)l?2BW-|o!#?Qkv24h5$(OS-%YeY zn>Nwr@zqBvYCT-U3Cfql6r-~_ax>5&N5&v1W%QSCG-y+q=bqng20p7+PTvkDxqZ5@ zOgn~1jJPpnt}`Kx6+Oc9?0e1Jb+Dx_6qUS2FQGu1K7KfaeofZcgu+{O2ha~VTEWnm zDll#$8nb&Ki;-X!{>|pR50tm1W~cl{2XEU-p@=i#yGfw?N2vclY78K7pz!g>PsM4X z85a3lA5GP$5d0^bfW|H{8}taOVy>Oc&C$-MrTY-AvAFnA*2VxkCQt(LV&iJipu*x{ zfG&-2(NOfRnMGkH(^vH#YzCRWAc(fy{DuQ(qLx+~7})9}k~&46cTKf_bE-D2U-9uw zAR6SWyWZrlVIA&?#Z4S^93uT~H4gzhTT{@+=hnr||8TWd9Tvww_mS+W9W(sG7a}C6 zzd&BcNvec3{pp257-DGzS;%;3ty*a2_`f`ZU zQnb!FZ%f(_v8>>W!TBS%_~$-K&;c`gUA``+PT?xr*@~S95~B*}oV|GLn*ngFX=?ej zu*Z|>T~Qqk5G-Is8&ye488v*gt=`S$o2)M&;vABEFQ{Wqc%MrsRWY8{07w)ns*rd~ z0ozpBKSSXD(PJWsH`26T>zAC{Px7MHAE+mxM=8(bCgOb_RHmNcflUJyFxmboQ znlRRFjJC^X1#XWl3gd23^(f|*hfK9=OpTF!CMjJi3I%>@6)*av&i@@l>pXV#b-fTT1eV#87rDR?n0V7?aM|nG3L{#cU_n@)z_xu`b<2}8 z+@9Ml!wT~z)b81@rfEDebl-1})=^(kVI3%H%MY42p>I4|+IeX^uvNWMlpyWrfmhK< z7YuufV)8)!rLwx9+7dfP5PDO3 zM|uhEk9Y3ex!>IRXVg3M{pB+}JK3DG&)Ivud+oK}we~q@!)KoXm*u2nr2tr1Sb#9h zAK+~2@;b!T+ynqnPynz2004Xd4weYu9Hw>=^N*arIRFP!$HLUr;uF690w@50%iYT~ z06fewOdEhWrT}P`F<ee(K*d4doSosmjgz@C0Dv2Y z{R^8Q#ju9-Fr44&6%paT)Umm50RRyZf2;qES>V6=ir!CV@jp?Ae#-n4fu9KcMBpa^ zKN0vF0zBM2+``-f!aO_-;9J5xw}km_0shf306+!c0MG}R1K0r!fcpR&fD@*UVI!D) z#|1hwM@L&>E-q^aPJJUA17l7@8!IkXeOoRtCpQ;BRKnF(-w$Y5Y>YHlsY@};_- zg~8lNj76Pafm^{=(%8&g#@*gn+5Lfvp*!4A$cRNk{1Tz4tFWt;t(CE(K7*^3rL}{w zs~F=icNfOgzaHjd6c@EOG7(m~C-rL!%zt8xzmCPl#f8&_m(#}HlnX2*B*ewd!^OkH 
zfoZ|v;AZWp@5*8A!1U`4?io86+MC-tn%h`2e7&K*fsK=+7^9PuxskAmz6rm9kr6+K zfj+Mh2N-Om&!KO`3+6E5@9!k+sKg1*wWa_*xJzn zg9ez3k?Svm{&)3>f#|nR{~`Ps88K~yCGCy%9gXi{iul**<=__J;NemE>y&eH-vWzr z{argzuCHMHZlM3(k$)3f;-W@|!iHbRZe#zK?v;)0{@iR^nv4IAPX9$n%(1^zFn7nO z9svP?DA(^h{v#5<9KmQF=DOeX6muoRf5YczC*K9~Pq==<^<4;jSL2`B^%Jh|Lg2d^ z|J1Jk$Kd)K*EP1r5L_1wlRX;)NCNP1anIx8;GM@kkB^6kPe^hB!!Zbf#FsCUPyneY zDS#B@)O0M@sA+G|l2b5pGTval31SCPF>vv6vGK65f!My@1Pc!zpOAo%^uh&FHW~^V zw(tI&y#tWopA$XTijBnxI7fnoO@ejS2%yL8MdD(1FTZYJ{`$c>hmC`K9uJ>@@B*em z*=4{vENtv^IM}$jIGC+VEHBJ?01gT6m22QT=dUX2<1yNi@;nJo!DqT#^d6|xyK$Y@ zz}}aDkc^yylIjLC3+qibKK@$*fo>{%PA(FRT<36buyOFd%7t~# z1yis|aB#1I&tJKth^KFNm67KOKIz@?l%n?pOuR}PKm+?;LbB_8Q#UrhiuQ|S-zJ#v z4@vf$V1JWq6mSt63p07xBmfBjS_;9MeC|({^Y^C(bvvvG)1#a1T6EM2yf&#&avQVg zeDxX4t-j&Ihr=E8R}$$Tx8NAf&^Uh4Elh2X)kO&r;&4Pji;zqkpPunsNx!?=WjMh|k`;BXXfxv}WfTeXe+N@I(TSyYCy^Q#1yf zhn)dP4NZ*bR$}WVf*eJ>*iee_=7q}U1madgAJ!V#=J}$T!?Kb zLLZ>CMhz%D3snGk1&@3_5EN*Sn3K!JaM&4gpxdmBJsIpHbxH%;4_hXhrR&Q8?hoQ} zv@G^WCYMkI1+(;^$I|_m-SCEo$n|H|v{Z`g>iR6xP+T^H&A0s45}i!Eo*+oEDMhD3 za2>^o`y`4SNvy51-kL^bxGq#|k6#_)`jp`|;s3bIj12$t#<-eryvwxD8nU$6Hg3!N zp$kn27y;OU9^Bty%F|g&T^pSrK#XJAT$7QZJWR44_`J_oWfSD6pd}m8zI=D1(FAm( zCOjs{hss*9)@z6 z6RU8cX-_>?H+1iLKirEWV&Xxzza=~cbVEws*TzLx&tO|^4X3^uF1w|x5`^Zf*#v`)q4 z%4JZD50PuA?hyNYzhkyzD?^`2k*Q{HTe$D%$64!}r~*w?4S&9cz$l;AU;;2WGu)6P zPxyYJbs$?7z)|O%jM(OFTF^nq?T~p7m;Q&cHijoDtizw!DY}nmc2?F@zlbxqT7Q^J zuB>}07oHxHW6pbM)^IJ(A|VYg=N)k2=~UJW_a?5niqWX*YDHPN`h{-GVJgz_iUV`57JSh z*w>p`%1aP8ZwocB0o=Z{77zkhQxX4H6~FcFv4>?5gQt_AIJfh3H^YSA>@aLt$jrzX z$V@|Idhh0Dzk4VQxY_>@3qty{{KVj&>3}J%86Y)Q7f(&`ppP<@*67X|;2de8zlk^^ zw8#I{J?muWrsra)?A22=sRQR!`w?+!>~+cBmnWBmU-|w?C;4Zc>_5Jqt!zHcRG;ps zBVdYtO@L)%Mfs)_+m#Z%UMq(IB5> z6QGs;nncp(^6K3PA7z~c;JixLW|{azEuR>bn;H?l!D&@oq-%>#oKP*R7zwG5TpDvp zPP@a6lQy&*L@~8u;RR9xVRbw*Lc>qt$t^z{c*)E8ll!vaZdktpd+2^b+_L#gHBKdu z5@U*jubzIHb27NZ9-<%%aAuVa?30WCN@RQ=a2$ms(kk5_~def$>E zQ;{%UA*C!X#LeU2;Tq#Y$Qghuqa{r$3#}Ga-l%X_^g^834Z38YM0X`2C^u0*Fl(dV 
z$LkEh8|&sx=7EAa)id>4oidJyi=)ih^{O=l4%C{G0{J{TajHp*PASh{asvVvm3oS7 z57xM14rzcqqU?3l$d%50!TAv52~xz&fHog`liK5`TI&4hv`p*i3e`mM=3s%v?bi`= zBK{N-uaH`ymldO<42GuNREw#42je4;i4{{TFTesu6$70!>3VR^HCihsWl3Z>z7)F=?%gV9CnuKYRe&!NW3vd$ce5Ma(LMF=V2|4h+`i?gG4 z9xMAKNtk`yLC3b2SvVT@D2`C|r74wG-png;F5$k*XE`*)rOCEA<3tP#zXhqG>l`#(To_wl3 z91`N&8kf;?qmmMnr|EZ8?^0_VEc~LTkOYywB;{Ksm`7{(*7B-G@C-n;Q+uI#ls@2Y zLDV$+!l{OgbN#IQP*;C))T2A^X?#I)!-T5*AwdYC=N9hrDqi{!S1!LgRyY-BE7z-K zvb5mhqy*^_pJ4YP|6_QXYY2ExBrMW!-dEC(enyYq@`jdhnbY{*qTx-oUJ>z@I}lR! zYr`1%0slSa4#UMnTmVp7Wstoz(1Bw$#i#0Nv`uoMPoubiZ->*{=zH5C7n45J&**u% zxY>ExqIBi@L|hDEVxiqRA8S~T`i{vD6HoD2an)q6cM1+h_Z2ySiB?u<&#ZZy zoHGF7yezvc%k^F62*kni0ixoC!$8!?ZolnLwsdrCF(GT}n_HN#$KO;k^2Sf%?uS2V zkZ7$Bg`xPJD|<+E4r9#EB{EV7K&UwU)2Z@Up7H^HX76pf@F}i9a(C|z=w)mdt6o$3m8&Z! zQ|O_>rP#8NBC~Q~rw`DpM7eJCilcQ}1sv3{{?ZFYc9}Y*_lZFR+QW!#I|_W2{gkw0 zV&zM(x|6DvRS%M&y*lboy_qx)f|Uk03NllIEDg8XPM1!jMu(m&#V>V4o1qDv^wmT^ zx?rYW&E&~{lq&$`RR^p}18-GSdt&XYSJU577HTH3k1U4Vy%PV@pm4-sewkXtIdL8m z!bi#8Tr<;{2#!=YZM%|O2l1;d(; zo=$!E9Y?^GHnMGP(xCArmfJZ4;2x_)jCH)(q^2~OQd-I5I-0E4+Cqca`t`L$OQibY z-9eSfJ$a83L9ecu(HaFetG=8iQBr^G$~2Ap$&XnZo;pMYPm1NSH{6G9Ae~T7A_`i~ zBGm-ZW=52_P`SezDJsVsg%dql4F)LzA#rv5r<@AN_h zR0cTt;R}^jmeCSydy0!}QJzG`uX`vtHI{G>4olXHSKm^|Y|CrbH+;lky4j=gz46FR zb2gC|&udE~%I_)JC6;BdDHSO3O;tM|buRY@Q`zT;MbwWwxbOBP<&Y{zJ+q`+$T@L6 z2Ju9VB#eyDy_!u@mW9<%ac_Q@nz~C3x$cb}*eDMSNO1eYJdDA#&*2YV!cp(a=JWmM zPPh?QjLQs)pjrdN6hm+xxGMvQbbKTD)=4|PXhkRDjw?cxEg2`gUbA#Q@&dR_9I1YKaeQZ7#P1=C@W6=;$2heBKL{%(Pu`+dDC3EQ>V zhj2N~jmr>x&g?{Dly0{}QBhDTUB|e{aak_@Wm=VZUTL*2x=$od6DO=aCF;pCo5jW_ zzeyo0qD{wI*$#~d!%{3K@^ohaNxkWOTFINOObj8r+Y~{f+0dx9kLa3PjDi6U9Oyud z+XeUAqsxN~`gWH}r`2Q`gCt%Sft^n}`ySD~7MDE(TvvCuCWD#{32!23%5lqTDBZH~Tmp)9w>8zFmO<2tnws-1 zffz+SEm_;*76K7D@ec({Ps6+fvosHsoy+KoXqRiCE;pvLUf#LZdz&U{&r4*$1ByK{ zENik?^-}W%*R>>1mW2?vnx2@8w~BY#diow;+o%ssA9RwPsyU*14=jzjQi@xyZT09@ zTjJ~$*PTM&GeDEDcU-olxibq)G&g;k(V)W-HxDGe5uKJf+2kOe1_zC3$y%9Q8P1wy zn1{yAFA7pyBn61m>%e2jhnn{~2@)xJ`IBC=GjpFG9xrdtq7*fqd@PJL-33lY+%wnd 
zu;VChE{ZssF+jE?gNDZ42lcXM#=uiYV`~=+I^X78EsqVJYpJ{a%0LLmvoWMbCEQh5 z+lB(RVhdxH4U2<22Qm_9s_LS*h?)d7{le39atXW?`Lh z-3Mf|T3UI$x_mhIjvn*@tH5o|H~U;86;Km=Yietd>D=_dOQlZ;n9A|6law{X4y2A3 z1*n*oAKrTPt~Q5kR)PA^7s5XF_RXa+GxSH?n9cQ2+VaJTY&N*ID_j1ew1&ydC0KFL&e$$DWLG?Nfv^vGqF<{_R!j)n86Lh-mBnMN!m)K=$n z(Yv1H9$=)$j1_bH_UbcTs89vmIArqN+^hdP4&4iM?o)gxM^IjwV z@`}F33zW8LQo-lN9?*Jp?_ItjYO*HdRduvOv|$9){m^rpzO}AEVR`=yz_e7XLo9-B zf~1ZeH}^t-n=OMsWNR@$yR`4TR!x`hl|e-M;2ZhSS?Nr8uuVX zLV!oRm~oIY6@NS?OrKM-wx3wWbfk&(FH{sydtW$OI zbfA8iv>l9PsM3>dQ+AL~GE=wCI{z`D?A*-Sq2qC0OQ-F=-k=2USO=KQyBWq*pNarA zihcZ(4CY*6h8JC2mVyiT&_HLVe~&)Fbz*HiW~g|)+kOjds2kJBq0T`Rch&J^_SFfl zZP~`*I@jpX9(YvZ^@`R65MbvCh(FcU2rOIZE-!dtl<8e>H4C8p^{JS%ChjweI=UL6iLZzs+gb|{4#!aj$;I^Vvq&|ehu5xF{}BX<#qpe z=Ss0+kmHBKKG9j1z%6u73+#6@3dax8k!w zs3P-hv8cV$V){WY6gP2Wc z_qxk5yt#A9O3|HeQeE2}+K&l95SviP?~Ts>+8)&IX`by>-Kdls-(rmeKkixfk=t20 ziYi4dB!u9YJ%Bge2+w>y=dC$hgpNJ*zJ~00z^AV3J+ov_D3HU3%uI4^XHBgWME`II zlwFubHduUr_>eS=VA+dJt9cXH-O5z1yx%>KCfqHyq#v^DU#vyCES8x@#v6)fJjv0t zfpe*+3e6jIYKF)PUEfbnMVt&Dv;I}0ufI1#N$qF4f?>1*oOaOE#}q|U1(k*mCsW)q zz)~G$;WFK}5Mcznzccf|%#)S}!b)sxrUtnSh39!6q&~q!NPi5r-C^$wV6B7Jj&0O) z6$*LEFQKRzS%Z#g@Vo+l>Uh%}#a7+$C7vK1+ zNtTCQ_xgJc_}0uXR+sDY_Ua_9*^`C`@5R$<-JG10NUS@S$p({;i!!Ih+AtZn>6Pu{ z8@TIONzPV823B(J9q;{E4@*wYyOvqEB`iMTnae;+!%hGTazBqcg z+J39Rdl7xVP(5|X1k;IIR`&OH1=LoIX+v=uozC9(kt@;kz7f;DF|fJ&JZ76BAKnr1 zGGl~S3p}4kv2V7yj7cAM5TkzB?`6Z)B}`E;yi3=Hh2HN=EDR!t_@}9pcn%-YI|FAu z5}`{7v^dp!FB|alJ^z|hZSPy+4lls6Mkx8g4qPv1ISd5RwDk9AD6j)Al6Pe~p3s|Ut%Jx3?bE1mK$ zYGR03v)H?TLBz1hFk3R0E%vD_BkM;wl09D7Q3F*KGU}{fWA)Dz;`1Xf=7`>8m$sKl zd5QMr0dWLHU?#OL#+TItPvH z-1-U|kA%#R!1nbTYWBd3Tukw77bA7<7gJ`sQRfvgR}~nDys#9l)`-2k<~KI1E^!i` z?bDN=6y=@rfp2kYqqsmxR?ens#OHnGp+C7wK|qsKuwXd_XO9bJL%E5K?aQ7NhFpmk zd#m^uS24e=R3f_(Dn&sP6` zA%V=Q5nxG}^5XCDFl(7jR`2>WLgjyqZaY-(odl{u36^p{=W$f|5?gHZ687gxiAF$s z6AQJa3WPcmIo#8DLYI6REhy^0&DyWCti8AWskK+8my^p5&XAhR-PaL(Z3H4D$f8k=R8^LLcd%) z1GGGM1O`s;e46rjzgT%3Y;UN#QAQql9FkA{mN{-4_IlP46Yv;#M|`hQ|97+puA(nD 
zKQ%rB=tS2TU5r;RGnbuP+zVVH^tlFuHstM>(MO2G1L)U3*Nj+Wpf47`_j{iAEp-rY zqxrn6^|24?yhMh1zd+bYYP%U!_A+uF#MZlz#kzlgFsPDEN;g(i9HE4Noen=8+1~Tx z?&tTj@SSx62I@j4x_rD9y$Q=|4g=_VGJS??^J{`6mA3&-=H!P%WkW-@t6rlKJ15YW z(**Db;`PZG?y+my@!bp$FC0ROH9gu$Z!(Cy)_m^%J1<_9yp+u{#ztop8;AAg0`}5fnthb^B4v7Xz}|IHMLD5SraO) z#x4rNIDr+N8}9YN>|LMyGTC@OhBF+4+CiTk+Vy(K=8oGWJ|5*qBgILi4|OqC0eN}v z?sv}mxj)tZ=Iu7@!N+1OQ@cr#DQ&jWysfrg{`8$xD{OxL=6ke6eV-^mSJu(tk>~;R zi3qOr@Kg%4SVEv#b#M4ZPc_EvR(0P_{l1;ruRB=hL;}>NCuQP|groSUW8QX|dhu89 zzGy#{s;#T(XsPlUae8l`MqvNC=ScD?+Mv!RfYqTlSalEfxhJ(1zBL%dvKKQQ^3|3i z#2f#(h)eht%pFIfZ!GAl`uMsfHG@8BgyrNQm?@>E_iNzoXAoSJSb$E(izQ-wzs8$T>aLdM zZz)mgv|@(eZZm!cSPxM!9&+0F0`1U&RV*Eqo|a9-d_*@hUtaR^B54Xmu5zQ2^b+&G zU_d%yKc4*~R^RrJC(|*)1~4Y|4U}~zhq8< zJqP|Waf(&Jr%ji3?vg`7)Go{kHPe_@yx%YMMf4>|KhWuA%-~lN&7Ws4RgfZP?)C~j zryH1A@u`hreJnJ4MXjusYDMi?$EA+Z&Z1KIW&Cz~_t_BMHSJP0p}M<>k=2F{Ou`2b<)YqV9ow}ci3J? zO@vG(Vr)Nn-GjL2P(4?DXycTuH=0q`5tg&%w#U2`jEho7W8!0&is!pOp=ekw2S?IJ z3ckyi3yq$>08c;<)Z>RLyRPWm-e|9mfUv#WKVM^`g@VZrWR`g&MwFLDs17=h2-*}4 zm`clc#T%HK98iuos>VKIz)|l^lKK&=`Q6u)MpaL7Ff5Pu*--k?CrTbahfBT8Q&E|z z<|$C)Gp~vmq3wC<7@28?*eYbMI2;>x27Y{qd9j5X?+(N};ck$bQZVakXxGYMltpn4 z$JgfqiheZ1@=Ls~4w?xjUPJqgSq<@y1%*kxfaj(&@PtX-Sq3Ft_4i|r%n;9#qN{nc zt17c2%7*x~Y3=9er5PlD8VsJGFV8*(ovY_7kaA=bc9@_~Q!JqBACR&9fJx#i-2GEF z{$IZsg2(n3B?PXBllJ)~W2nPN5Z5*lMinKgdP zquL@l91BXtQezra`-HQe@Ts11?S#9%5`B#Hg4kb>2Z+Pf6|TLFz5bz^@@?1#u@6hj z5#WrgV)U8w+nE^A#CDNLRT3&$Ad^yO3xH_8 zR{cXFy)HHM6YKe9{TPV}x~QeO3Q9we<}t{jyVEX@KyGE)gKCfL;nE;jYb@wG{K=Ou zYM6-lUXY4XkhAncUu2}c$D33H+uw57-#_pQ$09?^sGSE<_d{e0*|i?N5Dj4Z z(3#VGRg9RDWphO@+Tm$PS<})f0Ac1NR8|gh%BeXu810C9VPty-xXH$I2RlN75HQs5 zDJdSA#y@u&yToqsd`4E1Sz`ukvs*k$T4l3DV_9i<%skjh7zQy(>&B-{WR7%^e0*!V zjI%}rcpA=`;N#`yKw5-9lI}DS=1_4tZT>_oH#qA_=@C4|>BhwFK+D*b&5T6IjdYDp zL&1mOvMNG=KXemrp_LeCy|<;+!MA-!lo8^j9xPz*JR6b#*3~u8c2$O!_NUdo$l__S z<9K&*hB{rhU@Q}4{g{wg%q;b>=t8p|#Sv*iwni3OB);{rSHsFqp?GbdOrKe-kcu#O z!%Z3zJ)J 
zWhZ9XhPvNHZ6}eP!_A|YOj$>&V;}!&+yb9{Kz-;_nK}ay_4E{J^(QnIbruGPME<$Ne=&3z{)JH0QWgvy$8am(h)frS;(g1zw?&2i|`Dx9$P z!}=ign0k5T?ahk!=WD7Py(ubsO?jj)MhAJT(>JWbr?z!+rpEFh9!W9B*lRqUz3O`H zdHeMe2h3lco4CjOKj~<({>$~iL09z;3OT})7KCd3G`VyalTIua8>VtGkHR|+W>N$} zM8yP@(mt9#_hd!q2fDasaFA|@wC(I6&3dWn#q(UHq#`?Z&!iQ3^&Zhm$u~pxs z#{Wv~Rh2q{cdl;$z9^53=)lI=$-?=H1#YL-(l;a$`O96f^LPs{O!gu45>s%eV6S`2 zqlPG6?!IUu9(_e18kx#d1O8kv#^v;UCik|-QTc(ob8dFZL)?el0NR^o$l4E&mh`W+ zDY)0=A6Tk!7P5Kn(sy8T?7(4rj5hsof9BtKjFRe;KYLjPG&Y3=doD_vX+_T}|G@qT z5_Y&5?JvQe*PgV2e3aQnB~LBQ<}XFT*1&++0*|QDz-l)->JJkOFYpyS-**YM*%IYM z?9jTrQ&gz(VhBjed3J)@*(s!2Z=Y{A1delE4e10;7r9a$$ZDmc>Lpj-RE!E{PsRPSEOF)FBwDvj$>vjfJnFH}3z2diAW*%8gQ`uYKPC|^RjTzM%M!Q=}2^+%81 zeqkc2hia`jLb0_p*5V#Eb`}dnFH^seo7f7sH0&$%EpC0$tGl1zgRSG#mCzKQDUz+n zLi@^~wk~gCSnHFNVfB+6$|qAWiNIE~f&J_{^#i&+a?w2KM1Ez=v}-VM(LJeQ%&i!0 z)J%3d8;@Mj@k^VUfXW;0#i_GO%%xuHI1Gu}rid@U>NjYIsv4k|_3z{!4!*|WAx0U4 zq0)&i-^QkrO*5>_6R!?9iSlT_GL+crvw7*Qs{oe^mg}3#BKs7cAlJVn9h+7RW}EPf zam$foDlT_}+T&qwVv^Si8@R!5=}x68ebz(B&j3hWvD$pHaQwAYjWB0L1o|T99X0nM z&%EOC4LhMVR&$8%{-WKeM8o3D7$=hJ)r4nuH{P_zkiM0SG?8V{nFr5btPiki8H`EWBS4BDo_tHMjyT+2%j)>>Fosb@jctw?%&d`$A{l?<1p)E{X*V# zoJXf|<2j;(`Wmg|MfFM*<uYL5FpaT!WpeV_P zY@~L@hW87Hh`~F=`IhQC=Zn|HX_vl4+SrMjehib|Lh@uv28K46Ze!j{>dQ*hH{R6s z3>8eR)~ut^$vJ7-u@s{OpID?P5$|FWo@yT@j){RS>_}?brIKQyL?Ue!X%NKmiYrJv z^i+4?3@~BY&+4~lj3Xm_0g@dGs{bP+>c@;#nD#igPxK5HcntNQw2GU~P znu#PVP6M_rVyU$Q{l;*(nB3XsYYk8zV)+vJ2y}@*C8q}Z(U7| zh8~p2qhMQtY@0ab8=pTFCe6F4COvhEE9SM-hiN|e)K>+f_gCbkepqEHHVA%lIAjHJ z@3+5ygTtt@7RvV436YZy<4TFspE}`v&@y6mnfsK7=Tm~Y;}Qh8dN=-Np$3SlEkU;5 U6ZPL~fBw_\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# CPU Dispatch Control and ISA Analysis Exercise\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## prerequisites\n", + "****\n", + "### Step 1: Prepare the build/run environment\n", + "oneDNN has four different configurations inside the Intel oneAPI toolkits. 
Each configuration is in a different folder under the oneDNN installation path, and each configuration supports a different compiler or threading library \n", + "\n", + "Set the installation path of your oneAPI toolkit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# default path: /opt/intel/oneapi\n", + "%env ONEAPI_INSTALL=/opt/intel/oneapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!printf '%s\\n' $ONEAPI_INSTALL/dnnl/latest/cpu_*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are four different folders under the oneDNN installation path, and each of those configurations supports different features. This tutorial will use the cpu_gomp configuration to do ISA analysis on CPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a lab folder for this exercise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p lab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install required python packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get current platform information for this exercise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from profiling.profile_utils import PlatformUtils\n", + "plat_utils = PlatformUtils()\n", + "plat_utils.dump_platform_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Preparing the samples code\n", + "\n", + "This exercise uses the cnn_inference_f32.cpp and cnn_inference_int8.cpp examples from the oneDNN installation path.\n", + "\n", + "The section below will copy the cnn_inference_f32.cpp and cnn_inference_int8.cpp files into lab folder. \n", + "This section also copies the required header files and CMake file into the lab folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_gomp/examples/cnn_inference_f32.cpp lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_gomp/examples/cnn_inference_int8.cpp lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_gomp/examples/cpu_cnn_training_bf16.cpp lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_gomp/examples/example_utils.hpp lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_gomp/examples/example_utils.h lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_gomp/examples/CMakeLists.txt lab/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Build and Run with GNU Compiler and OpenMP \n", + "One of the oneDNN configurations supports the GNU Compiler.\n", + "The following section shows you how to build with the GNU Compiler and run on CPU." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler **g++** command and flags that will generate the executable.\n", + "In order to use the GNU compiler and related OMP runtime, some definitions must be passed as cmake arguments.\n", + "Here are related cmake arguments for cpu_gomp configuration: \n", + "\n", + " -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force> /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir cpu_gomp\n", + "cd cpu_gomp\n", + "cmake .. -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DDNNL_CPU_RUNTIME=OMP -DDNNL_GPU_RUNTIME=NONE\n", + "make\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or a local machine.\n", + "\n", + "#### Script - run.sh\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user can refer to run.sh below to run cnn-inference-f32-cpp on CPU." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# enable verbose log\n", + "export DNNL_VERBOSE=0\n", + "./cpu_gomp/out/cnn-inference-f32-cpp\n", + "./cpu_gomp/out/cnn-inference-int8-cpp\n", + "./cpu_gomp/out/cpu-cnn-training-bf16-cpp\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit **build.sh** and **run.sh** to the job queue.\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts either on the Intel DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
rm -rf dpcpp;chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "## Run Time CPU Dispatcher Controls\n", + "***\n", + "In this section, we run workloads on the latest Xeon server from DevCloud, and use CPU dispatcher controls to generate JIT kernels among different ISA for comparison.\n", + "Users will understand the usage of different ISA by analyzing oneDNN Verbose logs and JIT Dump files.\n", + "Refer to the [link](https://oneapi-src.github.io/oneDNN/dev_guide_cpu_dispatcher_control.html) for detailed CPU Dispatcher Controls information" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When the feature is enabled at build-time, you can use the DNNL_MAX_CPU_ISA environment variable to limit the processor features that oneDNN is able to detect to a certain Instruction Set Architecture (ISA) and older instruction sets. 
It can also be used to enable ISAs with initial support in the library that are otherwise disabled by default.\n", + "\n", + "|Environment variable Value|Description| introduced with microarchitecture |\n", + "|:----|:-----|:-----|\n", + "|SSE41|Intel Streaming SIMD Extensions 4.1 (Intel SSE4.1)| Penryn |\n", + "|AVX|Intel Advanced Vector Extensions (Intel AVX)|Sandy Bridge |\n", + "|AVX2|Intel Advanced Vector Extensions 2 (Intel AVX2)| Haswell |\n", + "|AVX512_CORE|Intel AVX-512 with AVX512BW, AVX512VL, and AVX512DQ extensions| Skylake-X |\n", + "|AVX512_CORE_VNNI|Intel AVX-512 with Intel Deep Learning Boost (Intel DL Boost)| Cascade Lake |\n", + "|AVX512_CORE_BF16|Intel AVX-512 with Intel DL Boost and bfloat16 support| Cooper Lake |\n", + "|ALL|No restrictions on the above ISAs, but excludes the below ISAs with initial support in the library (default)| |\n", + "|AVX512_CORE_AMX|Intel AVX-512 with Intel DL Boost and bfloat16 support and Intel Advanced Matrix Extensions (Intel AMX) with 8-bit integer and bfloat16 support (initial support) | |\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## ISA Comparison\n", + "***" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The section below compares and analyzes different ISA upon JIT Kernel usage and CPU instruction usage.\n", + "\n", + "The table below shows the different comparison by using different oneDNN samples, \n", + "and also brings up the keypoint of the comparison. 
\n", + "\n", + "|ISA Comparison | oneDNN sample | Description | \n", + "|:----|:-----|:-----|\n", + "|AVX512 vs AVX2 |cnn-inference-f32-cpp| show the usage of zmm registers and avx512 JIT kernel | \n", + "|AVX512 VNNI vs AVX512 |cnn-inference-int8-cpp| show the usage of VNNI instruction and VNNI JIT kernel|\n", + "|AVX512 BF16 vs AVX512| cpu-cnn-training-bf16-cpp| show the usage of BF16 instruction and BF16 JIT kernel| \n", + "\n", + "Those comparisons can be conducted on the same CPU microarchitecture with the help of oneDNN CPU dispatcher control. \n", + "Users can also conduct similar comparisons for TensorFlow or PyTorch workloads by replacing the oneDNN sample with other workloads. \n", + "By conducting similar comparisons of real workloads, users can understand: \n", + "* Whether the workloads leverage the latest instructions like VNNI on the platform\n", + "* How much performance benefit is gained by using the latest instruction on the same platform\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: Pick one of ISA comparisons\n", + "After users pick an ISA comparison, related environment variables will be exported. \n", + " \n", + "The section below will list out all ISA comparison options with index number." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ISA_COMPARISON_LIST=[\"avx512_avx2\",\"avx512-vnni_avx512\",\"avx512-bf16_avx512\"]\n", + "index =0 \n", + "for ISA_C in ISA_COMPARISON_LIST:\n", + " print(\" %d : %s \" %(index, ISA_C))\n", + " index+=1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please select a comparison option and assign its index to the ISAIndex variable.\n", + ">NOTE: no bf16 support in DevCloud now. Please **IGNORE avx512-bf16_avx512** comparison." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ISAIndex=0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The section below will export related environment variables according to the selected ISA comparison." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ISA_COMPARISON = ISA_COMPARISON_LIST[ISAIndex]\n", + "print(\" Compare between \", ISA_COMPARISON)\n", + "import os\n", + "if ISA_COMPARISON == \"avx512_avx2\":\n", + " # variables for AVX2\n", + " os.environ[\"DNNL_MAX_CPU_ISA_VAL1\"] = \"AVX2\"\n", + " os.environ[\"DNNL_APP_VAL1\"] = \"cnn-inference-f32-cpp\"\n", + " os.environ[\"DNNL_LOG_VAL1\"] = \"log_cpu_f32_avx2.csv\"\n", + " os.environ[\"DNNL_JIT_FD_VAL1\"] = \"jitdump_f32_avx2\"\n", + " # variables for AVX512\n", + " os.environ[\"DNNL_MAX_CPU_ISA_VAL2\"] = \"AVX512_CORE\"\n", + " os.environ[\"DNNL_APP_VAL2\"] = \"cnn-inference-f32-cpp\"\n", + " os.environ[\"DNNL_LOG_VAL2\"] = \"log_cpu_f32_avx512.csv\"\n", + " os.environ[\"DNNL_JIT_FD_VAL2\"] = \"jitdump_f32_avx512\"\n", + " # AVX512 specific register\n", + " os.environ[\"DNNL_ISA_KEYWORD\"] = \"zmm\"\n", + " \n", + "elif ISA_COMPARISON == \"avx512-vnni_avx512\":\n", + " # variables for AVX512\n", + " os.environ[\"DNNL_MAX_CPU_ISA_VAL1\"] = \"AVX512_CORE\"\n", + " os.environ[\"DNNL_APP_VAL1\"] = \"cnn-inference-int8-cpp\"\n", + " os.environ[\"DNNL_LOG_VAL1\"] = \"log_cpu_int8_avx512.csv\"\n", + " os.environ[\"DNNL_JIT_FD_VAL1\"] = \"jitdump_int8_avx512\"\n", + " # variables for AVX512 VNNI\n", + " os.environ[\"DNNL_MAX_CPU_ISA_VAL2\"] = \"AVX512_CORE_VNNI\"\n", + " os.environ[\"DNNL_APP_VAL2\"] = \"cnn-inference-int8-cpp\"\n", + " os.environ[\"DNNL_LOG_VAL2\"] = \"log_cpu_int8_avx512_vnni.csv\"\n", + " os.environ[\"DNNL_JIT_FD_VAL2\"] = \"jitdump_int8_avx512_vnni\"\n", + " # VNNI specific instruction\n", + " os.environ[\"DNNL_ISA_KEYWORD\"] = 
\"vpdpbusd\" \n", + " \n", + "elif ISA_COMPARISON == \"avx512-bf16_avx512\":\n", + " # variables for AVX512\n", + " os.environ[\"DNNL_MAX_CPU_ISA_VAL1\"] = \"AVX512_CORE\"\n", + " os.environ[\"DNNL_APP_VAL1\"] = \"cpu-cnn-training-bf16-cpp\"\n", + " os.environ[\"DNNL_LOG_VAL1\"] = \"log_cpu_bf16_avx512.csv\"\n", + " os.environ[\"DNNL_JIT_FD_VAL1\"] = \"jitdump_bf16_avx512\"\n", + " # variables for AVX512 BF16\n", + " os.environ[\"DNNL_MAX_CPU_ISA_VAL2\"] = \"AVX512_CORE_BF16\"\n", + " os.environ[\"DNNL_APP_VAL2\"] = \"cpu-cnn-training-bf16-cpp\"\n", + " os.environ[\"DNNL_LOG_VAL2\"] = \"log_cpu_bf16_avx512_bf16.csv\"\n", + " os.environ[\"DNNL_JIT_FD_VAL2\"] = \"jitdump_bf16_avx512_bf16\"\n", + " # BF16 specific instructions\n", + " os.environ[\"DNNL_ISA_KEYWORD\"] = \"vdpbf16ps|vcvtne2ps2bf16\" " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Script - run.sh for first selected ISA. ex: AVX2, or AVX512_CORE\n", + "****\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user can refer to run.sh below to run the oneDNN sample on CPU with the selected ISA." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n", + "print out the selected ISA." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! echo $DNNL_MAX_CPU_ISA_VAL1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "prepare run.sh and use DNNL_MAX_CPU_ISA to run sample on selected ISA." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# enable verbose log\n", + "export DNNL_VERBOSE=2 \n", + "# enable JIT Dump\n", + "export DNNL_JIT_DUMP=1\n", + "\n", + "DNNL_MAX_CPU_ISA=$DNNL_MAX_CPU_ISA_VAL1 ./cpu_gomp/out/$DNNL_APP_VAL1 cpu >> $DNNL_LOG_VAL1 2>&1\n", + "\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **run.sh** to the job queue\n", + "> NOTE: By assigning clx to property, users can execute the sample on a Cascade Lake platform from Intel DevCloud." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! export property=clx; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run.sh; else ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### gather all JIT bin files into a folder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! rm -rf $DNNL_JIT_FD_VAL1; mkdir $DNNL_JIT_FD_VAL1; mv *.bin $DNNL_JIT_FD_VAL1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Script - run.sh for second selected ISA. ex: AVX512_CORE_VNNI or AVX512_CORE_BF16\n", + "**** \n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "The user can refer to run.sh below to run the oneDNN sample on CPU with the selected ISA." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n", + "print out the selected ISA." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! 
echo $DNNL_MAX_CPU_ISA_VAL2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "prepare run.sh and use DNNL_MAX_CPU_ISA to run sample on selected ISA." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --dnnl-configuration=cpu_gomp --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# enable verbose log\n", + "export DNNL_VERBOSE=2 \n", + "# enable JIT Dump\n", + "export DNNL_JIT_DUMP=1\n", + "\n", + "DNNL_MAX_CPU_ISA=$DNNL_MAX_CPU_ISA_VAL2 ./cpu_gomp/out/$DNNL_APP_VAL2 cpu >> $DNNL_LOG_VAL2 2>&1\n", + "\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **run.sh** to the job queue\n", + "> NOTE: By assigning clx to property, users can execute the sample on a Cascade Lake platform from Intel DevCloud.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! export property=clx; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run.sh; else ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### gather all JIT bin files into a folder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! rm -rf $DNNL_JIT_FD_VAL2; mkdir $DNNL_JIT_FD_VAL2; mv *.bin $DNNL_JIT_FD_VAL2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "****\n", + "### Step 4: oneDNN Verbose Log JIT Kernel Time BreakDown\n", + "oneDNN uses just-in-time compilation (JIT) to generate optimal code for some functions based on input parameters and instruction set supported by the system. \n", + "Therefore, users can see different JIT kernel type among different first selected ISA and second selected ISA. 
\n", + "For example, users can see avx512_core_vnni JIT kernel if the workload uses VNNI instruction on Cascade Lake platform. \n", + "Moreover, users can identify the top hotspots of JIT kernel executions with this time breakdown. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Parse verbose log and get the data back" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from profiling.profile_utils import oneDNNUtils, oneDNNLog\n", + "onednn = oneDNNUtils()\n", + "\n", + "logfile1 = os.environ[\"DNNL_LOG_VAL1\"]\n", + "log1 = oneDNNLog()\n", + "log1.load_log(logfile1)\n", + "exec_data1 = log1.exec_data\n", + "\n", + "logfile2 = os.environ[\"DNNL_LOG_VAL2\"]\n", + "log2 = oneDNNLog()\n", + "log2.load_log(logfile2)\n", + "exec_data2 = log2.exec_data\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### JIT Kernel Type Time breakdown for first selected ISA \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(exec_data1,\"jit\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### JIT Kernel Type Time breakdown for second selected ISA\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> NOTE: users should be able to see **avx512_core_vnni** JIT Kernel if the sample runs with **VNNI** instruction \n", + "> NOTE: users should be able to see **avx512_core_bf16** JIT Kernel if the sample runs with **BF16** instruction \n", + "> NOTE: users should be able to see **avx512** JIT Kernel if the sample runs with **AVX512** instructions " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(exec_data2,\"jit\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Primitives Type Speedup from second 
selected ISA\n", + "oneDNN samples here are not for performance benchmarking, so the diagram below gives you only a rough idea of performance speedup from the second selected ISA such as AVX512, VNNI, or BF16." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + " onednn.stats_comp('type', 'time',log2, log1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "****\n", + "### Step 5: Inspect JIT Kernel \n", + "In this section, we analyze the dumped JIT files on the built samples from Step 2 and Step 3. \n", + "Users should be able to see exact CPU instruction usage like VNNI or BF16 from those JIT Dump files." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### inspect either first or second selected ISA by setting VALIndex." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* To inspect the first selected ISA JIT Dump files, set VALIndex as 1. \n", + "* To inspect the second selected ISA JIT Dump files, set VALIndex as 2. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "VALIndex=2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### List out all JIT Dump Files with index number for the first or second selected ISA" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "VAL=\"DNNL_JIT_FD_VAL\"+str(VALIndex)\n", + "JIT_DUMP_FD=os.environ[VAL]\n", + "print(\"Inspect Folder: \", JIT_DUMP_FD)\n", + "\n", + "filenames= os.listdir (JIT_DUMP_FD) \n", + "result = []\n", + "keyword = \".bin\"\n", + "for filename in filenames: \n", + " #if os.path.isdir(os.path.join(os.path.abspath(\".\"), filename)): \n", + " if filename.find(keyword) != -1:\n", + " result.append(filename)\n", + "result.sort()\n", + "\n", + "index =0 \n", + "for folder in result:\n", + " print(\" %d : %s \" %(index, folder))\n", + " index+=1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Pick a JIT Dump file by putting its index value below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FdIndex=0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### export JIT Dump file to environment variable JITFILE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if FdIndex < len(result):\n", + " logfile = result[FdIndex]\n", + " os.environ[\"JITFILE\"] = JIT_DUMP_FD+os.sep+logfile" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### disassembler JIT Dump file" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> NOTE: zmm register is introduced by AVX512 ISA. \n", + "Users should see usage of **zmm** register in AVX512 JIT dump files. \n", + "\n", + "> NOTE: vpdpbusd is introduced by AVX512_VNNI ISA. 
\n", + "Users should see usage of **vpdpbusd** in AVX512_VNNI JIT dump files. \n", + "\n", + "> NOTE: **vdpbf16ps**, **vcvtne2ps2bf16**, and **vcvtneps2bf16** are introduced by AVX512_BF16 ISA. \n", + "Users should see usage of vdpbf16ps, vcvtne2ps2bf16 or vcvtneps2bf16 in AVX512_BF16 JIT dump files. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> NOTE: For disassembler vdpbf16ps, vcvtne2ps2bf16, and vcvtneps2bf16 instructions, users must use objdump with **v2.34** or above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!objdump -D -b binary -mi386:x86-64 $JITFILE | grep -E $DNNL_ISA_KEYWORD" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Summary\n", + "In this lab the developer learned the following:\n", + "* use CPU Dispatch Control to generate JIT codes among different Instruction Set Architecture on CPU\n", + "* understand different JIT Kernels and CPU instructions usage among different ISA\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "525.6px", + "left": "28px", + "top": "137.8px", + "width": "301.109px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git 
a/Libraries/oneDNN/tutorials/oneDNN_Profiling_VerboseMode_JITDump.ipynb b/Libraries/oneDNN/tutorials/oneDNN_Profiling_VerboseMode_JITDump.ipynb new file mode 100644 index 0000000000..262f1d4d22 --- /dev/null +++ b/Libraries/oneDNN/tutorials/oneDNN_Profiling_VerboseMode_JITDump.ipynb @@ -0,0 +1,755 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Profile Intel® oneAPI Deep Neural Network Library (oneDNN) Samples by using Verbose Mode and JIT DUMP inspection\n", + "\n", + "## Learning Objectives\n", + "In this module the developer will:\n", + "* Learn how to use Verbose Mode to profile oneDNN samples on CPU & GPU\n", + "* Learn how to inspect JIT Dump to profile oneDNN samples on CPU" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This module shows the elapsed time percentage over different oneDNN primitives\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This module also shows the elapsed time percentage over different oneDNN JIT or GPU kernels\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Verbose Mode Exercise\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## prerequisites\n", + "***\n", + "### Step 1: Prepare the build/run environment\n", + "oneDNN has four different configurations inside the Intel oneAPI toolkits. 
Each configuration is in a different folder under the oneDNN installation path, and each configuration supports a different compiler or threading library \n", + "\n", + "Set the installation path of your oneAPI toolkit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# default path: /opt/intel/oneapi\n", + "%env ONEAPI_INSTALL=/opt/intel/oneapi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!printf '%s\\n' $ONEAPI_INSTALL/dnnl/latest/cpu_*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can see, there are four different folders under the oneDNN installation path, and each of those configurations supports different features. This tutorial will use the dpcpp configuration to showcase the verbose log for both CPU and GPU." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a lab folder for this exercise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir -p lab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install required python packages." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get current platform information for this exercise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from profiling.profile_utils import PlatformUtils\n", + "plat_utils = PlatformUtils()\n", + "plat_utils.dump_platform_info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Preparing the samples code\n", + "\n", + "This exercise uses the cnn_inference_f32.cpp example from oneDNN installation path.\n", + "\n", + "The section below will copy the cnn_inference_f32.cpp file into the lab folder. \n", + "This section also copies the required header files and CMake file into the lab folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_dpcpp_gpu_dpcpp/examples/cnn_inference_f32.cpp lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_dpcpp_gpu_dpcpp/examples/example_utils.hpp lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_dpcpp_gpu_dpcpp/examples/example_utils.h lab/\n", + "!cp $ONEAPI_INSTALL/dnnl/latest/cpu_dpcpp_gpu_dpcpp/examples/CMakeLists.txt lab/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Build and Run with the oneAPI DPC++ Compiler \n", + "One of the oneDNN configurations supports the oneAPI DPC++ compiler, and it can run on different architectures by using DPC++.\n", + "The following section shows you how to build with DPC++ and run on different architectures." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Script - build.sh\n", + "The script **build.sh** encapsulates the compiler **dpcpp** command and flags that will generate the executable.\n", + "To enable use of the DPC++ compiler and the related SYCL runtime, some definitions must be passed as cmake arguments.\n", + "Here are the related cmake arguments for the DPC++ configuration: \n", + "\n", + " -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -DDNNL_CPU_RUNTIME=SYCL -DDNNL_GPU_RUNTIME=SYCL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile build.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force> /dev/null 2>&1\n", + "export EXAMPLE_ROOT=./lab/\n", + "mkdir dpcpp\n", + "cd dpcpp\n", + "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp -DDNNL_CPU_RUNTIME=SYCL -DDNNL_GPU_RUNTIME=SYCL\n", + "make cnn-inference-f32-cpp \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once you achieve an all-clear from your compilation, you execute your program on the DevCloud or a local machine.\n", + "\n", + "#### Script - run.sh\n", + "The script **run.sh** encapsulates the program for submission to the job queue for execution.\n", + "By default, the built program uses CPU as the execution engine, but the user can switch to GPU by specifying the input argument \"gpu\".\n", + "The user can refer to run.sh below to run cnn-inference-f32-cpp on both CPU and GPU." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# enable verbose log\n", + "export DNNL_VERBOSE=0\n", + "./dpcpp/out/cnn-inference-f32-cpp cpu\n", + "./dpcpp/out/cnn-inference-f32-cpp gpu\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit **build.sh** and **run.sh** to the job queue.\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts either on the Intel DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "! rm -rf dpcpp;chmod 755 q; chmod 755 build.sh; chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q build.sh; ./q run.sh; else ./build.sh; ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n", + "## Enable Verbose Mode\n", + "***\n", + "In this section, we enable verbose mode on the built sample from the previous section, and users can see different results from CPU and GPU. 
\n", + "Refer to the [link](https://oneapi-src.github.io/oneDNN/dev_guide_verbose.html) for detailed verbose mode information" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When the feature is enabled at build-time, you can use the DNNL_VERBOSE environment variable to turn verbose mode on and control the level of verbosity.\n", + "\n", + "|Environment variable|Value|Description|\n", + "|:-----|:----|:-----|\n", + "|DNNL_VERBOSE| 0 |no verbose output (default)|\n", + "||1|primitive information at execution|\n", + "||2|primitive information at creation and execution|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "prepare run.sh and enable DNNL_VERBOSE as 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# enable verbose log\n", + "export DNNL_VERBOSE=2 \n", + "./dpcpp/out/cnn-inference-f32-cpp cpu >>log_cpu_f32_vb2.csv 2>&1\n", + "./dpcpp/out/cnn-inference-f32-cpp gpu >>log_gpu_f32_vb2.csv 2>&1\n", + "\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Submitting **build.sh** and **run.sh** to the job queue\n", + "Now we can submit **build.sh** and **run.sh** to the job queue.\n", + "##### NOTE - it is possible to execute any of the build and run commands in local environments.\n", + "To enable users to run their scripts either on the Intel DevCloud or in local environments, this and subsequent training checks for the existence of the job submission command **qsub**. If the check fails, it is assumed that build/run will be local." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "! 
chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run.sh; else ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analyze Verbose Logs\n", + "***\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 1: List out all oneDNN verbose logs\n", + "Users should see two verbose logs listed in the table below.\n", + "\n", + "|Log File Name | Description |\n", + "|:-----|:----|\n", + "|log_cpu_f32_vb2.csv| log for cpu run |\n", + "|log_gpu_f32_vb2.csv| log for gpu run|" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "filenames= os.listdir (\".\") \n", + "result = []\n", + "keyword = \".csv\"\n", + "for filename in filenames: \n", + " #if os.path.isdir(os.path.join(os.path.abspath(\".\"), filename)): \n", + " if filename.find(keyword) != -1:\n", + " result.append(filename)\n", + "result.sort()\n", + "\n", + "index =0 \n", + "for folder in result:\n", + " print(\" %d : %s \" %(index, folder))\n", + " index+=1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 2: Pick a verbose log by putting its index value below\n", + "Users can pick either cpu or gpu log for analysis. \n", + "Once users finish Step 2 to Step 8 for one log file, they can go back to step 2 and select another log file for analysis." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FdIndex=0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### OPTIONAL: browse the content of selected verbose log." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logfile = result[FdIndex]\n", + "with open(logfile) as f:\n", + " log = f.read()\n", + "print(log)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 3: Parse verbose log and get the data back" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logfile = result[FdIndex]\n", + "print(logfile)\n", + "from profiling.profile_utils import oneDNNUtils, oneDNNLog\n", + "onednn = oneDNNUtils()\n", + "log1 = oneDNNLog()\n", + "log1.load_log(logfile)\n", + "data = log1.data\n", + "exec_data = log1.exec_data\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 4: Time breakdown for exec type\n", + "The exec type includes exec and create. \n", + "\n", + "|exec type | Description | \n", + "|:-----|:----| \n", + "|exec | Time for primitives execution. Better to spend most of time on primitives execution. | \n", + "|create| Time for primitives creation. Primitives creation happens once. Better to spend less time on primitive creation. | " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(data,\"exec\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 5: Time breakdown for primitives type\n", + "The primitives type includes convolution, reorder, sum, etc. \n", + "For this simple convolution net example, convolution and inner product primitives are expected to spend most of time. \n", + "However, the exact time percentage of different primitives may vary among different architectures. \n", + "Users can easily identify top hotspots of primitives executions with this time breakdown. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(exec_data,\"type\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 6: Time breakdown for JIT kernel type" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "oneDNN uses just-in-time compilation (JIT) to generate optimal code for some functions based on input parameters and instruction set supported by the system. \n", + "Therefore, users can see different JIT kernel type among different CPU and GPU architectures. \n", + "For example, users can see avx512_core_vnni JIT kernel if the workload uses VNNI instruction on Cascade Lake platform. \n", + "Users can also see different OCL kernels among different Intel GPU generations. \n", + "Moreover, users can identify the top hotspots of JIT kernel executions with this time breakdown. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(exec_data,\"jit\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 7: Time breakdown for algorithm type\n", + "oneDNN also supports different algorithms. \n", + "Users can identify the top hotspots of algorithm executions with this time breakdown. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(exec_data,\"alg\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 8: Time breakdown for architecture type\n", + "The supported architectures include CPU and GPU. \n", + "For this simple net sample, we don't split computation among CPU and GPU, \n", + "so users should see either 100% CPU time or 100% GPU time. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "onednn.breakdown(data,\"arch\",\"time\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "## Inspecting JIT Code\n", + "\n", + "In this section, we dump JIT code on the built sample from the previous section, and users can see different results from CPU. \n", + "Refer to the [link](https://oneapi-src.github.io/oneDNN/dev_guide_inspecting_jit.html) for detailed JIT Dump information" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When the feature is enabled at build-time, you can use the DNNL_JIT_DUMP environment variable to inspect JIT code.\n", + "\n", + "|Environment variable|Value|Description|\n", + "|:-----|:----|:-----|\n", + "|DNNL_JIT_DUMP | 0 |JIT dump is disabled (default)|\n", + "||any other value|JIT dump is enabled|\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 1: Prepare run.sh and enable DNNL_JIT_DUMP as 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile run.sh\n", + "#!/bin/bash\n", + "source $ONEAPI_INSTALL/setvars.sh --force > /dev/null 2>&1\n", + "echo \"########## Executing the run\"\n", + "# disable verbose log\n", + "export DNNL_VERBOSE=0\n", + "# enable JIT Dump\n", + "export DNNL_JIT_DUMP=1 \n", + "./dpcpp/out/cnn-inference-f32-cpp cpu\n", + "echo \"########## Done with the run\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "#### Step 2: Submitting ***run.sh** to the job queue\n", + "Now we can submit **run.sh** to the job queue." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "! 
chmod 755 run.sh;if [ -x \"$(command -v qsub)\" ]; then ./q run.sh; else ./run.sh; fi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 3: Move all JIT Dump files into the jitdump folder" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir jitdump;mv *.bin jitdump" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 4: List out all oneDNN JIT Dump files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "filenames= os.listdir (\"jitdump\") \n", + "result = []\n", + "keyword = \".bin\"\n", + "for filename in filenames: \n", + " #if os.path.isdir(os.path.join(os.path.abspath(\".\"), filename)): \n", + " if filename.find(keyword) != -1:\n", + " result.append(filename)\n", + "result.sort()\n", + "\n", + "index =0 \n", + "for folder in result:\n", + " print(\" %d : %s \" %(index, folder))\n", + " index+=1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 5: Pick a JIT Dump file by putting its index value below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FdIndex=2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 6: export JIT Dump file to environment variable JITFILE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logfile = result[FdIndex]\n", + "os.environ[\"JITFILE\"] = logfile" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 7: disassembler JIT Dump file to view the code\n", + "\n", + "> NOTE: If the oneDNN sample uses VNNI instruction, users should be able to see \"vpdpbusd\" instruction from the JIT Dump file \n", + "\n", + "> NOTE: If the oneDNN sample uses BF16 instruction, users should see usage of 
vdpbf16ps or vcvtne2ps2bf16 in the JIT dump file. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> NOTE: For disassembler vdpbf16ps and vcvtne2ps2bf16 instructions, users must use objdump with v2.34 or above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!objdump -D -b binary -mi386:x86-64 jitdump/$JITFILE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Summary\n", + "In this lab the developer learned the following:\n", + "* how to use Verbose Mode to profile different oneDNN samples on CPU and GPU\n", + "* how to inspect JIT Dump to profile oneDNN samples on CPU\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "525.6px", + "left": "28px", + "top": "137.8px", + "width": "301.109px" + }, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Libraries/oneDNN/tutorials/profiling/README.md b/Libraries/oneDNN/tutorials/profiling/README.md new file mode 100644 index 0000000000..f34b0858a0 --- /dev/null +++ b/Libraries/oneDNN/tutorials/profiling/README.md @@ -0,0 +1,25 @@ +# oneDNN verbose log parser + + +## prerequisites + + +* users need to get a oneDNN verbose log from their workloads 
first. + +## how to parse logs + +### Raw log from frameworks like tensorflow or pytorch +* parse a raw log "log.txt" from workload : `$profile profile_utils.py log.txt` + * users will see output from console + * users will also get some pie chart diagram PNG files like typeTime Breakdown.png + * users will also get a parsed output mkldnn_log.csv which only contains onednn logs + +### Pure oneDNN log or parsed output 'mkldnn_log.csv' +* parse a onednn log "mkldnn_log.csv" : `$profile profile_utils.py mkldnn_log.csv` + * users will see output from console + * users will also get some pie chart diagram PNG files like typeTime Breakdown.png + +### Compare two pure oneDNN logs +* compare two onednn logs "a.csv" and "b.csv" : `$profile profile_utils.py a.csv b.csv` + * users will see output from console + * users will also get a bar chart diagram PNG file like type Time Comparison.png \ No newline at end of file diff --git a/Libraries/oneDNN/tutorials/profiling/__init__.py b/Libraries/oneDNN/tutorials/profiling/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Libraries/oneDNN/tutorials/profiling/profile_utils.py b/Libraries/oneDNN/tutorials/profiling/profile_utils.py new file mode 100755 index 0000000000..fdec405f6c --- /dev/null +++ b/Libraries/oneDNN/tutorials/profiling/profile_utils.py @@ -0,0 +1,206 @@ +#! 
/usr/bin/env python +import os, sys +import subprocess + +os.environ['DNNL_VERBOSE'] = '1' +import psutil + +class PlatformUtils: + + def __init__(self): + self.cpufreq = '' + self.cpu_socket_count = '' + self.svmem = '' + return + + def dump_platform_info(self): + # let's print CPU information + print("=" * 20, "CPU Info", "=" * 20) + # number of cores + print("Physical cores:", psutil.cpu_count(logical=False)) + print("Total cores:", psutil.cpu_count(logical=True)) + # CPU frequencies + cpufreq = psutil.cpu_freq() + print("Max Frequency:", cpufreq.max) + print("Min Frequency:", cpufreq.min) + cpu_socket_count = int(subprocess.check_output( + 'cat /proc/cpuinfo | grep "physical id" | sort -u | wc -l', shell=True)) + print("Socket Number:", cpu_socket_count) + print("=" * 20, "Memory Information", "=" * 20) + # get the memory details + svmem = psutil.virtual_memory() + print("Total: ", int(svmem.total / (1024 ** 3)), "GB") + self.cpufreq = cpufreq + self.cpu_socket_count = cpu_socket_count + self.svmem = svmem + + +def run_workload(outfile='mkldnn_log.csv'): + print('Executing:', sys.argv[1:]) + output = subprocess.getoutput(' '.join(sys.argv[1:])) + + #print('Output:', output) + + with open(outfile, 'w') as f: + for l in output.split('\n'): + if 'dnnl' in l and 'exec' in l: + f.write(l + '\n') + +class oneDNNLog: + + def __init__(self): + self.filename = '' + self.data = None + self.exec_data = None + return + + def load_log(self, log): + self.filename = log + + data = self.load_log_dnnl(log) + count = data['time'].count() + + if count == 0: + data = self.load_log_mkldnn(log) + count = data['time'].count() + + exec_data = data[data['exec'] == 'exec'] + self.data = data + self.exec_data = exec_data + return + + def load_log_dnnl(self, log): + import pandas as pd + # dnnl_verbose,exec,cpu,convolution,jit:avx2,forward_inference,src_f32::blocked:abcd:f0 wei_f32::blocked:Acdb8a:f0 bia_f32::blocked:a:f0 
dst_f32::blocked:aBcd8b:f0,,alg:convolution_direct,mb1_ic3oc96_ih227oh55kh11sh4dh0ph0_iw227ow55kw11sw4dw0pw0,1.21704 + data = pd.read_csv(log, names=[ 'dnnl_verbose','exec','arch','type', 'jit', 'pass', 'fmt', 'opt', 'alg', 'shape', 'time']) + return data + + def load_log_mkldnn(self, log): + import pandas as pd + #mkldnn_verbose,exec,convolution,jit:avx512_common,forward_training,fsrc:nChw16c fwei:OIhw16i16o fbia:undef fdst:nChw16c,alg:convolution_direct,mb100_ic128oc32_ih7oh7kh3sh1dh0ph1_iw7ow7kw3sw1dw0pw1,0.201904 + print("load_log_mkldnn") + data = pd.read_csv(log, names=[ 'mkldnn_verbose','exec','type', 'jit', 'pass', 'fmt', 'alg', 'shape', 'time']) + return data + + +class oneDNNUtils: + + def __init_(self): + self.topk=50 + self.logx=True + self.figsize=(10,10) + import matplotlib.pyplot as plt + fig = plt.figure(figsize=(18, 15)) + self.ax = fig.add_subplot(111) + return + + def breakdown(self, data, Group, Type): + import matplotlib.pyplot as plt + fig = plt.figure(figsize=(18, 15)) + ax = fig.add_subplot(111) + figsize=(10,10) + topk=50 + if Type == "time": + print() + print(' breakdown:',Group) + time = data.groupby(Group)['time'].sum().sort_values().head(topk) + print(time) + title=Group + "Time Breakdown" + time[:topk].plot.pie( + ax=ax, title=title, figsize=figsize, logx=True, autopct='%1.1f%%') + ax.figure.savefig(title) + elif Type == "count": + print() + count = data[Group].value_counts().head(topk) + print(count) + title=Group+"Count Breakdown" + count[:topk].plot.bar( + ax=ax, title=title, figsize=figsize, logx=False, rot=45) + ax.figure.savefig(title) + return + + def stats_comp(self, name, Type,onednn_log1, onednn_log2, n=50): + import pandas as pd + import matplotlib.pyplot as plt + fig = plt.figure(figsize=(18, 15)) + ax = fig.add_subplot(111) + figsize=(10,10) + topk=50 + + d1 = onednn_log1.exec_data + log1 = onednn_log1.filename + d2 = onednn_log2.exec_data + log2 = onednn_log2.filename + print(name, 'stats:') + if Type == "count": + 
jitstat = pd.concat((d1[name].value_counts(), d2[name].value_counts()), axis=1, sort=True) + jitstat.columns = ('1-' + log1, '2-' + log2) + jitstat['run2/run1'] = jitstat.iloc[:, 1] / jitstat.iloc[:, 0] + jitstat_count = jitstat.sort_values('1-' + log1, ascending=False).head(n) + print(jitstat_count) + elif Type == "time": + jitstat = pd.concat((d1.groupby(name)['time'].sum(), d2.groupby(name)['time'].sum()), axis=1, sort=True) + jitstat.columns = ('1-' + log1, '2-' + log2) + jitstat['run2/run1'] = jitstat.iloc[:, 1] / jitstat.iloc[:, 0] + jitstat_time = jitstat.sort_values('1-' + log1, ascending=False).head(n) + print(jitstat_time) + title=name + " run2/run1 Time Comparison" + jitstat_compare = jitstat_time.drop(columns=['1-' + log1, '2-' + log2]) + if len(jitstat_compare) == 0: + return + jitstat_compare[:topk].plot.bar( + ax=ax, title=title, figsize=figsize, logx=False, rot=45) + filename = name + " Time Comparison" + ax.figure.savefig(filename) + def parse_raw_output_to_csv(self, filepath, csvpath='mkldnn_log.csv', keyword='dnnl_verbose'): + #filepath = 'Iliad.txt' + import csv + + with open(csvpath, "w") as file: + with open(filepath) as fp: + line = fp.readline() + cnt = 1 + while line: + if line.find(keyword) != -1: + file.write(line) + #print("Line {}: {}".format(cnt, line.strip())) + line = fp.readline() + cnt += 1 + return csvpath + + + +if __name__ == '__main__': + onednn = oneDNNUtils() + if len(sys.argv) > 2 and '.csv' in sys.argv[1] and '.csv' in sys.argv[2]: + log1 = oneDNNLog() + log1.load_log(sys.argv[1]) + log2 = oneDNNLog() + log2.load_log(sys.argv[2]) + print('Total time %s: %0.2f\t--- %s: %0.2f' % (log1.filename, log1.data['time'].sum(), log2.filename, log2.data['time'].sum())) + print('Total ops %s: %d\t\t--- %s: %d' % (log1.filename, log1.data['time'].count(), log2.filename, log2.data['time'].count())) + #onednn.stats_comp('jit', 'time',log1, log2) + + print() + onednn.stats_comp('type', 'time',log1, log2) + + #print() + 
#onednn.stats_comp('shape', 'time',log1, log2) + + elif len(sys.argv) > 1 and '.csv' in sys.argv[1]: + log = oneDNNLog() + log.load_log(sys.argv[1]) + print('Total MKLDNN time:', log.data['time'].sum()) + print('Total MKLDNN ops:', log.data['time'].count()) + onednn.breakdown(log.exec_data,"type","time") + onednn.breakdown(log.exec_data,"jit","time") + elif len(sys.argv) > 1: + keyword = "_verbose" + csvpath = onednn.parse_raw_output_to_csv(sys.argv[1], keyword=keyword) + print(csvpath) + log = oneDNNLog() + log.load_log(csvpath) + print('Total MKLDNN time:', log.data['time'].sum()) + print('Total MKLDNN ops:', log.data['time'].count()) + onednn.breakdown(log.exec_data,"type","time") + onednn.breakdown(log.exec_data,"jit","time") diff --git a/Libraries/oneDNN/getting_started/q b/Libraries/oneDNN/tutorials/q similarity index 77% rename from Libraries/oneDNN/getting_started/q rename to Libraries/oneDNN/tutorials/q index 8377675780..98c1f7759a 100755 --- a/Libraries/oneDNN/getting_started/q +++ b/Libraries/oneDNN/tutorials/q @@ -4,6 +4,13 @@ # # Version: 0.5 #======================================== +#property='gpu' +#property='clx' +#property='skx' +if [ -z "$property" ]; then + property='gpu' +fi + if [ -z "$1" ]; then echo "Missing script argument, Usage: ./q run.sh" elif [ ! -f "$1" ]; then @@ -13,9 +20,11 @@ else rm *.sh.* > /dev/null 2>&1 #qsub echo "Submitting job:" - qsub -l nodes=1:gpu:ppn=2 -d . $script + qsub -l nodes=1:$property:ppn=2 -d . $script + # qsub -q batch@v-qsvr-nda-l nodes=ppn=2 -I + # pbsnodes #qstat - qstat + qstat #wait for output file to be generated and display echo -ne "Waiting for Output." 
until [ -f $script.o* ]; do diff --git a/Libraries/oneDNN/tutorials/requirements.txt b/Libraries/oneDNN/tutorials/requirements.txt new file mode 100644 index 0000000000..c68f08ce51 --- /dev/null +++ b/Libraries/oneDNN/tutorials/requirements.txt @@ -0,0 +1,5 @@ +###### Requirements without Version Specifiers ######` +pandas +matplotlib +psutil +###### Requirements with Version Specifiers ######` diff --git a/Libraries/oneDNN/tutorials/sample.json b/Libraries/oneDNN/tutorials/sample.json new file mode 100644 index 0000000000..1432aad4d0 --- /dev/null +++ b/Libraries/oneDNN/tutorials/sample.json @@ -0,0 +1,11 @@ +{ + "name": "oneDNN Tutorials", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneDNN"], + "description": "oneDNN Tutorials among different features like Verbose Mode and CPU Dispatcher Control", + "toolchain": ["dpcpp","gcc"], + "languages": [{"cpp":{}}], + "dependencies": ["oneDNN"], + "os": ["linux"], + "builder": ["cli"], + "targetDevice": ["CPU", "GPU"] +} diff --git a/Libraries/oneDNN/simple_model/simple_model.ipynb b/Libraries/oneDNN/tutorials/simple_model.ipynb similarity index 100% rename from Libraries/oneDNN/simple_model/simple_model.ipynb rename to Libraries/oneDNN/tutorials/simple_model.ipynb From 10dc87676c093271938dfdfaecd3f4201359f42f Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Thu, 20 Aug 2020 16:01:08 -0700 Subject: [PATCH 05/11] add ci tests support for notebooks --- .../oneCCL_Getting_Started.ipynb | 0 .../{oneCCL_Getting_Started => tutorials}/q | 0 Libraries/oneCCL/tutorials/sample.json | 21 +++++++++ Libraries/oneDNN/tutorials/sample.json | 43 ++++++++++++++++--- 4 files changed, 59 insertions(+), 5 deletions(-) rename Libraries/oneCCL/{oneCCL_Getting_Started => tutorials}/oneCCL_Getting_Started.ipynb (100%) rename Libraries/oneCCL/{oneCCL_Getting_Started => tutorials}/q (100%) create mode 100644 Libraries/oneCCL/tutorials/sample.json diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/oneCCL_Getting_Started.ipynb 
b/Libraries/oneCCL/tutorials/oneCCL_Getting_Started.ipynb similarity index 100% rename from Libraries/oneCCL/oneCCL_Getting_Started/oneCCL_Getting_Started.ipynb rename to Libraries/oneCCL/tutorials/oneCCL_Getting_Started.ipynb diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/q b/Libraries/oneCCL/tutorials/q similarity index 100% rename from Libraries/oneCCL/oneCCL_Getting_Started/q rename to Libraries/oneCCL/tutorials/q diff --git a/Libraries/oneCCL/tutorials/sample.json b/Libraries/oneCCL/tutorials/sample.json new file mode 100644 index 0000000000..ed2f54183f --- /dev/null +++ b/Libraries/oneCCL/tutorials/sample.json @@ -0,0 +1,21 @@ +{ + "guid": "70FAEF03-6509-4B6C-B995-D42A65CE36EE", + "name": "oneCCL Tutorials", + "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneCCL"], + "description": "oneCCL tutorials.", + "toolchain": ["dpcpp"], + "languages": [{"cpp":{}}], + "dependencies": ["ccl"], + "os": ["linux"], + "builder": ["cli","cmake"], + "targetDevice": ["CPU", "GPU"], + "ciTests": { + "linux": [{ + "env": ["source /opt/intel/oneapi/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force" ], + "id": "gsg", + "steps": [ + "runipy oneCCL_Getting_Started.ipynb" + ] + }] + } +} diff --git a/Libraries/oneDNN/tutorials/sample.json b/Libraries/oneDNN/tutorials/sample.json index 1432aad4d0..15dbac04e1 100644 --- a/Libraries/oneDNN/tutorials/sample.json +++ b/Libraries/oneDNN/tutorials/sample.json @@ -1,11 +1,44 @@ { + "guid": "FC7A16DE-9594-4F40-AFA2-71ACABF366B3", "name": "oneDNN Tutorials", "categories": ["Toolkit/Intel® oneAPI Base Toolkit/oneDNN"], - "description": "oneDNN Tutorials among different features like Verbose Mode and CPU Dispatcher Control", - "toolchain": ["dpcpp","gcc"], + "description": "oneDNN Tutorials.", + "toolchain": ["dpcpp"], "languages": [{"cpp":{}}], - "dependencies": ["oneDNN"], + "dependencies": ["oneDNN", "tbb"], "os": ["linux"], - "builder": ["cli"], - "targetDevice": ["CPU", "GPU"] + "builder": ["ide","cmake"], + "targetDevice": 
["CPU", "GPU"], + "ciTests": { + "linux": [ + { + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], + "id": "gsg", + "steps": [ + "runipy getting_started.ipynb" + ] + }, + { + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], + "id": "simple_model", + "steps": [ + "runipy simple_model.ipynb" + ] + }, + { + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], + "id": "verbose_jit", + "steps": [ + "runipy oneDNN_Profiling_VerboseMode_JITDump.ipynb" + ] + }, + { + "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_gomp --force" ], + "id": "isa", + "steps": [ + "runipy oneDNN_Analyze_ISA_with_DispatcherControl.ipynb" + ] + } + ] + } } From b5d8036a142f1ab26e3238de6d70c6b8bda10ae5 Mon Sep 17 00:00:00 2001 From: Louie Tsai Date: Thu, 20 Aug 2020 15:54:29 -0700 Subject: [PATCH 06/11] Update README.md --- Libraries/oneCCL/README.md | 5 +++-- Libraries/oneCCL/oneCCL_Getting_Started/README.md | 9 ++++----- Libraries/oneDNN/README.md | 5 +++-- Libraries/oneDNN/dpcpp_interoperability/README.md | 2 +- Libraries/oneDNN/getting_started/README.md | 4 ++-- Libraries/oneDNN/simple_model/README.md | 6 +++--- Libraries/oneDNN/tutorials/README.md | 6 ++++-- 7 files changed, 20 insertions(+), 17 deletions(-) diff --git a/Libraries/oneCCL/README.md b/Libraries/oneCCL/README.md index c6057a55f0..e452812c81 100644 --- a/Libraries/oneCCL/README.md +++ b/Libraries/oneCCL/README.md @@ -11,8 +11,9 @@ The code samples are licensed under MIT license | Type | Name | Description | | --------- | ----------------------- | ------------------------------------------------------------ | -| Component | oneCCL_Getting_Started | Those C++ & C API example demonstrates basic of oneCCL programming model by invoking different collective operations such as allreduce. 
| -| Component | oneCCL_Getting_Started.ipynb |This Jupyter Notebook demonstrates how to compile a oneCCL sample with different releases and how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++ via batch jobs on the Intel oneAPI DevCloud (check below Notice)| +| Component | [oneCCL_Getting_Started](oneCCL_Getting_Started) | Those C++ & C API example demonstrates basic of oneCCL programming model by invoking different collective operations such as allreduce. | +| Component | [tutorials](tutorials) | Hands-on Jupyter notebook tutorials among different topics. | +| Component | [oneCCL_Getting_Started](oneCCL_Getting_Started.ipynb) |This Jupyter Notebook demonstrates how to compile a oneCCL sample with different releases and how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++ via batch jobs on the Intel oneAPI DevCloud (check below Notice)| > Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. 
\ Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ diff --git a/Libraries/oneCCL/oneCCL_Getting_Started/README.md b/Libraries/oneCCL/oneCCL_Getting_Started/README.md index 0b77c073bf..7002e25a18 100644 --- a/Libraries/oneCCL/oneCCL_Getting_Started/README.md +++ b/Libraries/oneCCL/oneCCL_Getting_Started/README.md @@ -6,7 +6,7 @@ By using all reduce collective operation samples, users can understand how to co |:--- |:--- | OS | Linux Ubuntu 18.04; | Hardware | Kaby Lake with GEN9 or newer -| Software | Intel oneAPI Collective Communications Library (oneCCL), Intel oneAPI DPC++ Compiler, Intel oneAPI DPC++ Library (oneDPL), GNU Compiler +| Software | Intel oneAPI Collective Communications Library (oneCCL), Intel oneAPI DPC++/C++ Compiler, Intel oneAPI DPC++ Library (oneDPL), GNU Compiler | What you will learn | basic oneCCL programming model for both Intel CPU and GPU | Time to complete | 15 minutes @@ -15,7 +15,6 @@ By using all reduce collective operation samples, users can understand how to co | ------ | ------ | ------ | | sycl_allreduce_cpp_test.cpp | sycl_allreduce_test.cpp |[Allreduce](https://intel.github.io/oneccl/spec/communication_primitives.html#allreduce) | | cpu_allreduce_cpp_test.cpp | cpu_allreduce_test.cpp/cpu_allreduce_bfp16.c |[Allreduce](https://intel.github.io/oneccl/spec/communication_primitives.html#allreduce) | -|oneCCL_Getting_Started.ipynb (check below Notice)| | | > Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. 
\ Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ @@ -47,7 +46,7 @@ You can refer to this page [oneAPI](https://software.intel.com/en-us/oneapi) for The samples below require the following components, which are part of the [Intel oneAPI Base Tookit](https://software.intel.com/en-us/oneapi/oneapi-kit) * Intel oneAPI Collective Communications Library (oneCCL) -* Intel oneAPI DPC++ Compiler +* Intel oneAPI DPC++/C++ Compiler * Intel oneAPI DPC++ Library (oneDPL) The samples also require OpenCL driver. Please refer [System Requirements](https://software.intel.com/en-us/articles/intel-oneapi-base-toolkit-system-requirements) for OpenCL driver installation. @@ -66,7 +65,7 @@ You can refer to this page [oneAPI](https://software.intel.com/en-us/oneapi) for - Build the samples with GCC for CPU only \ please replace ${ONEAPI_ROOT} for your installation path. \ - ex : /opt/intel/inteloneapi \ + ex : /opt/intel/oneapi \ Don't need to replace {DPCPP_CMPLR_ROOT} ``` source ${ONEAPI_ROOT}/setvars.sh --ccl-configuration=cpu_icc @@ -84,7 +83,7 @@ Users can rebuild the cpu_allreduce_cpp_test.cpp by typing "make cpu_allreduce_c - Build the samples with SYCL for GPU and CPU \ please replace ${ONEAPI_ROOT} for your installation path. \ - ex : /opt/intel/inteloneapi \ + ex : /opt/intel/oneapi \ Don't need to replace {DPCPP_CMPLR_ROOT} ``` source ${ONEAPI_ROOT}/setvars.sh --ccl-configuration=cpu_gpu_dpcpp diff --git a/Libraries/oneDNN/README.md b/Libraries/oneDNN/README.md index 4c11e0d6d5..825dd1e156 100644 --- a/Libraries/oneDNN/README.md +++ b/Libraries/oneDNN/README.md @@ -16,9 +16,10 @@ The code samples are licensed under MIT license. 
| Type | Name | Description | --------- | ------------------------------------------------ | - -| Component | [getting_started](getting_started) | A C++ sample demonstrating basics of oneDNN programming model. The sample also includes a Jupyter notebook with step by step instructions on building code with different compilers and runtime configurations oneDNN support. +| Component | [getting_started](getting_started) | A C++ sample demonstrating basics of oneDNN programming model. | Component | [dpcpp_interoparibility](dpcpp_interoperability) | A DPC++ example demonstrating interoperaility of oneDNN with DPC++ application code. -| Component | [simple_model](simple_model) | A C++ example demonstrating implmentation of simple convolutional model with oneDNN. The samples also include a Jupyter notebook with step by step instructions on running oneDNN-based application on a GPU. +| Component | [simple_model](simple_model) | A C++ example demonstrating implementation of simple convolutional model with oneDNN. +| Component | [tutorials](tutorials) | Hands-on Jupyter notebook tutorials among different topics.
# Using Samples in Intel oneAPI DevCloud diff --git a/Libraries/oneDNN/dpcpp_interoperability/README.md b/Libraries/oneDNN/dpcpp_interoperability/README.md index 5e26388738..18411eba91 100644 --- a/Libraries/oneDNN/dpcpp_interoperability/README.md +++ b/Libraries/oneDNN/dpcpp_interoperability/README.md @@ -8,7 +8,7 @@ and this interface also helps users to execute a custom SYCL kernel with oneDNN | :--- | :--- | OS | Linux Ubuntu 18.04; | Hardware | Kaby Lake with GEN9 or newer -| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB) +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++/C++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB) | What you will learn | Using oneDNN in DPC++ application targeting Intel CPU or Intel GPU | Time to complete | 15 minutes diff --git a/Libraries/oneDNN/getting_started/README.md b/Libraries/oneDNN/getting_started/README.md index ed644fcc28..9c573c2b36 100644 --- a/Libraries/oneDNN/getting_started/README.md +++ b/Libraries/oneDNN/getting_started/README.md @@ -17,7 +17,7 @@ in Intel oneAPI DevCloud environment. | :--- | :--- | OS | Linux* Ubuntu* 18.04; Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler Collection, Intel C++ Compiler +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++/C++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler Collection, Intel C++ Compiler | What you will learn | Running a simple convolutional model on Intel CPU or Intel GPU | Time to complete | 15 minutes @@ -67,7 +67,7 @@ make ./bin/simple_model ``` -By default the sample uses oneAPI DPC++ Compiler and can execute on CPUs or +By default the sample uses oneAPI DPC++/C++ Compiler and can execute on CPUs or Intel GPUs. 
You can build the sample with CPU support with other compilers and threading runtimes: * GNU C++ Compiler and GNU OpenMP runtime diff --git a/Libraries/oneDNN/simple_model/README.md b/Libraries/oneDNN/simple_model/README.md index 0053b89137..cd84489858 100644 --- a/Libraries/oneDNN/simple_model/README.md +++ b/Libraries/oneDNN/simple_model/README.md @@ -9,7 +9,7 @@ in Intel oneAPI DevCloud environment. | :--- | :--- | OS | Linux* Ubuntu* 18.04; Windows 10 | Hardware | Skylake with GEN9 or newer -| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler Collection, Intel C++ Compiler +| Software | Intel oneAPI Deep Neural Network Library (oneDNN), Intel oneAPI DPC++/C++ Compiler, Intel oneAPI Threading Building Blocks (oneTBB), GNU Compiler Collection, Intel C++ Compiler | What you will learn | Running a simple convolutional model on Intel CPU or Intel GPU | Time to complete | 15 minutes @@ -22,7 +22,7 @@ unit (ReLU), linear response normalization (LRN), and inner product. With this sample you will learn: * How to run a simple convolutional network on Intel CPU or Intel GPU -* How to compile examples with Intel oneAPI DPC++ Compiler, Intel C++ Compiler, +* How to compile examples with Intel oneAPI DPC++/C++ Compiler, Intel C++ Compiler, and GNU C++ Compiler * How to switch between OpenMP and TBB for CPU parallelization * How to describe tensors with oneDNN memory objects @@ -64,7 +64,7 @@ make ./bin/cnn-inference-f32-cpp ``` -By default the sample uses oneAPI DPC++ Compiler and can execute on CPUs or +By default the sample uses oneAPI DPC++/C++ Compiler and can execute on CPUs or Intel GPUs. 
You can build the sample with CPU support with other compilers and threading runtimes: * GNU C++ Compiler and GNU OpenMP runtime diff --git a/Libraries/oneDNN/tutorials/README.md b/Libraries/oneDNN/tutorials/README.md index 6755ed53b8..31d944e43a 100644 --- a/Libraries/oneDNN/tutorials/README.md +++ b/Libraries/oneDNN/tutorials/README.md @@ -11,8 +11,10 @@ The code samples are licensed under MIT license | Type | Name | Description | | --------- | ----------------------- | ------------------------------------------------------------ | -| Component | oneDNN_Profiling_VerboseMode_JITDump.ipynb | This Jupyter Notebook demonstrates how to use Verbose Mode and JIT Dump to profile oneDNN samples. | -| Component | oneDNN_Analyze_ISA_with_DispatcherControl.ipynb | This Jupyter Notebook demonstrates how to use CPU Dispatch Control to generate JIT codes among different ISA on CPU and also analyze JIT kernels among ISAs.| +| Component | [getting_started](getting_started.ipynb) | The sample also includes a Jupyter notebook with step by step instructions on building code with different compilers and runtime configurations oneDNN support. | +| Component | [simple_model](simple_model.ipynb)| A Jupyter notebook with step by step instructions on running oneDNN-based application on a GPU. | +| Component | [oneDNN_Profiling_VerboseMode_JITDump](oneDNN_Profiling_VerboseMode_JITDump.ipynb) | This Jupyter Notebook demonstrates how to use Verbose Mode and JIT Dump to profile oneDNN samples. | +| Component | [oneDNN_Analyze_ISA_with_DispatcherControl](oneDNN_Analyze_ISA_with_DispatcherControl.ipynb) | This Jupyter Notebook demonstrates how to use CPU Dispatch Control to generate JIT codes among different ISA on CPU and also analyze JIT kernels among ISAs.| > Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. 
\ Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ From d664fb56a4e02eb0ac1d8652fad6758c0e4e1b9e Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Fri, 21 Aug 2020 09:42:09 -0700 Subject: [PATCH 07/11] fix for CI testings --- Libraries/oneDNN/dpcpp_interoperability/sample.json | 4 ++-- Libraries/oneDNN/getting_started/sample.json | 2 +- Libraries/oneDNN/simple_model/sample.json | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Libraries/oneDNN/dpcpp_interoperability/sample.json b/Libraries/oneDNN/dpcpp_interoperability/sample.json index 0289379abe..59a8898574 100644 --- a/Libraries/oneDNN/dpcpp_interoperability/sample.json +++ b/Libraries/oneDNN/dpcpp_interoperability/sample.json @@ -18,8 +18,8 @@ "cd build", "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", "make sycl-interop-cpp", - "./out/sycl-interop-cpp cpu", - "SYCL_BE=PI_OPENCL ./out/sycl-interop-cpp gpu" + "./bin/sycl-interop-cpp cpu", + "SYCL_BE=PI_OPENCL ./bin/sycl-interop-cpp gpu" ] }] } diff --git a/Libraries/oneDNN/getting_started/sample.json b/Libraries/oneDNN/getting_started/sample.json index 956fc69f38..1a31985f9c 100644 --- a/Libraries/oneDNN/getting_started/sample.json +++ b/Libraries/oneDNN/getting_started/sample.json @@ -18,7 +18,7 @@ "cd build", "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", "make getting-started-cpp", - "SYCL_BE=PI_OPENCL ./out/getting-started-cpp gpu" + "SYCL_BE=PI_OPENCL ./bin/getting-started-cpp gpu" ] }] } diff --git a/Libraries/oneDNN/simple_model/sample.json b/Libraries/oneDNN/simple_model/sample.json index 9e2dfd9f20..b27e7150fb 100644 --- a/Libraries/oneDNN/simple_model/sample.json +++ b/Libraries/oneDNN/simple_model/sample.json @@ -18,7 +18,7 @@ "cd build", "cmake .. 
-DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", "make cnn-inference-f32-cpp", - "./out/cnn-inference-f32-cpp cpu", + "./bin/cnn-inference-f32-cpp cpu", "SYCL_BE=PI_OPENCL ./out/cnn-inference-f32-cpp gpu" ] }] From e9b8f145ad6c2155f79fe39b45528d8232f70de0 Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Fri, 21 Aug 2020 09:56:23 -0700 Subject: [PATCH 08/11] add one README for oneCCL tutorials --- Libraries/oneCCL/README.md | 1 - Libraries/oneCCL/tutorials/README.md | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Libraries/oneCCL/tutorials/README.md diff --git a/Libraries/oneCCL/README.md b/Libraries/oneCCL/README.md index e452812c81..7bf8446d9a 100644 --- a/Libraries/oneCCL/README.md +++ b/Libraries/oneCCL/README.md @@ -13,7 +13,6 @@ The code samples are licensed under MIT license | --------- | ----------------------- | ------------------------------------------------------------ | | Component | [oneCCL_Getting_Started](oneCCL_Getting_Started) | Those C++ & C API example demonstrates basic of oneCCL programming model by invoking different collective operations such as allreduce. | | Component | [tutorials](tutorials) | Hands-on Jupyter notebook tutorials among different topics. | -| Component | [oneCCL_Getting_Started](oneCCL_Getting_Started.ipynb) |This Jupyter Notebook demonstrates how to compile a oneCCL sample with different releases and how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++ via batch jobs on the Intel oneAPI DevCloud (check below Notice)| > Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. 
\ Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ diff --git a/Libraries/oneCCL/tutorials/README.md b/Libraries/oneCCL/tutorials/README.md new file mode 100644 index 0000000000..79f7dcedac --- /dev/null +++ b/Libraries/oneCCL/tutorials/README.md @@ -0,0 +1,18 @@ +# Intel oneAPI Collective Communications Library (oneCCL) + +Collective Communication Library is a library providing an efficient implementation of communication patterns used in deep learning. + +Github : https://github.com/oneapi-src/oneCCL + +## License +The code samples are licensed under MIT license + +# oneCCL samples + +| Type | Name | Description | +| --------- | ----------------------- | ------------------------------------------------------------ | +| Component | [oneCCL_Getting_Started](oneCCL_Getting_Started.ipynb) |This Jupyter Notebook demonstrates how to compile a oneCCL sample with different releases and how to port a oneCCL sample from CPU-only version to CPU&GPU version by using DPC++ via batch jobs on the Intel oneAPI DevCloud (check below Notice)| +> Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. \ +Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ +Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ +Once users are in the JupyterLab with downloaded jupyter notebook samples, they can start following the steps without further installation needed.
From 6c8fd32ead74312babe365ebc3a3f0779a3f9691 Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Fri, 21 Aug 2020 18:14:01 -0700 Subject: [PATCH 09/11] rename jupyter notebooks per vadim's request --- Libraries/oneDNN/tutorials/README.md | 4 ++-- ...ontrol.ipynb => analyze_isa_with_dispatcher_control.ipynb} | 0 Libraries/oneDNN/tutorials/sample.json | 4 ++-- ...filing_VerboseMode_JITDump.ipynb => verbose_jitdump.ipynb} | 0 4 files changed, 4 insertions(+), 4 deletions(-) rename Libraries/oneDNN/tutorials/{oneDNN_Analyze_ISA_with_DispatcherControl.ipynb => analyze_isa_with_dispatcher_control.ipynb} (100%) rename Libraries/oneDNN/tutorials/{oneDNN_Profiling_VerboseMode_JITDump.ipynb => verbose_jitdump.ipynb} (100%) diff --git a/Libraries/oneDNN/tutorials/README.md b/Libraries/oneDNN/tutorials/README.md index 31d944e43a..fcdafb80b6 100644 --- a/Libraries/oneDNN/tutorials/README.md +++ b/Libraries/oneDNN/tutorials/README.md @@ -13,8 +13,8 @@ The code samples are licensed under MIT license | --------- | ----------------------- | ------------------------------------------------------------ | | Component | [getting_started](getting_started.ipynb) | The sample also includes a Jupyter notebook with step by step instructions on building code with different compilers and runtime configurations oneDNN support. | | Component | [simple_model](simple_model.ipynb)| A Jupyter notebook with step by step instructions on running oneDNN-based application on a GPU. | -| Component | [oneDNN_Profiling_VerboseMode_JITDump](oneDNN_Profiling_VerboseMode_JITDump.ipynb) | This Jupyter Notebook demonstrates how to use Verbose Mode and JIT Dump to profile oneDNN samples. 
| -| Component | [oneDNN_Analyze_ISA_with_DispatcherControl](oneDNN_Analyze_ISA_with_DispatcherControl.ipynb) | This Jupyter Notebook demonstrates how to use CPU Dispatch Control to generate JIT codes among different ISA on CPU and also analyze JIT kernels among ISAs.| +| Component | [verbose_jitdump](verbose_jitdump.ipynb) | This Jupyter Notebook demonstrates how to use Verbose Mode and JIT Dump to profile oneDNN samples. | +| Component | [analyze_isa_with_dispatcher_control](analyze_isa_with_dispatcher_control.ipynb) | This Jupyter Notebook demonstrates how to use CPU Dispatch Control to generate JIT codes among different ISA on CPU and also analyze JIT kernels among ISAs.| > Notice : Please use Intel oneAPI DevCloud as the environment for jupyter notebook samples. \ Users can refer to [DevCloud Getting Started](https://devcloud.intel.com/oneapi/get-started/) for using DevCloud \ Users can use JupyterLab from DevCloud via "One-click Login in", and download samples via "git clone" or the "oneapi-cli" tool \ diff --git a/Libraries/oneDNN/tutorials/oneDNN_Analyze_ISA_with_DispatcherControl.ipynb b/Libraries/oneDNN/tutorials/analyze_isa_with_dispatcher_control.ipynb similarity index 100% rename from Libraries/oneDNN/tutorials/oneDNN_Analyze_ISA_with_DispatcherControl.ipynb rename to Libraries/oneDNN/tutorials/analyze_isa_with_dispatcher_control.ipynb diff --git a/Libraries/oneDNN/tutorials/sample.json b/Libraries/oneDNN/tutorials/sample.json index 15dbac04e1..b09c50744e 100644 --- a/Libraries/oneDNN/tutorials/sample.json +++ b/Libraries/oneDNN/tutorials/sample.json @@ -29,14 +29,14 @@ "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], "id": "verbose_jit", "steps": [ - "runipy oneDNN_Profiling_VerboseMode_JITDump.ipynb" + "runipy verbose_jitdump.ipynb" ] }, { "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_gomp --force" ], "id": "isa", "steps": [ - "runipy 
oneDNN_Analyze_ISA_with_DispatcherControl.ipynb" + "runipy analyze_isa_with_dispatcher_control.ipynb" ] } ] diff --git a/Libraries/oneDNN/tutorials/oneDNN_Profiling_VerboseMode_JITDump.ipynb b/Libraries/oneDNN/tutorials/verbose_jitdump.ipynb similarity index 100% rename from Libraries/oneDNN/tutorials/oneDNN_Profiling_VerboseMode_JITDump.ipynb rename to Libraries/oneDNN/tutorials/verbose_jitdump.ipynb From 5998e0a5be79508b51e48a6ecb6667c5b5ce9562 Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Mon, 24 Aug 2020 09:00:16 -0700 Subject: [PATCH 10/11] fix for ci script --- Libraries/oneDNN/simple_model/sample.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Libraries/oneDNN/simple_model/sample.json b/Libraries/oneDNN/simple_model/sample.json index b27e7150fb..3a883d7f20 100644 --- a/Libraries/oneDNN/simple_model/sample.json +++ b/Libraries/oneDNN/simple_model/sample.json @@ -19,7 +19,7 @@ "cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=dpcpp", "make cnn-inference-f32-cpp", "./bin/cnn-inference-f32-cpp cpu", - "SYCL_BE=PI_OPENCL ./out/cnn-inference-f32-cpp gpu" + "SYCL_BE=PI_OPENCL ./bin/cnn-inference-f32-cpp gpu" ] }] From 5a635480b46485693e919c2d1d63af1f3aa5ad77 Mon Sep 17 00:00:00 2001 From: ltsai1 Date: Mon, 24 Aug 2020 09:03:15 -0700 Subject: [PATCH 11/11] rename id for oneccl/onednn GSG --- Libraries/oneCCL/tutorials/sample.json | 2 +- Libraries/oneDNN/tutorials/sample.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Libraries/oneCCL/tutorials/sample.json b/Libraries/oneCCL/tutorials/sample.json index ed2f54183f..23d4b492c6 100644 --- a/Libraries/oneCCL/tutorials/sample.json +++ b/Libraries/oneCCL/tutorials/sample.json @@ -12,7 +12,7 @@ "ciTests": { "linux": [{ "env": ["source /opt/intel/oneapi/setvars.sh --ccl-configuration=cpu_gpu_dpcpp --force" ], - "id": "gsg", + "id": "ccl gsg", "steps": [ "runipy oneCCL_Getting_Started.ipynb" ] diff --git a/Libraries/oneDNN/tutorials/sample.json 
b/Libraries/oneDNN/tutorials/sample.json index b09c50744e..00ac1bb1da 100644 --- a/Libraries/oneDNN/tutorials/sample.json +++ b/Libraries/oneDNN/tutorials/sample.json @@ -13,7 +13,7 @@ "linux": [ { "env": ["source /opt/intel/oneapi/setvars.sh --dnnl-configuration=cpu_dpcpp_gpu_dpcpp --force" ], - "id": "gsg", + "id": "dnn gsg", "steps": [ "runipy getting_started.ipynb" ]