Skip to content

Commit 240a890

Browse files
authored
Fortran do conc (#1895)
* Submitting Fortran edge detection sample Signed-off-by: u172874 <u172874@s001-n161.aidevcloud> * Submitting Fortran edge detection sample Signed-off-by: hagabb <henry.a.gabb@intel.com> * made requested changes to the README Signed-off-by: hagabb <henry.a.gabb@intel.com> --------- Signed-off-by: u172874 <u172874@s001-n161.aidevcloud> Signed-off-by: hagabb <henry.a.gabb@intel.com>
1 parent 7725f25 commit 240a890

6 files changed

Lines changed: 550 additions & 0 deletions

File tree

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Copyright 2020 Intel Corporation
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4+
5+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6+
7+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
##=============================================================
2+
## Copyright © 2020 Intel Corporation
3+
##
4+
## SPDX-License-Identifier: MIT
5+
## =============================================================
6+
##
7+
##**************************************************************
8+
## To compile and run the do concurrent examples: make run_dc
9+
## To compile and run the for-loop examples: make run_omp
10+
## To compile and run all examples: make run_all
11+
##**************************************************************
12+
13+
## Fortran compiler and option sets.
## Note: FC is assigned with '=' (not '?=') because GNU make predefines FC,
## so '?=' would never take effect. Override with: make FC=<compiler>
FC       = ifx
OMP_OPTS = -qopenmp
GPU_OPTS = -fopenmp-targets=spir64 -fopenmp-target-do-concurrent

## Arguments passed to every example program when it is run
RUN_ARGS = -n 12 -o 2 -i 1 -d

## Executables built from each of the two source files
DC_PROGS  = img_seg_do_conc_cpu_seq img_seg_do_conc_cpu_par img_seg_do_conc_gpu
OMP_PROGS = img_seg_cpu img_seg_omp_cpu img_seg_omp_gpu

## Plain 'make' builds and runs everything
default: run_all

## Build every executable without running anything
all: $(DC_PROGS) $(OMP_PROGS)

run_all: run_dc run_omp

run_dc: $(DC_PROGS)
	./img_seg_do_conc_cpu_seq $(RUN_ARGS)
	./img_seg_do_conc_cpu_par $(RUN_ARGS)
	OMP_TARGET_OFFLOAD=MANDATORY ./img_seg_do_conc_gpu $(RUN_ARGS)

run_omp: $(OMP_PROGS)
	./img_seg_cpu $(RUN_ARGS)
	./img_seg_omp_cpu $(RUN_ARGS)
	OMP_TARGET_OFFLOAD=MANDATORY ./img_seg_omp_gpu $(RUN_ARGS)

## DO CONCURRENT implementation: sequential CPU, parallel CPU, GPU offload
img_seg_do_conc_cpu_seq: img_seg_do_concurrent.F90
	$(FC) $< -o $@

img_seg_do_conc_cpu_par: img_seg_do_concurrent.F90
	$(FC) $< -o $@ $(OMP_OPTS)

img_seg_do_conc_gpu: img_seg_do_concurrent.F90
	$(FC) $< -o $@ $(OMP_OPTS) $(GPU_OPTS)

## OpenMP implementation: sequential CPU, parallel CPU, GPU offload
img_seg_cpu: img_seg_omp_target.F90
	$(FC) $< -o $@

img_seg_omp_cpu: img_seg_omp_target.F90
	$(FC) $< -o $@ $(OMP_OPTS)

img_seg_omp_gpu: img_seg_omp_target.F90
	$(FC) $< -o $@ $(OMP_OPTS) $(GPU_OPTS)

clean:
	-rm -f $(DC_PROGS)
	-rm -f $(OMP_PROGS)

.PHONY: default all run_all run_dc run_omp clean
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Simple Edge Detection Sample
2+
Segmentation is a common operation in image processing to find the boundaries of objects in an image.
3+
This sample implements a simple edge detection algorithm to find object boundaries in a binary image.
4+
However, this sample is more about offloading Fortran code to a GPU than it is about edge detection.
5+
The algorithm is implemented in two different but functionally equivalent ways. First, it is implemented
6+
using ordinary nested for-loops that are parallelized using OpenMP directives. Second, it is implemented
7+
using a single DO CONCURRENT loop, which is parallelized using the OpenMP backend. In either case, the
8+
Intel&reg; OpenMP runtime library is capable of offloading the edge detection loops to a GPU.
9+
10+
| Optimized for | Description
11+
|:--- |:---
12+
| OS | Linux* Ubuntu* 18.04 or newer
13+
| Hardware | Intel&reg; CPUs and GPUs
14+
| Software | Intel&reg; Fortran Compiler
15+
| What you will learn | How to offload Fortran loops to a GPU
16+
| Time to complete | 15 minutes
17+
18+
## Purpose
19+
This sample demonstrates two Fortran implementations of edge detection:
20+
21+
1. img_seg_omp_target.F90 implements edge detection on binary images using ordinary for-loops and OpenMP target directives
22+
2. img_seg_do_concurrent.F90 implements edge detection on binary images using only a DO CONCURRENT loop
23+
24+
The implementations are functionally equivalent. In both cases, the OpenMP runtime library is used to parallelize the
25+
edge detection loops, regardless of whether they are run on the CPU or offloaded to a GPU.
26+
27+
## Key Implementation Details
28+
[Using Fortran DO CONCURRENT for Accelerator Offload](https://www.intel.com/content/www/us/en/developer/articles/technical/using-fortran-do-current-for-accelerator-offload.html) provides more detailed descriptions of each example code, and discusses the relative merits of each approach.
29+
30+
## Using Visual Studio Code* (Optional)
31+
32+
You can use Visual Studio Code (VS Code) extensions to set your environment, create launch configurations,
33+
and browse and download samples.
34+
35+
The basic steps to build and run a sample using VS Code include:
36+
- Download a sample using the extension **Code Sample Browser for Intel oneAPI Toolkits**.
37+
- Configure the oneAPI environment with the extension **Environment Configurator for Intel oneAPI Toolkits**.
38+
- Open a Terminal in VS Code (**Terminal>New Terminal**).
39+
- Run the sample in the VS Code terminal using the instructions below.
40+
- (Linux only) Debug your GPU application with GDB for Intel® oneAPI toolkits using the **Generate Launch Configurations** extension.
41+
42+
To learn more about the extensions, see
43+
[Using Visual Studio Code with Intel® oneAPI Toolkits](https://www.intel.com/content/www/us/en/develop/documentation/using-vs-code-with-intel-oneapi/top.html).
44+
45+
After learning how to use the extensions for Intel oneAPI Toolkits, return to this readme for instructions on how to build and run a sample.
46+
47+
## Building and Running this sample
48+
49+
> **Note**: If you have not already done so, set up your CLI
50+
> environment by sourcing the `setvars` script located in
51+
> the root of your oneAPI installation.
52+
>
53+
> Linux Sudo: . /opt/intel/oneapi/setvars.sh
54+
>
55+
> Linux User: . ~/intel/oneapi/setvars.sh
56+
>
57+
> For more information on environment variables, see [Use the setvars Script with Linux or macOS](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/oneapi-development-environment-setup/use-the-setvars-script-with-linux-or-macos.html).
58+
59+
### On a Linux System
60+
Run `make` to build and run the sample. Six programs are generated:
61+
62+
1. img_seg_cpu runs the for-loop implementation sequentially on the CPU
63+
2. img_seg_omp_cpu runs the for-loops in parallel on the CPU using OpenMP directives
64+
3. img_seg_omp_gpu offloads the for-loops to the GPU using OpenMP target directives
65+
4. img_seg_do_conc_cpu_seq runs the DO CONCURRENT implementation sequentially on the CPU
66+
5. img_seg_do_conc_cpu_par runs the DO CONCURRENT loop in parallel on the CPU
67+
6. img_seg_do_conc_gpu offloads the DO CONCURRENT loop to the GPU using the OpenMP backend
68+
69+
You can remove all generated files with `make clean`.
70+
71+
### Example of Output
72+
If everything is working correctly, each example program will perform edge detection on a small, randomly-generated binary
73+
image. It will display the original image followed by the outline of the objects in the image, e.g.:
74+
```
75+
OMP_TARGET_OFFLOAD=MANDATORY ./img_seg_omp_gpu -n 12 -o 2 -i 1 -d
76+
Grid dimensions: 12
77+
Number of images to process: 1
78+
Number of objects in each image: 2
79+
80+
Binary image:
81+
0 0 0 0 0 0 0 0 0 0 0 0
82+
0 0 0 0 0 0 0 0 0 0 0 0
83+
0 0 0 0 0 0 0 0 0 0 0 0
84+
0 1 1 1 1 1 0 0 0 0 0 0
85+
0 1 1 1 1 1 0 0 0 0 0 0
86+
0 1 1 1 1 1 0 0 0 0 0 0
87+
0 1 1 1 1 1 0 0 0 0 0 0
88+
0 1 1 1 1 1 0 0 0 0 0 0
89+
0 0 0 0 0 0 1 1 1 0 0 0
90+
0 0 0 0 0 0 1 1 1 0 0 0
91+
0 0 0 0 0 0 1 1 1 0 0 0
92+
0 0 0 0 0 0 0 0 0 0 0 0
93+
94+
Edge mask:
95+
- - - - - - - - - - - -
96+
- - - - - - - - - - - -
97+
- - - - - - - - - - - -
98+
- T T T T T - - - - - -
99+
- T - - - T - - - - - -
100+
- T - - - T - - - - - -
101+
- T - - - T - - - - - -
102+
- T T T T T - - - - - -
103+
- - - - - - T T T - - -
104+
- - - - - - T - T - - -
105+
- - - - - - T T T - - -
106+
- - - - - - - - - - - -
107+
Image 1 took 9.010000000000000E-004 seconds
108+
Total time (not including first iteration): 0.000000000000000E+000 seconds
109+
```
110+
111+
### Troubleshooting
112+
If an error occurs, troubleshoot the problem using the Diagnostics Utility for Intel® oneAPI Toolkits.
113+
[Learn more](https://www.intel.com/content/www/us/en/develop/documentation/diagnostic-utility-user-guide/top.html)
114+
115+
## License
116+
Code samples are licensed under the MIT license. See [License.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/License.txt) for details.
117+
118+
Third party program Licenses can be found here: [third-party-programs.txt](https://github.com/oneapi-src/oneAPI-samples/blob/master/third-party-programs.txt)
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
!===============================================================================
2+
!
3+
! Content:
4+
! Implement edge detection on simple binary images using a standard Fortran
5+
! DO CONCURRENT loop. The compiler will offload the loop to a GPU using the
6+
! OpenMP runtime.
7+
!
8+
! Compile for CPU (sequential):
9+
! ifx img_seg_do_concurrent.F90 -o img_seg_do_conc_cpu_seq
10+
!
11+
! Compile for CPU (parallel):
12+
! ifx img_seg_do_concurrent.F90 -o img_seg_do_conc_cpu_par -qopenmp
13+
!
14+
! Compile for GPU using the OpenMP backend:
15+
! ifx img_seg_do_concurrent.F90 -o img_seg_do_conc_gpu -qopenmp \
16+
! -fopenmp-targets=spir64 -fopenmp-target-do-concurrent
17+
!
18+
!===============================================================================
19+
program img_seg_do_conc_example
    use, intrinsic :: iso_fortran_env, only: int64, real64, output_unit, error_unit
    implicit none

    ! Run parameters; the defaults can be overridden on the command line
    ! (see print_help).
    integer :: n = 8                ! the image is n x n pixels
    integer :: objects = 3          ! square objects drawn into each image
    integer :: images = 1           ! number of images to generate and process
    logical :: display = .false.    ! print each image and its edge mask

    integer :: i, j, img_i, allocstat

    integer, allocatable :: image(:,:)      ! binary image: 0 = background, 1 = object
    logical, allocatable :: edge_mask(:,:)  ! .true. where an object pixel is on an edge

    character (len = 132) :: allocmsg

    integer (kind = int64) :: start_time, end_time, clock_precision
    real (kind = real64)   :: cycle_time, total_time = 0.0_real64

    call process_command_line()
    call system_clock(count_rate = clock_precision)

    ! Allocate image and edge mask; a failed allocation is an abnormal exit
    allocate (image(n, n), source = 0, stat = allocstat, errmsg = allocmsg)
    if (allocstat /= 0) error stop trim(allocmsg)

    allocate (edge_mask(n, n), source = .false., stat = allocstat, errmsg = allocmsg)
    if (allocstat /= 0) error stop trim(allocmsg)

    ! Seed the generator once for the whole run rather than once per image
    call random_seed()

    ! Process images
    do img_i = 1, images
        call initialize_image()
        if (display) call display_image()

        call system_clock(start_time)   ! Start timer

        ! Outline the objects in the binary image. Only object pixels are
        ! visited (loop mask). An object pixel is an edge if it lies on the
        ! image border or if any pixel in its 3x3 neighborhood is background.
        ! The border test comes first so the 3x3 section never leaves the array.
        do concurrent (j = 1:n, i = 1:n, image(i, j) /= 0)
            if (i == 1 .or. i == n .or. &
                j == 1 .or. j == n) then
                edge_mask(i, j) = .true.
            else
                if (any(image(i-1:i+1, j-1:j+1) == 0)) edge_mask(i, j) = .true.
            endif
        enddo

        call system_clock(end_time)     ! Stop timer
        cycle_time = real(end_time - start_time, kind = real64) / &
                     real(clock_precision, kind = real64)

        if (display) call display_edge_mask()

        print *, 'Image', img_i, 'took', cycle_time, 'seconds'

        ! The first image is excluded from the total
        if (img_i /= 1) total_time = total_time + cycle_time

        edge_mask = .false.             ! Reset edge mask
    enddo
    print *, 'Total time (not including first iteration):', total_time, 'seconds'

    deallocate(image, edge_mask)

contains

    ! Clear the image and draw `objects` randomly placed squares of 1s.
    ! Each square has half-width d (1 or 2) and is placed so that it lies
    ! entirely inside the n x n image; squares may overlap.
    subroutine initialize_image()
        integer :: obj, x, x_min, x_max, y, y_min, y_max, d
        real :: rn(3)

        image = 0

        do obj = 1, objects
            call random_number(rn)

            ! Half-width of the square, limited so that a (2d+1)-wide
            ! square still fits when the image is very small (n < 5).
            d = min(1 + floor(2 * rn(1)), (n - 1) / 2)

            ! Valid centre coordinates are d+1 .. n-d (inclusive). The "+ 1"
            ! makes the upper bound reachable; min() guards against rounding.
            x_min = d + 1
            x_max = n - d
            x = min(x_max, x_min + floor((x_max - x_min + 1) * rn(2)))

            y_min = d + 1
            y_max = n - d
            y = min(y_max, y_min + floor((y_max - y_min + 1) * rn(3)))

            image(x-d:x+d, y-d:y+d) = 1
        enddo
    end subroutine initialize_image

    ! Print the binary image, one output line per value of the second index.
    subroutine display_image()
        integer :: line

        print *
        print *, 'Binary image:'
        do line = 1, n
            write(output_unit, '(*(i3))') image(:, line)
        enddo
    end subroutine display_image

    ! Print the edge mask in the same layout as display_image:
    ! 'T' marks an edge pixel and '-' marks everything else.
    subroutine display_edge_mask()
        integer :: line

        print *
        print *, 'Edge mask:'
        do line = 1, n
            write(output_unit, '(*(a3))') merge('T', '-', edge_mask(:, line))
        enddo
    end subroutine display_edge_mask

    ! Parse the command line into n, objects, images and display, validate
    ! the result and print a summary of the run parameters. Any malformed
    ! or unknown option terminates the program with an error status.
    subroutine process_command_line()
        character (len = 32) :: option
        integer :: arg_i, n_args

        n_args = command_argument_count()
        arg_i = 1
        do while (arg_i <= n_args)
            call get_command_argument(arg_i, option)
            select case (option)
                case ('-n')
                    call read_option_value(arg_i, option, n)
                    arg_i = arg_i + 2
                case ('-o')
                    call read_option_value(arg_i, option, objects)
                    arg_i = arg_i + 2
                case ('-i')
                    call read_option_value(arg_i, option, images)
                    arg_i = arg_i + 2
                case ('-d')
                    display = .true.
                    arg_i = arg_i + 1
                case ('-h')
                    call print_help()
                    stop
                case default
                    call usage_error('Unrecognized command-line option: ' // trim(option))
            end select
        enddo

        ! An empty image cannot hold any object, and negative counts are
        ! meaningless, so reject them up front.
        if (n < 1) call usage_error('Image dimension (-n) must be at least 1')
        if (objects < 0) call usage_error('Number of objects (-o) must not be negative')
        if (images < 0) call usage_error('Number of images (-i) must not be negative')

        print *, 'Grid dimensions:', n
        print *, 'Number of images to process:', images
        print *, 'Number of objects in each image:', objects
    end subroutine process_command_line

    ! Read the integer that must follow the option at position `pos` on the
    ! command line. Terminates with a usage error if the value is missing
    ! or is not a valid integer.
    subroutine read_option_value(pos, option, parsed)
        integer, intent(in) :: pos
        character (len = *), intent(in) :: option
        integer, intent(out) :: parsed

        character (len = 32) :: text
        integer :: stat

        if (pos + 1 > command_argument_count()) then
            call usage_error('Option ' // trim(option) // ' requires an integer value')
        endif

        call get_command_argument(pos + 1, text)
        read(text, *, iostat = stat) parsed
        if (stat /= 0) then
            call usage_error('Invalid value for option ' // trim(option) // ': ' // trim(text))
        endif
    end subroutine read_option_value

    ! Report a command-line error on the error unit, show the help text and
    ! terminate with a non-zero exit status.
    subroutine usage_error(message)
        character (len = *), intent(in) :: message

        write(error_unit, '(a)') message
        call print_help()
        error stop
    end subroutine usage_error

    ! Print the list of supported command-line options.
    subroutine print_help()
        print '(a,/)', 'Command-line options:'
        print '(a)', ' -n #  image dimensions (integer)'
        print '(a)', ' -o #  number of objects in image (integer), objects may overlap'
        print '(a)', ' -i #  number of images to process (integer)'
        print '(a)', ' -d    display image and object edge mask'
        print '(a)', ' -h    print this help message and exit'
    end subroutine print_help
end program img_seg_do_conc_example

0 commit comments

Comments
 (0)