|
57 | 57 | import java.util.ArrayList; |
58 | 58 | import java.util.HashMap; |
59 | 59 | import java.util.HashSet; |
| 60 | +import java.util.Set; |
60 | 61 | import java.util.Collections; |
61 | 62 | import java.util.LinkedHashMap; |
62 | 63 | import java.util.List; |
@@ -555,6 +556,39 @@ public void setFiniteMathOnly() { |
555 | 556 | public void setUnsafeMathOptimizations() { |
556 | 557 | addBuildOption("-cl-unsafe-math-optimizations"); |
557 | 558 | } |
| 559 | + /** |
| 560 | + * Adds the <a href="http://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/OpenCL_Extensions/cl_nv_compiler_options.txt">-cl-nv-verbose</a> build option (<b><i>NVIDIA GPUs only</i></b>).<br> |
| 561 | + * Enables verbose mode. The resulting build output is reported in JavaCL's log at the INFO level. |
| 562 | + */ |
| 563 | + public void setNVVerbose() { |
| 564 | + addBuildOption("-cl-nv-verbose"); |
| 565 | + } |
| 566 | + /** |
| 567 | + * Adds the <a href="http://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/OpenCL_Extensions/cl_nv_compiler_options.txt">-cl-nv-maxrregcount=N</a> build option (<b><i>NVIDIA GPUs only</i></b>).<br> |
| 568 | + * Specifies the maximum number of registers that GPU functions can use. |
| 569 | + * Up to a function-specific limit, a higher value will generally increase |
| 570 | + * the performance of individual GPU threads that execute this function. |
| 571 | + * However, because thread registers are allocated from a global register |
| 572 | + * pool on each GPU, a higher value of this option will also reduce the |
| 573 | + * maximum thread block size, thereby reducing the amount of thread |
| 574 | + * parallelism. Hence, a good maxrregcount value is the result of a |
| 575 | + * trade-off. |
| 576 | + * If this option is not specified, then no maximum is assumed. Otherwise |
| 577 | + * the specified value will be rounded to the next multiple of 4 registers, |
| 578 | + * up to the GPU-specific maximum of 128 registers. |
| 579 | + * @param N maximum register count (positive integer); it is appended to the option as-is, without validation |
| 580 | + */ |
| 581 | + public void setNVMaximumRegistryCount(int N) { |
| 582 | + addBuildOption("-cl-nv-maxrregcount=" + N); |
| 583 | + } |
| 584 | + /** |
| 585 | + * Adds the <a href="http://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/OpenCL_Extensions/cl_nv_compiler_options.txt">-cl-nv-opt-level=N</a> build option (<b><i>NVIDIA GPUs only</i></b>).<br> |
| 586 | + * Specifies the optimization level (default value: 3). |
| 587 | + * @param N optimization level: a positive integer, or 0 (no optimization); it is appended to the option as-is, without validation |
| 588 | + */ |
| 589 | + public void setNVOptimizationLevel(int N) { |
| 590 | + addBuildOption("-cl-nv-opt-level=" + N); |
| 591 | + } |
558 | 592 |
|
559 | 593 | /** |
560 | 594 | * Please see <a href="http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clBuildProgram.html">OpenCL's clBuildProgram documentation</a> for details on supported build options. |
@@ -666,6 +700,25 @@ protected String computeCacheSignature() throws IOException { |
666 | 700 | "os.name" |
667 | 701 | }; |
668 | 702 |
|
| 703 | + protected Set<String> getProgramBuildInfo(cl_program pgm, Pointer<cl_device_id> deviceIds) { |
| 704 | + Pointer<SizeT> len = allocateSizeT(); |
| 705 | + int bufLen = 2048 * 32; //TODO find proper size |
| 706 | + Pointer<?> buffer = allocateBytes(bufLen); |
| 707 | + |
| 708 | + Set<String> errs = new HashSet<String>(); |
| 709 | + if (deviceIds == null) { |
| 710 | + error(CL.clGetProgramBuildInfo(pgm, null, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len)); |
| 711 | + String s = buffer.getCString(); |
| 712 | + errs.add(s); |
| 713 | + } else { |
| 714 | + for (cl_device_id device : deviceIds) { |
| 715 | + error(CL.clGetProgramBuildInfo(pgm, device, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len)); |
| 716 | + String s = buffer.getCString(); |
| 717 | + errs.add(s); |
| 718 | + } |
| 719 | + } |
| 720 | + return errs; |
| 721 | + } |
669 | 722 | boolean built; |
670 | 723 | /** |
671 | 724 | * Returns the context of this program |
@@ -719,24 +772,13 @@ public synchronized CLProgram build() throws CLBuildException { |
719 | 772 | } |
720 | 773 | int err = CL.clBuildProgram(getEntity(), nDevices, deviceIds, pointerToCString(getOptionsString()), null, null); |
721 | 774 | //int err = CL.clBuildProgram(getEntity(), 0, null, getOptionsString(), null, null); |
| 775 | + Set<String> errors = getProgramBuildInfo(getEntity(), deviceIds); |
| 776 | + |
722 | 777 | if (err != CL_SUCCESS) {//BUILD_PROGRAM_FAILURE) { |
723 | | - Pointer<SizeT> len = allocateSizeT(); |
724 | | - int bufLen = 2048 * 32; //TODO find proper size |
725 | | - Pointer<?> buffer = allocateBytes(bufLen); |
726 | | - |
727 | | - HashSet<String> errs = new HashSet<String>(); |
728 | | - if (deviceIds == null) { |
729 | | - error(CL.clGetProgramBuildInfo(getEntity(), null, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len)); |
730 | | - String s = buffer.getCString(); |
731 | | - errs.add(s); |
732 | | - } else |
733 | | - for (cl_device_id device : deviceIds) { |
734 | | - error(CL.clGetProgramBuildInfo(getEntity(), device, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len)); |
735 | | - String s = buffer.getCString(); |
736 | | - errs.add(s); |
737 | | - } |
738 | | - |
739 | | - throw new CLBuildException(this, "Compilation failure : " + errorString(err), errs); |
| 778 | + throw new CLBuildException(this, "Compilation failure : " + errorString(err), errors); |
| 779 | + } else { |
| 780 | + if (!errors.isEmpty()) |
| 781 | + JavaCL.log(Level.INFO, "Build info :\n\t" + StringUtils.implode(errors, "\n\t")); |
740 | 782 | } |
741 | 783 | built = true; |
742 | 784 | if (deleteTempFiles != null) |
|
0 commit comments