Skip to content

Commit f62aae4

Browse files
committed
JavaCL: added -cl-nv-verbose, -cl-nv-maxrregcount, -cl-nv-opt-level + proper log even without error when nv-verbose is set
1 parent e091781 commit f62aae4

1 file changed

Lines changed: 59 additions & 17 deletions

File tree

Core/src/main/java/com/nativelibs4java/opencl/CLProgram.java

Lines changed: 59 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
import java.util.ArrayList;
5858
import java.util.HashMap;
5959
import java.util.HashSet;
60+
import java.util.Set;
6061
import java.util.Collections;
6162
import java.util.LinkedHashMap;
6263
import java.util.List;
@@ -555,6 +556,39 @@ public void setFiniteMathOnly() {
555556
public void setUnsafeMathOptimizations() {
556557
addBuildOption("-cl-unsafe-math-optimizations");
557558
}
559+
/**
560+
* Add the <a href="http://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/OpenCL_Extensions/cl_nv_compiler_options.txt">-cl-nv-verbose</a> compilation option (<b><i>NVIDIA GPUs only</i></b>)<br>
561+
* Enable verbose mode. Output will be reported in JavaCL's log at the INFO level
562+
*/
563+
public void setNVVerbose() {
564+
addBuildOption("-cl-nv-verbose");
565+
}
566+
/**
567+
* Add the <a href="http://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/OpenCL_Extensions/cl_nv_compiler_options.txt">-cl-nv-maxrregcount=N</a> compilation option (<b><i>NVIDIA GPUs only</i></b>)<br>
568+
* Specify the maximum number of registers that GPU functions can use.
569+
* Until a function-specific limit, a higher value will generally increase
570+
* the performance of individual GPU threads that execute this function.
571+
* However, because thread registers are allocated from a global register
572+
* pool on each GPU, a higher value of this option will also reduce the
573+
* maximum thread block size, thereby reducing the amount of thread
574+
* parallelism. Hence, a good maxrregcount value is the result of a
575+
* trade-off.
576+
* If this option is not specified, then no maximum is assumed. Otherwise
577+
* the specified value will be rounded to the next multiple of 4 registers
578+
* until the GPU specific maximum of 128 registers.
579+
* @param N positive integer
580+
*/
581+
public void setNVMaximumRegistryCount(int N) {
582+
addBuildOption("-cl-nv-maxrregcount=" + N);
583+
}
584+
/**
585+
* Add the <a href="http://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/OpenCL_Extensions/cl_nv_compiler_options.txt">-cl-nv-opt-level</a> compilation option (<b><i>NVIDIA GPUs only</i></b>)<br>
586+
* Specify optimization level (default value: 3)
587+
* @param N positive integer, or 0 (no optimization).
588+
*/
589+
public void setNVOptimizationLevel(int N) {
590+
addBuildOption("-cl-nv-opt-level=" + N);
591+
}
558592

559593
/**
560594
* Please see <a href="http://www.khronos.org/registry/cl/sdk/1.0/docs/man/xhtml/clBuildProgram.html">OpenCL's clBuildProgram documentation</a> for details on supported build options.
@@ -666,6 +700,25 @@ protected String computeCacheSignature() throws IOException {
666700
"os.name"
667701
};
668702

703+
protected Set<String> getProgramBuildInfo(cl_program pgm, Pointer<cl_device_id> deviceIds) {
704+
Pointer<SizeT> len = allocateSizeT();
705+
int bufLen = 2048 * 32; //TODO find proper size
706+
Pointer<?> buffer = allocateBytes(bufLen);
707+
708+
Set<String> errs = new HashSet<String>();
709+
if (deviceIds == null) {
710+
error(CL.clGetProgramBuildInfo(pgm, null, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len));
711+
String s = buffer.getCString();
712+
errs.add(s);
713+
} else {
714+
for (cl_device_id device : deviceIds) {
715+
error(CL.clGetProgramBuildInfo(pgm, device, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len));
716+
String s = buffer.getCString();
717+
errs.add(s);
718+
}
719+
}
720+
return errs;
721+
}
669722
boolean built;
670723
/**
671724
* Returns the context of this program
@@ -719,24 +772,13 @@ public synchronized CLProgram build() throws CLBuildException {
719772
}
720773
int err = CL.clBuildProgram(getEntity(), nDevices, deviceIds, pointerToCString(getOptionsString()), null, null);
721774
//int err = CL.clBuildProgram(getEntity(), 0, null, getOptionsString(), null, null);
775+
Set<String> errors = getProgramBuildInfo(getEntity(), deviceIds);
776+
722777
if (err != CL_SUCCESS) {//BUILD_PROGRAM_FAILURE) {
723-
Pointer<SizeT> len = allocateSizeT();
724-
int bufLen = 2048 * 32; //TODO find proper size
725-
Pointer<?> buffer = allocateBytes(bufLen);
726-
727-
HashSet<String> errs = new HashSet<String>();
728-
if (deviceIds == null) {
729-
error(CL.clGetProgramBuildInfo(getEntity(), null, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len));
730-
String s = buffer.getCString();
731-
errs.add(s);
732-
} else
733-
for (cl_device_id device : deviceIds) {
734-
error(CL.clGetProgramBuildInfo(getEntity(), device, CL_PROGRAM_BUILD_LOG, bufLen, buffer, len));
735-
String s = buffer.getCString();
736-
errs.add(s);
737-
}
738-
739-
throw new CLBuildException(this, "Compilation failure : " + errorString(err), errs);
778+
throw new CLBuildException(this, "Compilation failure : " + errorString(err), errors);
779+
} else {
780+
if (!errors.isEmpty())
781+
JavaCL.log(Level.INFO, "Build info :\n\t" + StringUtils.implode(errors, "\n\t"));
740782
}
741783
built = true;
742784
if (deleteTempFiles != null)

0 commit comments

Comments
 (0)