@@ -256,6 +256,25 @@ static GLuint compileShader(QOpenGLFunctions_4_5_Core* gl, GLenum type, const ch
256256// counter at binding 1; shared survivor-index output at binding 2. Each
257257// survivor is written as (u_model_tag | local_instance_index) so the CPU can
258258// unpack model + local index from one uint.
// HiZ downsample pass, vertex stage. Builds a single oversized triangle
// purely from gl_VertexID (IDs 0,1,2 -> clip-space (-1,-1), (3,-1), (-1,3)),
// so the shader declares no vertex inputs. Paired with HIZ_DOWNSAMPLE_FS.
// NOTE(review): the comment lines immediately above this definition
// ("counter at binding 1 ... survivor-index output at binding 2") appear to
// describe CULL_COMPUTE_SHADER, which is defined further down — confirm and
// consider moving that comment back next to it.
259+ static const char * HIZ_DOWNSAMPLE_VS = R"(
260+ #version 450 core
261+ void main() {
262+ vec2 pos = vec2((gl_VertexID & 1) * 4.0 - 1.0,
263+ (gl_VertexID & 2) * 2.0 - 1.0);
264+ gl_Position = vec4(pos, 0.0, 1.0);
265+ }
266+ )" ;
267+
// HiZ downsample pass, fragment stage. For each destination pixel it samples
// u_depth at gl_FragCoord.xy * u_inv_dest_size and writes the red channel to
// gl_FragDepth; no colour output is declared.
//   u_depth         - source depth texture to read from.
//   u_inv_dest_size - (1/width, 1/height) of the render target being written.
// NOTE(review): exactly one texture sample is taken per destination texel.
// When the source is larger than the destination and is sampled with NEAREST
// filtering, each output texel holds one source texel rather than the
// farthest depth of the area it covers — confirm this is conservative enough
// for the occlusion test that consumes the result.
268+ static const char * HIZ_DOWNSAMPLE_FS = R"(
269+ #version 450 core
270+ uniform sampler2D u_depth;
271+ uniform vec2 u_inv_dest_size;
272+ void main() {
273+ vec2 uv = gl_FragCoord.xy * u_inv_dest_size;
274+ gl_FragDepth = texture(u_depth, uv).r;
275+ }
276+ )" ;
277+
259278static const char * CULL_COMPUTE_SHADER = R"(
260279#version 450 core
261280layout(local_size_x = 64) in;
@@ -502,6 +521,8 @@ ViewportWindow::~ViewportWindow() {
502521 if (hiz_depth_tex_) gl_->glDeleteTextures (1 , &hiz_depth_tex_);
503522 if (hiz_resolve_fbo_) gl_->glDeleteFramebuffers (1 , &hiz_resolve_fbo_);
504523 if (hiz_resolve_depth_tex_) gl_->glDeleteTextures (1 , &hiz_resolve_depth_tex_);
524+ if (hiz_downsample_program_) gl_->glDeleteProgram (hiz_downsample_program_);
525+ if (hiz_downsample_vao_) gl_->glDeleteVertexArrays (1 , &hiz_downsample_vao_);
505526 }
506527 context_->doneCurrent ();
507528 }
@@ -584,6 +605,12 @@ void ViewportWindow::buildShaders() {
584605 GLuint fs = compileShader (gl_, GL_FRAGMENT_SHADER, AXIS_FRAGMENT_SHADER);
585606 axis_program_ = linkProgram (gl_, vs, fs);
586607 }
608+ {
609+ GLuint vs = compileShader (gl_, GL_VERTEX_SHADER, HIZ_DOWNSAMPLE_VS);
610+ GLuint fs = compileShader (gl_, GL_FRAGMENT_SHADER, HIZ_DOWNSAMPLE_FS);
611+ hiz_downsample_program_ = linkProgram (gl_, vs, fs);
612+ gl_->glCreateVertexArrays (1 , &hiz_downsample_vao_);
613+ }
587614 cull_program_ = linkComputeProgram (gl_, CULL_COMPUTE_SHADER);
588615 gl_->glCreateBuffers (1 , &gpu_cull_counter_ssbo_);
589616 gl_->glNamedBufferStorage (gpu_cull_counter_ssbo_, sizeof (uint32_t ), nullptr ,
@@ -1209,22 +1236,16 @@ void ViewportWindow::buildHizPyramid() {
12091236 const int base_w = hizBaseWidth ();
12101237 const int base_h = std::max (1 , (base_w * win_h) / win_w);
12111238
1212- // Depth format must match the default FBO's depth format for the blit
1213- // to succeed — GL spec requires identical internal formats for depth
1214- // blits. Qt's default surface uses 24-bit depth (setDepthBufferSize(24)
1215- // in initGL), so we match with DEPTH_COMPONENT24 on both textures.
1216- //
1217- // Resolve target (full window size, single sample). Needed because
1218- // GL also forbids scale-blitting from an MSAA source: resolve at 1:1
1219- // first, then down-blit.
1239+ // Resolve target (full window size, single sample, D24S8 to match Qt's
1240+ // default FBO which uses depth+stencil even when only depth is requested).
12201241 if (win_w != hiz_resolve_w_ || win_h != hiz_resolve_h_) {
12211242 if (hiz_resolve_fbo_) gl_->glDeleteFramebuffers (1 , &hiz_resolve_fbo_);
12221243 if (hiz_resolve_depth_tex_) gl_->glDeleteTextures (1 , &hiz_resolve_depth_tex_);
12231244 gl_->glCreateTextures (GL_TEXTURE_2D, 1 , &hiz_resolve_depth_tex_);
12241245 gl_->glTextureStorage2D (hiz_resolve_depth_tex_, 1 ,
1225- GL_DEPTH_COMPONENT24 , win_w, win_h);
1246+ GL_DEPTH24_STENCIL8 , win_w, win_h);
12261247 gl_->glCreateFramebuffers (1 , &hiz_resolve_fbo_);
1227- gl_->glNamedFramebufferTexture (hiz_resolve_fbo_, GL_DEPTH_ATTACHMENT ,
1248+ gl_->glNamedFramebufferTexture (hiz_resolve_fbo_, GL_DEPTH_STENCIL_ATTACHMENT ,
12281249 hiz_resolve_depth_tex_, 0 );
12291250 hiz_resolve_w_ = win_w;
12301251 hiz_resolve_h_ = win_h;
@@ -1239,6 +1260,8 @@ void ViewportWindow::buildHizPyramid() {
12391260 gl_->glCreateFramebuffers (1 , &hiz_fbo_);
12401261 gl_->glNamedFramebufferTexture (hiz_fbo_, GL_DEPTH_ATTACHMENT,
12411262 hiz_depth_tex_, 0 );
1263+ gl_->glNamedFramebufferDrawBuffer (hiz_fbo_, GL_NONE);
1264+ gl_->glNamedFramebufferReadBuffer (hiz_fbo_, GL_NONE);
12421265
12431266 hiz_base_w_ = base_w;
12441267 hiz_base_h_ = base_h;
@@ -1262,34 +1285,39 @@ void ViewportWindow::buildHizPyramid() {
12621285 hiz_pyramid_.assign (off, 1 .0f );
12631286 }
12641287
1265- // Two-step: MSAA default-fb → full-size SS resolve, then SS → down-scaled.
1266- // GL forbids scaling a blit whose source is multisampled, and also
1267- // requires matching depth internal formats — hence this dance.
1288+ // Step 1: MSAA default-fb → full-size single-sample resolve (same-size).
1289+ while (gl_->glGetError () != GL_NO_ERROR) {}
12681290 gl_->glBindFramebuffer (GL_READ_FRAMEBUFFER, 0 );
12691291 gl_->glBindFramebuffer (GL_DRAW_FRAMEBUFFER, hiz_resolve_fbo_);
12701292 gl_->glBlitFramebuffer (0 , 0 , win_w, win_h,
12711293 0 , 0 , win_w, win_h,
1272- GL_DEPTH_BUFFER_BIT, GL_NEAREST);
1294+ GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT , GL_NEAREST);
12731295
1274- gl_->glBindFramebuffer (GL_READ_FRAMEBUFFER, hiz_resolve_fbo_);
1275- gl_->glBindFramebuffer (GL_DRAW_FRAMEBUFFER, hiz_fbo_);
1276- gl_->glBlitFramebuffer (0 , 0 , win_w, win_h,
1277- 0 , 0 , hiz_base_w_, hiz_base_h_,
1278- GL_DEPTH_BUFFER_BIT, GL_NEAREST);
1279- gl_->glBindFramebuffer (GL_READ_FRAMEBUFFER, 0 );
1280- gl_->glBindFramebuffer (GL_DRAW_FRAMEBUFFER, 0 );
1281-
1282- // One-shot diagnostic so blit failures aren't silent. We only warn
1283- // the first handful of times — GL errors can pile up and spam.
1284- static int err_warn_budget = 3 ;
1285- if (err_warn_budget > 0 ) {
1286- GLenum e = gl_->glGetError ();
1287- if (e != GL_NO_ERROR) {
1288- qWarning (" HiZ blit/readback GL error 0x%04x (win %dx%d → %dx%d → %dx%d)" ,
1289- e, win_w, win_h, win_w, win_h, hiz_base_w_, hiz_base_h_);
1290- --err_warn_budget;
1291- }
1292- }
1296+ // Step 2: downsample resolved depth to HiZ base via fullscreen-triangle.
1297+ // glBlitFramebuffer with depth + scaling produces GL_INVALID_VALUE on
1298+ // some drivers, so we sample the resolve texture and write gl_FragDepth.
1299+ gl_->glBindFramebuffer (GL_FRAMEBUFFER, hiz_fbo_);
1300+ gl_->glViewport (0 , 0 , hiz_base_w_, hiz_base_h_);
1301+ gl_->glEnable (GL_DEPTH_TEST);
1302+ gl_->glDepthFunc (GL_ALWAYS);
1303+ gl_->glDepthMask (GL_TRUE);
1304+ gl_->glColorMask (GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
1305+ gl_->glUseProgram (hiz_downsample_program_);
1306+ gl_->glTextureParameteri (hiz_resolve_depth_tex_, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1307+ gl_->glTextureParameteri (hiz_resolve_depth_tex_, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1308+ gl_->glTextureParameteri (hiz_resolve_depth_tex_, GL_TEXTURE_COMPARE_MODE, GL_NONE);
1309+ gl_->glBindTextureUnit (0 , hiz_resolve_depth_tex_);
1310+ gl_->glUniform1i (gl_->glGetUniformLocation (hiz_downsample_program_, " u_depth" ), 0 );
1311+ gl_->glUniform2f (gl_->glGetUniformLocation (hiz_downsample_program_, " u_inv_dest_size" ),
1312+ 1 .0f / static_cast <float >(hiz_base_w_),
1313+ 1 .0f / static_cast <float >(hiz_base_h_));
1314+ gl_->glBindVertexArray (hiz_downsample_vao_);
1315+ gl_->glDrawArrays (GL_TRIANGLES, 0 , 3 );
1316+ gl_->glBindVertexArray (0 );
1317+ gl_->glColorMask (GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1318+ gl_->glDepthFunc (GL_LESS);
1319+ gl_->glBindFramebuffer (GL_FRAMEBUFFER, 0 );
1320+ gl_->glViewport (0 , 0 , win_w, win_h);
12931321
12941322 // Synchronous readback into level 0 of the pyramid. At 256x128 this
12951323 // is ~128 KB and the driver copy is fast enough not to matter in
@@ -1374,16 +1402,10 @@ bool ViewportWindow::aabbOccludedByHiz(const float mn[3], const float mx[3]) con
13741402 const float v_max = 0 .5f * (sy_max + 1 .0f );
13751403 const float aabb_near_depth = 0 .5f * (sz_min + 1 .0f );
13761404
1377- // Pick mip level where the projected rect covers at most 2 texels on
1378- // each axis; sample the max over the covered texels there.
1379- const float px_w = (u_max - u_min) * static_cast <float >(hiz_base_w_);
1380- const float px_h = (v_max - v_min) * static_cast <float >(hiz_base_h_);
1381- int mip = 0 ;
1382- while ((int )hiz_mip_offset_.size () - 1 > mip &&
1383- ((px_w / (1 << mip)) > 2 .0f || (px_h / (1 << mip)) > 2 .0f )) {
1384- ++mip;
1385- }
1386-
1405+ // Sample at a fine mip and reject ONLY if every texel agrees the AABB
1406+ // is behind it. One non-occluding texel → visible, early-out. Cap
1407+ // iteration to avoid slow queries on large projected rects.
1408+ const int mip = std::min (1 , (int )hiz_mip_offset_.size () - 1 );
13871409 const uint32_t mw = hiz_mip_w_[mip];
13881410 const uint32_t mh = hiz_mip_h_[mip];
13891411 int x0 = static_cast <int >(std::floor (u_min * mw));
@@ -1396,18 +1418,17 @@ bool ViewportWindow::aabbOccludedByHiz(const float mn[3], const float mx[3]) con
13961418 if (y1 > (int )mh) y1 = mh;
13971419 if (x1 <= x0 || y1 <= y0) return false ;
13981420
1421+ static constexpr int MAX_HIZ_SAMPLES = 64 ;
1422+ if ((x1 - x0) * (y1 - y0) > MAX_HIZ_SAMPLES) return false ;
1423+
13991424 const float * level = hiz_pyramid_.data () + hiz_mip_offset_[mip];
1400- float hiz_max = 0 .0f ;
14011425 for (int y = y0; y < y1; ++y) {
14021426 const float * row = level + static_cast <size_t >(y) * mw;
14031427 for (int x = x0; x < x1; ++x) {
1404- if (row[x] > hiz_max) hiz_max = row[x];
1428+ if (aabb_near_depth < = row[x]) return false ;
14051429 }
14061430 }
1407-
1408- // AABB's closest point must be strictly farther than everything drawn
1409- // in the region for it to be fully occluded.
1410- return aabb_near_depth > hiz_max;
1431+ return true ;
14111432}
14121433
14131434uint32_t ViewportWindow::pickObjectAt (int x, int y) {
@@ -1559,7 +1580,12 @@ void ViewportWindow::cullModelCpu(ModelGpuData& m, const float planes[6][4],
15591580 // the buffer — a self-reinforcing feedback loop). On static views HiZ
15601581 // kicks in after a single frame of lag.
15611582 const QMatrix4x4 current_vp = proj_matrix_ * view_matrix_;
1562- const bool hiz_vp_matches = hiz_vp_valid_ && hiz_vp_ == current_vp;
// Environment override, evaluated once (function-local static): enabled when
// the variable named in the std::getenv call below is set, non-empty, and
// std::atoi() of its value is non-zero (so non-numeric text counts as off).
// When enabled, the stored HiZ data is accepted even if hiz_vp_ differs from
// current_vp; hiz_vp_valid_ is still required.
// NOTE(review): an identical lambda is repeated in emitFromGpuSurvivors —
// consider a shared helper so the two call sites cannot drift apart.
1583+ static const bool hiz_force_motion = []{
1584+ const char * e = std::getenv (" IFC_HIZ_MOTION" );
1585+ return e && *e && std::atoi (e) != 0 ;
1586+ }();
1587+ const bool hiz_vp_matches = hiz_vp_valid_
1588+ && (hiz_force_motion || hiz_vp_ == current_vp);
15631589 const bool hiz_on = hizEnabled () && min_pixel_radius > 0 .0f && hiz_vp_matches;
15641590
15651591 // Hot path: read the AABB from the compact bvh_items array (28 B stride)
@@ -1732,7 +1758,12 @@ void ViewportWindow::emitFromGpuSurvivors(
17321758 };
17331759
17341760 const QMatrix4x4 current_vp = proj_matrix_ * view_matrix_;
1735- const bool hiz_vp_matches = hiz_vp_valid_ && hiz_vp_ == current_vp;
// Environment override, evaluated once (function-local static): enabled when
// the variable named in the std::getenv call below is set, non-empty, and
// std::atoi() of its value is non-zero (so non-numeric text counts as off).
// When enabled, the stored HiZ data is accepted even if hiz_vp_ differs from
// current_vp; hiz_vp_valid_ is still required.
// NOTE(review): an identical lambda is repeated in cullModelCpu — consider a
// shared helper so the two call sites cannot drift apart.
1761+ static const bool hiz_force_motion = []{
1762+ const char * e = std::getenv (" IFC_HIZ_MOTION" );
1763+ return e && *e && std::atoi (e) != 0 ;
1764+ }();
1765+ const bool hiz_vp_matches = hiz_vp_valid_
1766+ && (hiz_force_motion || hiz_vp_ == current_vp);
17361767 const bool hiz_on = hizEnabled () && min_pixel_radius > 0 .0f && hiz_vp_matches;
17371768
17381769 thread_local std::vector<bool > mesh_seen;
0 commit comments