Skip to content

Commit 4e3cc63

Browse files
Moult and claude committed
ifcviewer: fix HiZ depth blit and make occlusion test conservative
The HiZ pipeline had two bugs causing false occlusions:

1. The scaling depth blit (glBlitFramebuffer from window-size to HiZ-size) produced GL_INVALID_VALUE on some drivers. Replace with a fullscreen-triangle shader that samples the resolved depth and writes gl_FragDepth.

2. The resolve texture used GL_DEPTH_COMPONENT24 but Qt's default FBO uses D24S8 (depth+stencil). Mismatched formats cause the MSAA resolve blit to fail. Fix by using GL_DEPTH24_STENCIL8 for the resolve texture.

Additionally, the occlusion test was too aggressive for scenes with compressed depth ranges (entire scene in 0.99-1.0). Change from "max over coarse mip texels" to "reject only if ALL fine-mip texels agree the AABB is behind them", with early-out on first non-occluding texel and a 64-sample cap.

Also fix IFC_HIZ_MOTION=0 being treated as enabled (the code checked whether the env var existed, not its value).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 7b64dd3 commit 4e3cc63

2 files changed

Lines changed: 84 additions & 51 deletions

File tree

src/ifcviewer/ViewportWindow.cpp

Lines changed: 82 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,25 @@ static GLuint compileShader(QOpenGLFunctions_4_5_Core* gl, GLenum type, const ch
256256
// counter at binding 1; shared survivor-index output at binding 2. Each
257257
// survivor is written as (u_model_tag | local_instance_index) so the CPU can
258258
// unpack model + local index from one uint.
259+
static const char* HIZ_DOWNSAMPLE_VS = R"(
260+
#version 450 core
261+
void main() {
262+
vec2 pos = vec2((gl_VertexID & 1) * 4.0 - 1.0,
263+
(gl_VertexID & 2) * 2.0 - 1.0);
264+
gl_Position = vec4(pos, 0.0, 1.0);
265+
}
266+
)";
267+
268+
static const char* HIZ_DOWNSAMPLE_FS = R"(
269+
#version 450 core
270+
uniform sampler2D u_depth;
271+
uniform vec2 u_inv_dest_size;
272+
void main() {
273+
vec2 uv = gl_FragCoord.xy * u_inv_dest_size;
274+
gl_FragDepth = texture(u_depth, uv).r;
275+
}
276+
)";
277+
259278
static const char* CULL_COMPUTE_SHADER = R"(
260279
#version 450 core
261280
layout(local_size_x = 64) in;
@@ -502,6 +521,8 @@ ViewportWindow::~ViewportWindow() {
502521
if (hiz_depth_tex_) gl_->glDeleteTextures(1, &hiz_depth_tex_);
503522
if (hiz_resolve_fbo_) gl_->glDeleteFramebuffers(1, &hiz_resolve_fbo_);
504523
if (hiz_resolve_depth_tex_) gl_->glDeleteTextures(1, &hiz_resolve_depth_tex_);
524+
if (hiz_downsample_program_) gl_->glDeleteProgram(hiz_downsample_program_);
525+
if (hiz_downsample_vao_) gl_->glDeleteVertexArrays(1, &hiz_downsample_vao_);
505526
}
506527
context_->doneCurrent();
507528
}
@@ -584,6 +605,12 @@ void ViewportWindow::buildShaders() {
584605
GLuint fs = compileShader(gl_, GL_FRAGMENT_SHADER, AXIS_FRAGMENT_SHADER);
585606
axis_program_ = linkProgram(gl_, vs, fs);
586607
}
608+
{
609+
GLuint vs = compileShader(gl_, GL_VERTEX_SHADER, HIZ_DOWNSAMPLE_VS);
610+
GLuint fs = compileShader(gl_, GL_FRAGMENT_SHADER, HIZ_DOWNSAMPLE_FS);
611+
hiz_downsample_program_ = linkProgram(gl_, vs, fs);
612+
gl_->glCreateVertexArrays(1, &hiz_downsample_vao_);
613+
}
587614
cull_program_ = linkComputeProgram(gl_, CULL_COMPUTE_SHADER);
588615
gl_->glCreateBuffers(1, &gpu_cull_counter_ssbo_);
589616
gl_->glNamedBufferStorage(gpu_cull_counter_ssbo_, sizeof(uint32_t), nullptr,
@@ -1209,22 +1236,16 @@ void ViewportWindow::buildHizPyramid() {
12091236
const int base_w = hizBaseWidth();
12101237
const int base_h = std::max(1, (base_w * win_h) / win_w);
12111238

1212-
// Depth format must match the default FBO's depth format for the blit
1213-
// to succeed — GL spec requires identical internal formats for depth
1214-
// blits. Qt's default surface uses 24-bit depth (setDepthBufferSize(24)
1215-
// in initGL), so we match with DEPTH_COMPONENT24 on both textures.
1216-
//
1217-
// Resolve target (full window size, single sample). Needed because
1218-
// GL also forbids scale-blitting from an MSAA source: resolve at 1:1
1219-
// first, then down-blit.
1239+
// Resolve target (full window size, single sample, D24S8 to match Qt's
1240+
// default FBO which uses depth+stencil even when only depth is requested).
12201241
if (win_w != hiz_resolve_w_ || win_h != hiz_resolve_h_) {
12211242
if (hiz_resolve_fbo_) gl_->glDeleteFramebuffers(1, &hiz_resolve_fbo_);
12221243
if (hiz_resolve_depth_tex_) gl_->glDeleteTextures(1, &hiz_resolve_depth_tex_);
12231244
gl_->glCreateTextures(GL_TEXTURE_2D, 1, &hiz_resolve_depth_tex_);
12241245
gl_->glTextureStorage2D(hiz_resolve_depth_tex_, 1,
1225-
GL_DEPTH_COMPONENT24, win_w, win_h);
1246+
GL_DEPTH24_STENCIL8, win_w, win_h);
12261247
gl_->glCreateFramebuffers(1, &hiz_resolve_fbo_);
1227-
gl_->glNamedFramebufferTexture(hiz_resolve_fbo_, GL_DEPTH_ATTACHMENT,
1248+
gl_->glNamedFramebufferTexture(hiz_resolve_fbo_, GL_DEPTH_STENCIL_ATTACHMENT,
12281249
hiz_resolve_depth_tex_, 0);
12291250
hiz_resolve_w_ = win_w;
12301251
hiz_resolve_h_ = win_h;
@@ -1239,6 +1260,8 @@ void ViewportWindow::buildHizPyramid() {
12391260
gl_->glCreateFramebuffers(1, &hiz_fbo_);
12401261
gl_->glNamedFramebufferTexture(hiz_fbo_, GL_DEPTH_ATTACHMENT,
12411262
hiz_depth_tex_, 0);
1263+
gl_->glNamedFramebufferDrawBuffer(hiz_fbo_, GL_NONE);
1264+
gl_->glNamedFramebufferReadBuffer(hiz_fbo_, GL_NONE);
12421265

12431266
hiz_base_w_ = base_w;
12441267
hiz_base_h_ = base_h;
@@ -1262,34 +1285,39 @@ void ViewportWindow::buildHizPyramid() {
12621285
hiz_pyramid_.assign(off, 1.0f);
12631286
}
12641287

1265-
// Two-step: MSAA default-fb → full-size SS resolve, then SS → down-scaled.
1266-
// GL forbids scaling a blit whose source is multisampled, and also
1267-
// requires matching depth internal formats — hence this dance.
1288+
// Step 1: MSAA default-fb → full-size single-sample resolve (same-size).
1289+
while (gl_->glGetError() != GL_NO_ERROR) {}
12681290
gl_->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
12691291
gl_->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, hiz_resolve_fbo_);
12701292
gl_->glBlitFramebuffer(0, 0, win_w, win_h,
12711293
0, 0, win_w, win_h,
1272-
GL_DEPTH_BUFFER_BIT, GL_NEAREST);
1294+
GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT, GL_NEAREST);
12731295

1274-
gl_->glBindFramebuffer(GL_READ_FRAMEBUFFER, hiz_resolve_fbo_);
1275-
gl_->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, hiz_fbo_);
1276-
gl_->glBlitFramebuffer(0, 0, win_w, win_h,
1277-
0, 0, hiz_base_w_, hiz_base_h_,
1278-
GL_DEPTH_BUFFER_BIT, GL_NEAREST);
1279-
gl_->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
1280-
gl_->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
1281-
1282-
// One-shot diagnostic so blit failures aren't silent. We only warn
1283-
// the first handful of times — GL errors can pile up and spam.
1284-
static int err_warn_budget = 3;
1285-
if (err_warn_budget > 0) {
1286-
GLenum e = gl_->glGetError();
1287-
if (e != GL_NO_ERROR) {
1288-
qWarning("HiZ blit/readback GL error 0x%04x (win %dx%d → %dx%d → %dx%d)",
1289-
e, win_w, win_h, win_w, win_h, hiz_base_w_, hiz_base_h_);
1290-
--err_warn_budget;
1291-
}
1292-
}
1296+
// Step 2: downsample resolved depth to HiZ base via fullscreen-triangle.
1297+
// glBlitFramebuffer with depth + scaling produces GL_INVALID_VALUE on
1298+
// some drivers, so we sample the resolve texture and write gl_FragDepth.
1299+
gl_->glBindFramebuffer(GL_FRAMEBUFFER, hiz_fbo_);
1300+
gl_->glViewport(0, 0, hiz_base_w_, hiz_base_h_);
1301+
gl_->glEnable(GL_DEPTH_TEST);
1302+
gl_->glDepthFunc(GL_ALWAYS);
1303+
gl_->glDepthMask(GL_TRUE);
1304+
gl_->glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
1305+
gl_->glUseProgram(hiz_downsample_program_);
1306+
gl_->glTextureParameteri(hiz_resolve_depth_tex_, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1307+
gl_->glTextureParameteri(hiz_resolve_depth_tex_, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1308+
gl_->glTextureParameteri(hiz_resolve_depth_tex_, GL_TEXTURE_COMPARE_MODE, GL_NONE);
1309+
gl_->glBindTextureUnit(0, hiz_resolve_depth_tex_);
1310+
gl_->glUniform1i(gl_->glGetUniformLocation(hiz_downsample_program_, "u_depth"), 0);
1311+
gl_->glUniform2f(gl_->glGetUniformLocation(hiz_downsample_program_, "u_inv_dest_size"),
1312+
1.0f / static_cast<float>(hiz_base_w_),
1313+
1.0f / static_cast<float>(hiz_base_h_));
1314+
gl_->glBindVertexArray(hiz_downsample_vao_);
1315+
gl_->glDrawArrays(GL_TRIANGLES, 0, 3);
1316+
gl_->glBindVertexArray(0);
1317+
gl_->glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1318+
gl_->glDepthFunc(GL_LESS);
1319+
gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0);
1320+
gl_->glViewport(0, 0, win_w, win_h);
12931321

12941322
// Synchronous readback into level 0 of the pyramid. At 256x128 this
12951323
// is ~128 KB and the driver copy is fast enough not to matter in
@@ -1374,16 +1402,10 @@ bool ViewportWindow::aabbOccludedByHiz(const float mn[3], const float mx[3]) con
13741402
const float v_max = 0.5f * (sy_max + 1.0f);
13751403
const float aabb_near_depth = 0.5f * (sz_min + 1.0f);
13761404

1377-
// Pick mip level where the projected rect covers at most 2 texels on
1378-
// each axis; sample the max over the covered texels there.
1379-
const float px_w = (u_max - u_min) * static_cast<float>(hiz_base_w_);
1380-
const float px_h = (v_max - v_min) * static_cast<float>(hiz_base_h_);
1381-
int mip = 0;
1382-
while ((int)hiz_mip_offset_.size() - 1 > mip &&
1383-
((px_w / (1 << mip)) > 2.0f || (px_h / (1 << mip)) > 2.0f)) {
1384-
++mip;
1385-
}
1386-
1405+
// Sample at a fine mip and reject ONLY if every texel agrees the AABB
1406+
// is behind it. One non-occluding texel → visible, early-out. Cap
1407+
// iteration to avoid slow queries on large projected rects.
1408+
const int mip = std::min(1, (int)hiz_mip_offset_.size() - 1);
13871409
const uint32_t mw = hiz_mip_w_[mip];
13881410
const uint32_t mh = hiz_mip_h_[mip];
13891411
int x0 = static_cast<int>(std::floor(u_min * mw));
@@ -1396,18 +1418,17 @@ bool ViewportWindow::aabbOccludedByHiz(const float mn[3], const float mx[3]) con
13961418
if (y1 > (int)mh) y1 = mh;
13971419
if (x1 <= x0 || y1 <= y0) return false;
13981420

1421+
static constexpr int MAX_HIZ_SAMPLES = 64;
1422+
if ((x1 - x0) * (y1 - y0) > MAX_HIZ_SAMPLES) return false;
1423+
13991424
const float* level = hiz_pyramid_.data() + hiz_mip_offset_[mip];
1400-
float hiz_max = 0.0f;
14011425
for (int y = y0; y < y1; ++y) {
14021426
const float* row = level + static_cast<size_t>(y) * mw;
14031427
for (int x = x0; x < x1; ++x) {
1404-
if (row[x] > hiz_max) hiz_max = row[x];
1428+
if (aabb_near_depth <= row[x]) return false;
14051429
}
14061430
}
1407-
1408-
// AABB's closest point must be strictly farther than everything drawn
1409-
// in the region for it to be fully occluded.
1410-
return aabb_near_depth > hiz_max;
1431+
return true;
14111432
}
14121433

14131434
uint32_t ViewportWindow::pickObjectAt(int x, int y) {
@@ -1559,7 +1580,12 @@ void ViewportWindow::cullModelCpu(ModelGpuData& m, const float planes[6][4],
15591580
// the buffer — a self-reinforcing feedback loop). On static views HiZ
15601581
// kicks in after a single frame of lag.
15611582
const QMatrix4x4 current_vp = proj_matrix_ * view_matrix_;
1562-
const bool hiz_vp_matches = hiz_vp_valid_ && hiz_vp_ == current_vp;
1583+
static const bool hiz_force_motion = []{
1584+
const char* e = std::getenv("IFC_HIZ_MOTION");
1585+
return e && *e && std::atoi(e) != 0;
1586+
}();
1587+
const bool hiz_vp_matches = hiz_vp_valid_
1588+
&& (hiz_force_motion || hiz_vp_ == current_vp);
15631589
const bool hiz_on = hizEnabled() && min_pixel_radius > 0.0f && hiz_vp_matches;
15641590

15651591
// Hot path: read the AABB from the compact bvh_items array (28 B stride)
@@ -1732,7 +1758,12 @@ void ViewportWindow::emitFromGpuSurvivors(
17321758
};
17331759

17341760
const QMatrix4x4 current_vp = proj_matrix_ * view_matrix_;
1735-
const bool hiz_vp_matches = hiz_vp_valid_ && hiz_vp_ == current_vp;
1761+
static const bool hiz_force_motion = []{
1762+
const char* e = std::getenv("IFC_HIZ_MOTION");
1763+
return e && *e && std::atoi(e) != 0;
1764+
}();
1765+
const bool hiz_vp_matches = hiz_vp_valid_
1766+
&& (hiz_force_motion || hiz_vp_ == current_vp);
17361767
const bool hiz_on = hizEnabled() && min_pixel_radius > 0.0f && hiz_vp_matches;
17371768

17381769
thread_local std::vector<bool> mesh_seen;

src/ifcviewer/ViewportWindow.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -332,6 +332,8 @@ class ViewportWindow : public QWindow {
332332
// (hiz_vp_ + hiz_vp_valid_) so next frame's cull can test AABBs
333333
// against a slightly-stale depth. Skipped for the pick pass and when
334334
// IFC_NO_HIZ=1.
335+
GLuint hiz_downsample_program_ = 0;
336+
GLuint hiz_downsample_vao_ = 0;
335337
GLuint hiz_fbo_ = 0;
336338
GLuint hiz_depth_tex_ = 0;
337339
GLuint hiz_resolve_fbo_ = 0; // full-size single-sample resolve

0 commit comments

Comments
 (0)