From 907fd036c7af169df7d8b6639ae96e38dff90343 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 16:25:09 +1000 Subject: [PATCH 01/37] Bump util from v0.8.0 branch --- .../ifcopenshell/util/alignment.py | 7 +- .../ifcopenshell/util/attribute.py | 3 +- .../ifcopenshell/util/brick.py | 6 +- .../ifcopenshell/util/classification.py | 3 +- .../ifcopenshell/util/constraint.py | 3 +- .../ifcopenshell/util/cost.py | 12 +- .../ifcopenshell/util/data.py | 7 +- .../ifcopenshell/util/date.py | 8 +- .../ifcopenshell/util/doc.py | 19 +- .../ifcopenshell/util/element.py | 35 ++- .../ifcopenshell/util/file.py | 3 +- .../ifcopenshell/util/fm.py | 6 +- .../util/generate_pset_templates.py | 16 +- .../ifcopenshell/util/geolocation.py | 15 +- .../util/ifc4x3dev_scrape_data_for_docs.py | 15 +- .../ifcopenshell/util/mvd_info.py | 8 +- .../ifcopenshell/util/placement.py | 6 +- .../ifcopenshell/util/pset.py | 9 +- .../ifcopenshell/util/representation.py | 9 +- .../ifcopenshell/util/resource.py | 6 +- .../ifcopenshell/util/schema.py | 13 +- .../util/schema/ifc_classes_suggestions.json | 20 ++ .../util/scripts/validate_stub.py | 27 +- .../ifcopenshell/util/selector.py | 205 ++++++++++---- .../ifcopenshell/util/sequence.py | 10 +- .../ifcopenshell/util/shape.py | 30 +- .../ifcopenshell/util/shape_builder.py | 261 +++++++++++++----- .../ifcopenshell/util/system.py | 3 +- .../ifcopenshell/util/type.py | 3 +- .../ifcopenshell/util/unit.py | 14 +- 30 files changed, 555 insertions(+), 227 deletions(-) diff --git a/src/ifcopenshell-python/ifcopenshell/util/alignment.py b/src/ifcopenshell-python/ifcopenshell/util/alignment.py index a2e9c08455f..1b90ac7c5f0 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/alignment.py +++ b/src/ifcopenshell-python/ifcopenshell/util/alignment.py @@ -17,6 +17,7 @@ # along with IfcOpenShell. If not, see . 
import math + import ifcopenshell import ifcopenshell.util.unit @@ -24,7 +25,7 @@ def add_linear_placement_fallback_position(file: ifcopenshell.file) -> ifcopenshell.file: import ifcopenshell.api.alignment - patched_file = ifcopenshell.file.from_string(file.to_string()) + patched_file = ifcopenshell.file.from_string(file.wrapped_data.to_string()) linear_placements = patched_file.by_type("IfcLinearPlacement") for lp in linear_placements: @@ -36,7 +37,7 @@ def add_linear_placement_fallback_position(file: ifcopenshell.file) -> ifcopensh def create_alignment_geometry(file: ifcopenshell.file) -> ifcopenshell.file: import ifcopenshell.api.alignment - patched_file = ifcopenshell.file.from_string(file.to_string()) + patched_file = ifcopenshell.file.from_string(file.wrapped_data.to_string()) alignments = patched_file.by_type("IfcAlignment") for alignment in alignments: @@ -49,7 +50,7 @@ def append_zero_length_segments(file: ifcopenshell.file) -> ifcopenshell.file: """Appends zero length segments to all alignment layouts and layout geometry, if missing.""" import ifcopenshell.api.alignment - patched_file = ifcopenshell.file.from_string(file.to_string()) + patched_file = ifcopenshell.file.from_string(file.wrapped_data.to_string()) alignments = patched_file.by_type("IfcAlignment") for alignment in alignments: diff --git a/src/ifcopenshell-python/ifcopenshell/util/attribute.py b/src/ifcopenshell-python/ifcopenshell/util/attribute.py index a3212aca672..0e34b5c0225 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/attribute.py +++ b/src/ifcopenshell-python/ifcopenshell/util/attribute.py @@ -16,8 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
+from typing import Literal, Union + import ifcopenshell.ifcopenshell_wrapper as ifcopenshell_wrapper -from typing import Union, Literal PrimitiveType = Literal["entity", "string", "float", "integer", "boolean", "enum", "binary"] ComplexPrimitiveType = Literal["list", "array", "set"] diff --git a/src/ifcopenshell-python/ifcopenshell/util/brick.py b/src/ifcopenshell-python/ifcopenshell/util/brick.py index 04e0be5452d..ec358c2b98e 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/brick.py +++ b/src/ifcopenshell-python/ifcopenshell/util/brick.py @@ -16,14 +16,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . -import os import json +import os +from typing import Union + import ifcopenshell import ifcopenshell.util.classification import ifcopenshell.util.element import ifcopenshell.util.system -from typing import Union - cwd = os.path.dirname(os.path.realpath(__file__)) diff --git a/src/ifcopenshell-python/ifcopenshell/util/classification.py b/src/ifcopenshell-python/ifcopenshell/util/classification.py index 06f9b0e31ae..df36eb97a48 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/classification.py +++ b/src/ifcopenshell-python/ifcopenshell/util/classification.py @@ -16,9 +16,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
-import ifcopenshell.util.element from typing import Optional +import ifcopenshell.util.element + def get_references(element: ifcopenshell.entity_instance, should_inherit=True) -> set[ifcopenshell.entity_instance]: """Gets classification references associated with the element diff --git a/src/ifcopenshell-python/ifcopenshell/util/constraint.py b/src/ifcopenshell-python/ifcopenshell/util/constraint.py index 2c370aa997d..6d52d746763 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/constraint.py +++ b/src/ifcopenshell-python/ifcopenshell/util/constraint.py @@ -18,9 +18,10 @@ # # -import ifcopenshell from typing import Union +import ifcopenshell + def get_constraints(product: ifcopenshell.entity_instance) -> list[ifcopenshell.entity_instance]: """ diff --git a/src/ifcopenshell-python/ifcopenshell/util/cost.py b/src/ifcopenshell-python/ifcopenshell/util/cost.py index a7e96221d99..875594f1a5e 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/cost.py +++ b/src/ifcopenshell-python/ifcopenshell/util/cost.py @@ -16,10 +16,12 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . +from collections.abc import Generator +from typing import Any, Literal, Optional, Union + import lark + import ifcopenshell -from typing import Optional, Union, Literal, Any -from collections.abc import Generator import ifcopenshell.ifcopenshell_wrapper as ifcopenshell_wrapper import ifcopenshell.util.attribute import ifcopenshell.util.element @@ -350,8 +352,7 @@ def get_cost_rate( class CostValueUnserialiser: def parse(self, formula: str): - l = lark.Lark( - """start: formula + l = lark.Lark("""start: formula formula: operand (operator operand)* operand: value | category "(" formula ")" value: NUMBER? @@ -388,8 +389,7 @@ def parse(self, formula: str): NEWLINE: (CR? 
LF)+ %ignore WS // Disregard spaces in text - """ - ) + """) start = l.parse(formula) return self.get_formula(start.children[0]) diff --git a/src/ifcopenshell-python/ifcopenshell/util/data.py b/src/ifcopenshell-python/ifcopenshell/util/data.py index c65a87ddb0f..4169b1ea4d2 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/data.py +++ b/src/ifcopenshell-python/ifcopenshell/util/data.py @@ -17,10 +17,13 @@ # along with IfcOpenShell. If not, see . from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Union + import numpy as np + import ifcopenshell -from typing import Any, Union -from dataclasses import dataclass from ifcopenshell.util.shape_builder import ShapeBuilder diff --git a/src/ifcopenshell-python/ifcopenshell/util/date.py b/src/ifcopenshell-python/ifcopenshell/util/date.py index 3b0761f75cd..dcd1c36ca5b 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/date.py +++ b/src/ifcopenshell-python/ifcopenshell/util/date.py @@ -16,12 +16,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . -import ifcopenshell import datetime -import isodate from re import findall +from typing import Any, Literal, Union, overload + +import isodate from dateutil import parser -from typing import Literal, Union, Any, overload + +import ifcopenshell def timedelta2duration(timedelta): diff --git a/src/ifcopenshell-python/ifcopenshell/util/doc.py b/src/ifcopenshell-python/ifcopenshell/util/doc.py index 41d6189fe4e..b0443f6117f 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/doc.py +++ b/src/ifcopenshell-python/ifcopenshell/util/doc.py @@ -16,27 +16,30 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
+import copy import json from pathlib import Path -import copy +from typing import Optional, TypedDict, Union + +from typing_extensions import NotRequired + import ifcopenshell import ifcopenshell.ifcopenshell_wrapper as ifcopenshell_wrapper import ifcopenshell.util.attribute import ifcopenshell.util.schema -from typing import Optional, Literal, Any, Union, TypedDict -from typing_extensions import NotRequired try: import glob + import re + import shutil + import urllib.parse import warnings + import zipfile + import requests - import urllib.parse - from markdown import markdown from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning - import zipfile from lxml import etree - import re - import shutil + from markdown import markdown except: pass # Only necessary if you're using it to generate the docs database diff --git a/src/ifcopenshell-python/ifcopenshell/util/element.py b/src/ifcopenshell-python/ifcopenshell/util/element.py index 7bcce391712..0aeccccea47 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/element.py +++ b/src/ifcopenshell-python/ifcopenshell/util/element.py @@ -16,14 +16,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
+from collections import namedtuple +from collections.abc import Callable, Generator, Sequence +from typing import Any, Literal, Optional, Union, overload + import ifcopenshell import ifcopenshell.guid import ifcopenshell.util.element import ifcopenshell.util.representation -from typing import Any, Callable, Optional, Union, Literal, overload -from collections.abc import Generator, Sequence -from collections import deque, namedtuple - MATERIAL_TYPE = Literal[ "IfcMaterial", @@ -737,7 +737,7 @@ def get_material( return relationship.RelatingMaterial if should_inherit: relating_type = get_type(element) - if relating_type != element and (has_associations := getattr(relating_type, "HasAssociations", None)): + if relating_type is not None and relating_type != element and (has_associations := getattr(relating_type, "HasAssociations", None)): return get_material(relating_type, should_skip_usage) @@ -958,7 +958,7 @@ def get_elements_by_profile(profile: ifcopenshell.entity_instance) -> set[ifcope :return: The elements using the profile. """ ifc_file = profile.file - queue = list(ifc_file.get_inverse(profile)) + queue = ifc_file.get_inverse(profile) processed: set[ifcopenshell.entity_instance] = set() representations: set[ifcopenshell.entity_instance] = set() while queue: @@ -1234,7 +1234,9 @@ def get_controls(element: ifcopenshell.entity_instance) -> Generator[ifcopenshel yield rel.RelatingControl -def get_parent(element: ifcopenshell.entity_instance) -> Union[ifcopenshell.entity_instance, None]: +def get_parent( + element: ifcopenshell.entity_instance, ifc_class: Optional[str] = None +) -> Union[ifcopenshell.entity_instance, None]: """Get the parent in the spatial heirarchy IFC features a spatial hierarchy tree of all objects. 
Each spatial element @@ -1251,6 +1253,8 @@ def get_parent(element: ifcopenshell.entity_instance) -> Union[ifcopenshell.enti - Voiding: the opening voids another physical element, such as a hole in a wall :param element: Any physical or spatial element in the tree + :param ifc_class: Optionally filter the type of parent you're after. For + example, you may be after the storey, not a space. :return: Its parent. This must exist for any valid file, or None if we've reached the IfcProject. Example: @@ -1260,7 +1264,7 @@ def get_parent(element: ifcopenshell.entity_instance) -> Union[ifcopenshell.enti element = file.by_type("IfcWall")[0] parent = ifcopenshell.util.element.get_parent(element) """ - return ( + parent = ( get_container(element, should_get_direct=True) or get_aggregate(element) or get_nest(element) @@ -1268,6 +1272,16 @@ def get_parent(element: ifcopenshell.entity_instance) -> Union[ifcopenshell.enti or get_voided_element(element) ) + if not ifc_class: + return parent + + while parent: + if parent.is_a(ifc_class): + return parent + parent = get_parent(parent) + + return None + def get_filled_void(element: ifcopenshell.entity_instance) -> Union[ifcopenshell.entity_instance, None]: """If the element is filling a void, get the void @@ -1661,14 +1675,14 @@ def are_inverses_contained() -> bool: subgraph = list(ifc_file.traverse(element, breadth_first=True)) subgraph.extend(also_consider) subgraph_set = set(subgraph) - subelement_queue = deque([element]) + subelement_queue = [element] # Cache already processed entities to avoid traversing them multiple time. # E.g. lots of IFCINDEXEDPOLYCURVES may reference the same IFCCARTESIANPOINTLIST2D. 
processed_ids: set[int] = set() while subelement_queue: - subelement = subelement_queue.popleft() + subelement = subelement_queue.pop(0) subelement_id = subelement.id() if ( subelement_id @@ -1703,7 +1717,6 @@ def are_inverses_contained() -> bool: # We delete elements from subgraph in reverse order to allow batching to work for subelement in filter(lambda e: e in to_delete, subgraph[::-1]): - to_delete.remove(subelement) ifc_file.remove(subelement) # ifc_file.unbatch() diff --git a/src/ifcopenshell-python/ifcopenshell/util/file.py b/src/ifcopenshell-python/ifcopenshell/util/file.py index 4875ddf3b79..a874e1ba4d9 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/file.py +++ b/src/ifcopenshell-python/ifcopenshell/util/file.py @@ -17,7 +17,8 @@ # along with IfcOpenShell. If not, see . import zipfile -from typing import IO, Union, TypedDict +from typing import IO, TypedDict, Union + from typing_extensions import NotRequired diff --git a/src/ifcopenshell-python/ifcopenshell/util/fm.py b/src/ifcopenshell-python/ifcopenshell/util/fm.py index c9b1977e4ec..bfb7391dc3d 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/fm.py +++ b/src/ifcopenshell-python/ifcopenshell/util/fm.py @@ -16,11 +16,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . +from typing import Literal + +from typing_extensions import assert_never + import ifcopenshell import ifcopenshell.ifcopenshell_wrapper as W import ifcopenshell.util.attribute -from typing import Literal -from typing_extensions import assert_never # COBie actually uses an exclusion list, but this inclusion list is equivalent. 
cobie_type_classes = [ diff --git a/src/ifcopenshell-python/ifcopenshell/util/generate_pset_templates.py b/src/ifcopenshell-python/ifcopenshell/util/generate_pset_templates.py index 44d5bc28cf2..681c0740097 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/generate_pset_templates.py +++ b/src/ifcopenshell-python/ifcopenshell/util/generate_pset_templates.py @@ -18,22 +18,22 @@ RUN_FROM_DEV_REPO = False -import ifcopenshell.ifcopenshell_wrapper as W -import ifcopenshell.api.unit -import ifcopenshell.api.project -import ifcopenshell.guid -import ifcopenshell.util.attribute import glob import sys -from pathlib import Path -from lxml import etree from itertools import chain +from pathlib import Path from typing import cast +from lxml import etree + +import ifcopenshell.api.project +import ifcopenshell.api.unit +import ifcopenshell.guid +import ifcopenshell.ifcopenshell_wrapper as W if not RUN_FROM_DEV_REPO: - import zipfile import shutil + import zipfile BASE_MODULE_PATH = Path(__file__).parent diff --git a/src/ifcopenshell-python/ifcopenshell/util/geolocation.py b/src/ifcopenshell-python/ifcopenshell/util/geolocation.py index 748ba8f3c39..47c8886691a 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/geolocation.py +++ b/src/ifcopenshell-python/ifcopenshell/util/geolocation.py @@ -17,12 +17,14 @@ # along with IfcOpenShell. If not, see . 
import math +from decimal import ROUND_HALF_UP, Decimal +from typing import Any, NamedTuple, Optional, Union + import numpy as np + import ifcopenshell import ifcopenshell.util.element import ifcopenshell.util.placement -from typing import NamedTuple, Optional, Union -from decimal import Decimal, ROUND_HALF_UP MatrixType = ifcopenshell.util.placement.MatrixType @@ -266,6 +268,15 @@ def get_helmert_transformation_parameters(ifc_file: ifcopenshell.file) -> Option return HelmertTransformation(e, n, h, xaa, xao, scale, factor_x, factor_y, factor_z) +def get_crs(ifc_file: ifcopenshell.file) -> dict[str, Any]: + """Get CRS information from an IFC file.""" + if ifc_file.schema == "IFC2X3": + return ifcopenshell.util.element.get_pset(ifc_file.by_type("IfcProject")[0], "ePSet_ProjectedCRS") + for context in ifc_file.by_type("IfcGeometricRepresentationContext", include_subtypes=False): + if operation := context.HasCoordinateOperation: + return operation[0].TargetCRS.get_info() + + def auto_z2e(ifc_file: ifcopenshell.file, z: float, should_return_in_map_units: bool = True) -> float: """Convert a Z coordinate to an elevation using model georeferencing data diff --git a/src/ifcopenshell-python/ifcopenshell/util/ifc4x3dev_scrape_data_for_docs.py b/src/ifcopenshell-python/ifcopenshell/util/ifc4x3dev_scrape_data_for_docs.py index 02e4f1f4913..86ab81ef4c1 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/ifc4x3dev_scrape_data_for_docs.py +++ b/src/ifcopenshell-python/ifcopenshell/util/ifc4x3dev_scrape_data_for_docs.py @@ -17,7 +17,12 @@ # along with IfcOpenShell. If not, see . try: - from server import get_resource_path, resource_documentation_builder, process_markdown, R + from server import ( + R, + get_resource_path, + process_markdown, + resource_documentation_builder, + ) except ModuleNotFoundError as e: print( "ERROR. 
Failed to import `server.py`.\n" @@ -26,13 +31,15 @@ raise e import itertools -import operator import json -import ifcopenshell +import operator from collections import Counter -from bs4 import BeautifulSoup from typing import Any, Union +from bs4 import BeautifulSoup + +import ifcopenshell + # Hacky modified functions from server.py to make parser work def get_definition_from_md(resource: str, mdc: str) -> str: diff --git a/src/ifcopenshell-python/ifcopenshell/util/mvd_info.py b/src/ifcopenshell-python/ifcopenshell/util/mvd_info.py index a4ca36039dc..a3bdb073375 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/mvd_info.py +++ b/src/ifcopenshell-python/ifcopenshell/util/mvd_info.py @@ -26,8 +26,8 @@ except ImportError: LARK_AVAILABLE = False -from typing import Callable, Union import re +from typing import Union if LARK_AVAILABLE: mvd_grammar = r""" @@ -51,9 +51,9 @@ value: /[A-Za-z0-9 _\.-]+/ - other_keyword: /[^\[\]]+/ - - dynamic_option_word: /[^\[\]]+/ + other_keyword: /[^\[\]]+/ + + dynamic_option_word: /[^\[\]]+/ %import common.WS %ignore WS diff --git a/src/ifcopenshell-python/ifcopenshell/util/placement.py b/src/ifcopenshell-python/ifcopenshell/util/placement.py index e7e7b99a682..cc37ab99060 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/placement.py +++ b/src/ifcopenshell-python/ifcopenshell/util/placement.py @@ -16,11 +16,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
+from collections.abc import Iterable +from typing import Literal, Optional + import numpy as np import numpy.typing as npt + import ifcopenshell -from typing import Literal, Optional -from collections.abc import Iterable MatrixType = npt.NDArray[np.float64] """`npt.NDArray[np.float64]`""" diff --git a/src/ifcopenshell-python/ifcopenshell/util/pset.py b/src/ifcopenshell-python/ifcopenshell/util/pset.py index db1b4736c9c..68fc84a5b11 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/pset.py +++ b/src/ifcopenshell-python/ifcopenshell/util/pset.py @@ -16,15 +16,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . -import re import pathlib +import re +from functools import lru_cache +from typing import Literal, NamedTuple, Optional, Union + import ifcopenshell import ifcopenshell.ifcopenshell_wrapper as W import ifcopenshell.util.schema import ifcopenshell.util.type -from ifcopenshell import entity_instance -from functools import lru_cache -from typing import Optional, Literal, NamedTuple, Union +from ifcopenshell.entity_instance import entity_instance templates: dict[ifcopenshell.util.schema.IFC_SCHEMA, "PsetQto"] = {} diff --git a/src/ifcopenshell-python/ifcopenshell/util/representation.py b/src/ifcopenshell-python/ifcopenshell/util/representation.py index 10743294b06..f0c2b54fcae 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/representation.py +++ b/src/ifcopenshell-python/ifcopenshell/util/representation.py @@ -16,15 +16,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
+from collections.abc import Generator, Sequence +from typing import Literal, Optional, TypedDict, Union + import numpy as np import numpy.typing as npt + import ifcopenshell -import ifcopenshell.util.representation import ifcopenshell.util.placement +import ifcopenshell.util.representation import ifcopenshell.util.shape -from typing import Optional, Union, TypedDict, Literal -from collections.abc import Generator, Sequence - CONTEXT_TYPE = Literal["Model", "Plan", "NotDefined"] REPRESENTATION_IDENTIFIER = Literal[ diff --git a/src/ifcopenshell-python/ifcopenshell/util/resource.py b/src/ifcopenshell-python/ifcopenshell/util/resource.py index f518a091250..262ddda125c 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/resource.py +++ b/src/ifcopenshell-python/ifcopenshell/util/resource.py @@ -16,11 +16,11 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . +from typing import Any, Union + import ifcopenshell.util.cost -import ifcopenshell.util.element import ifcopenshell.util.date -from typing import Union, Any - +import ifcopenshell.util.element PRODUCTIVITY_PSET_DATA = Union[dict[str, Any], None] # https://ifc43-docs.standards.buildingsmart.org/IFC/RELEASE/IFC4x3/HTML/lexical/IfcConstructionResource.htm#Table-7.3.3.7.1.3.H diff --git a/src/ifcopenshell-python/ifcopenshell/util/schema.py b/src/ifcopenshell-python/ifcopenshell/util/schema.py index e3e9ad7540a..bdd6d489d0f 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/schema.py +++ b/src/ifcopenshell-python/ifcopenshell/util/schema.py @@ -16,13 +16,14 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
-import os import json +import os import time +from typing import Any, Literal, Union + import ifcopenshell -import ifcopenshell.util.attribute import ifcopenshell.ifcopenshell_wrapper as ifcopenshell_wrapper -from typing import Union, Any, Literal +import ifcopenshell.util.attribute # This is highly experimental and incomplete, however, it may work for simple datasets. @@ -70,7 +71,7 @@ def get_declaration(element: ifcopenshell.entity_instance): print(declaration.is_abstract()) # False print(declaration.supertype().name()) # IfcBuildingElement """ - return element.declaration + return element.wrapped_data.declaration().as_entity() def is_a(declaration: ifcopenshell.ifcopenshell_wrapper.declaration, ifc_class: str) -> bool: @@ -104,7 +105,7 @@ def get_supertypes( .. code:: python wall = model.createIfcWall() - results = ifcopenshell.util.schema.get_supertypes(wall.declaration.as_entity()) + results = ifcopenshell.util.schema.get_supertypes(wall.wrapped_data.declaration().as_entity()) # [, , ..., ] """ results = [] @@ -462,7 +463,7 @@ def migrate_attribute( ) -> None: # NOTE: `attribute` is an attribute in new file schema # print("Migrating attribute", element, new_element, attribute.name()) - old_file = element.file + old_file = element.wrapped_data.file if hasattr(element, attribute.name()): value = getattr(element, attribute.name()) # print("Attribute names matched", value) diff --git a/src/ifcopenshell-python/ifcopenshell/util/schema/ifc_classes_suggestions.json b/src/ifcopenshell-python/ifcopenshell/util/schema/ifc_classes_suggestions.json index ea520e028b7..c3cf248ada8 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/schema/ifc_classes_suggestions.json +++ b/src/ifcopenshell-python/ifcopenshell/util/schema/ifc_classes_suggestions.json @@ -5,6 +5,16 @@ "predefined_type": "NOTDEFINED" } ], + "IfcAirTerminal": [ + { + "name": "Commercial Kitchen Hood" + } + ], + "IfcAirTerminalType": [ + { + "name": "Commercial Kitchen Hood" + } + ], "IfcAirTerminalBox": [ 
{ "name": "VAV Box" @@ -51,6 +61,16 @@ "predefined_type": "DISTRIBUTIONBOARD" } ], + "IfcFireSuppressionTerminal": [ + { + "name": "Fire Extinguisher" + } + ], + "IfcFireSuppressionTerminalType": [ + { + "name": "Fire Extinguisher" + } + ], "IfcFurniture": [ { "name": "Casework" diff --git a/src/ifcopenshell-python/ifcopenshell/util/scripts/validate_stub.py b/src/ifcopenshell-python/ifcopenshell/util/scripts/validate_stub.py index 14b05ed258f..1c3b6cb0015 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/scripts/validate_stub.py +++ b/src/ifcopenshell-python/ifcopenshell/util/scripts/validate_stub.py @@ -25,11 +25,11 @@ - class hierarchy """ - import ast import difflib from pathlib import Path from typing import Union + from typing_extensions import assert_never @@ -57,11 +57,28 @@ def get_function_node_name(node: ast.FunctionDef) -> Union[SubnameType, None]: :return: Function node name as ``SubnameType`` or ``None``, if function wasn't processed and can be skipped. """ node_name = node.name - if node_name.startswith("_") and node_name not in ("_is",): + is_init = node_name == "__init__" + + if node_name.startswith("_") and node_name not in ("_is",) and not is_init: + return None + arg_nodes = node.args.args + defaults = [None] * (len(arg_nodes) - len(node.args.defaults)) + node.args.defaults + args: list[str] = [] + for arg, default in zip(arg_nodes, defaults): + if default is None: + args.append(arg.arg) + else: + args.append(f"{arg.arg}={ast.unparse(default)}") + + if arg := node.args.vararg: + args.append(f"*{arg.arg}") + + if arg := node.args.kwarg: + args.append(f"**{arg.arg}") + + # Skip non-informative constructors. + if is_init and args == ["self"]: return None - args = [a.arg for a in node.args.args] - if node.args.vararg: - args.append("*args") node_name = f"def {node.name}" node_name = f"{node_name}({', '.join(args)}): ..." 
diff --git a/src/ifcopenshell-python/ifcopenshell/util/selector.py b/src/ifcopenshell-python/ifcopenshell/util/selector.py index 4d05912eb80..bbe8125927a 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/selector.py +++ b/src/ifcopenshell-python/ifcopenshell/util/selector.py @@ -17,16 +17,20 @@ # along with IfcOpenShell. If not, see . import re -import sys +from collections.abc import Iterable +from decimal import Decimal +from types import EllipsisType +from typing import Any, Optional, Union + import lark import numpy as np -import ifcopenshell.api.pset + import ifcopenshell.api.geometry +import ifcopenshell.api.pset import ifcopenshell.util import ifcopenshell.util.attribute import ifcopenshell.util.classification import ifcopenshell.util.element -import ifcopenshell.util.fm import ifcopenshell.util.geolocation import ifcopenshell.util.placement import ifcopenshell.util.pset @@ -34,18 +38,8 @@ import ifcopenshell.util.shape import ifcopenshell.util.system import ifcopenshell.util.unit -from decimal import Decimal -from typing import Optional, Any, Union -from collections.abc import Iterable - -if sys.version_info >= (3, 10): - from types import EllipsisType -else: - EllipsisType = type(...) - -filter_elements_grammar = lark.Lark( - """start: filter_group +filter_elements_grammar = lark.Lark("""start: filter_group filter_group: facet_list ("+" facet_list)* facet_list: facet ("," facet)* @@ -116,11 +110,9 @@ NEWLINE: (CR? LF)+ %ignore WS // Disregard spaces in text -""" -) +""") -get_element_grammar = lark.Lark( - """start: keys +get_element_grammar = lark.Lark("""start: keys keys: key ("." 
key)* key: quoted_string | regex_string | unquoted_string @@ -135,25 +127,37 @@ WS: /[ \\t\\f\\r\\n]/+ %ignore WS // Disregard spaces in text - """ -) - -format_grammar = lark.Lark( - """start: function - - function: round | number | int | format_length | lower | upper | title | concat | substr | ESCAPED_STRING | NUMBER - - round: "round(" function "," NUMBER ")" - number: "number(" function ["," ESCAPED_STRING ["," ESCAPED_STRING]] ")" - int: "int(" function ")" + """) + +format_grammar = lark.Lark("""start: expression + + ?expression: add_sub + ?add_sub: mul_div + | add_sub "+" mul_div -> add + | add_sub "-" mul_div -> subtract + ?mul_div: function + | mul_div "*" function -> multiply + | mul_div "/" function -> divide + + function: round | number | int | format_length | lower | upper | title | concat | substr | sort | reverse | join | variable | ESCAPED_STRING | SIGNED_NUMBER | "(" expression ")" + + variable: "{{" query_path "}}" + query_path: /[^}]+/ + + round: "round(" expression "," NUMBER ")" + number: "number(" expression ["," ESCAPED_STRING ["," ESCAPED_STRING]] ")" + int: "int(" expression ")" format_length: metric_length | imperial_length - metric_length: "metric_length(" function "," NUMBER "," NUMBER ")" - imperial_length: "imperial_length(" function "," NUMBER ["," ESCAPED_STRING "," ESCAPED_STRING ["," boolean]] ")" - lower: "lower(" function ")" - upper: "upper(" function ")" - title: "title(" function ")" - concat: "concat(" function ("," function)* ")" - substr: "substr(" function "," SIGNED_INT ["," SIGNED_INT] ")" + metric_length: "metric_length(" expression "," NUMBER "," NUMBER ")" + imperial_length: "imperial_length(" expression "," NUMBER ["," ESCAPED_STRING "," ESCAPED_STRING ["," boolean]] ")" + lower: "lower(" expression ")" + upper: "upper(" expression ")" + title: "title(" expression ")" + concat: "concat(" expression ("," expression)* ")" + substr: "substr(" expression "," SIGNED_INT ["," SIGNED_INT] ")" + sort: "sort(" expression ")" 
+ reverse: "reverse(" expression ")" + join: "join(" ESCAPED_STRING "," expression ")" boolean: TRUE | FALSE TRUE: "true" | "True" | "TRUE" @@ -184,14 +188,82 @@ NEWLINE: (CR? LF)+ %ignore WS // Disregard spaces in text -""" -) +""") class FormatTransformer(lark.Transformer): + def __init__(self, element=None): + """Initialize transformer with optional element for variable substitution""" + super().__init__() + self.element = element + def start(self, args): + if isinstance(args[0], (list, tuple)): + return ", ".join(args[0]) + return args[0] + + def expression(self, args): return args[0] + def variable(self, args): + """Handle variable substitution like {{z}} or {{Pset_Wall.FireRating}}""" + if self.element: + try: + return get_element_value(self.element, args[0]) + except: + pass + + def query_path(self, args): + """Extract the query path from variable""" + return str(args[0]).strip() + + def add(self, args): + """Handle addition operation""" + left, right = args + try: + left_val = float(left) if left != "None" and left is not None else 0.0 + right_val = float(right) if right != "None" and right is not None else 0.0 + result = left_val + right_val + # Return integer if result has no decimal part + if result % 1 == 0: + return str(int(result)) + return str(result) + except (ValueError, TypeError): + # If can't convert to numbers, concatenate as strings + return str(left) + str(right) + + def subtract(self, args): + """Handle subtraction operation""" + left, right = args + left_val = float(left) if left != "None" and left is not None else 0.0 + right_val = float(right) if right != "None" and right is not None else 0.0 + result = left_val - right_val + if result % 1 == 0: + return str(int(result)) + return str(result) + + def multiply(self, args): + """Handle multiplication operation""" + left, right = args + left_val = float(left) if left != "None" and left is not None else 0.0 + right_val = float(right) if right != "None" and right is not None else 0.0 + result = 
left_val * right_val + if result % 1 == 0: + return str(int(result)) + return str(result) + + def divide(self, args): + """Handle division operation""" + left, right = args + left_val = float(left) if left != "None" and left is not None else 0.0 + right_val = float(right) if right != "None" and right is not None else 1.0 + if right_val == 0: + return "inf" # or raise an error, or return "0" + result = left_val / right_val + if result % 1 == 0: + return str(int(result)) + return str(result) + def function(self, args): return args[0] @@ -211,7 +283,7 @@ def title(self, args): return str(args[0]).title() def concat(self, args): - return "".join(args) + return "".join(str(arg) for arg in args) def substr(self, args): if len(args) == 3: @@ -221,6 +293,15 @@ def substr(self, args): elif len(args) == 2: return str(args[0])[int(args[1]) :] + def sort(self, args): + return sorted(args[0]) + + def reverse(self, args): + return list(reversed(args[0])) + + def join(self, args): + return args[0].join(args[1]) + def boolean(self, args): if not args: return True @@ -241,13 +322,14 @@ def round(self, args): return str(result) def number(self, args): - if isinstance(args[0], str): - args[0] = float(args[0]) if "." in args[0] else int(args[0]) + arg_val = args[0] + if isinstance(arg_val, str): + arg_val = float(arg_val) if "." 
in arg_val else int(arg_val) if len(args) >= 3 and args[2]: - return "{:,}".format(args[0]).replace(".", "*").replace(",", args[2]).replace("*", args[1]) + return "{:,}".format(arg_val).replace(".", "*").replace(",", args[2]).replace("*", args[1]) elif len(args) >= 2 and args[1]: - return "{}".format(args[0]).replace(".", args[1]) - return "{:,}".format(args[0]) + return "{}".format(arg_val).replace(".", args[1]) + return "{:,}".format(arg_val) def format_length(self, args): return args[0] @@ -287,7 +369,8 @@ def imperial_length(self, args): ) def int(self, args: list[str]) -> str: - return str(int(float(args[0]))) + value = 0.0 if args[0] == "None" else args[0] or 0.0 + return str(int(float(value))) class GetElementTransformer(lark.Transformer): @@ -313,8 +396,18 @@ def ESCAPED_STRING(self, args): return args[1:-1].replace("\\", "") -def format(query: str) -> str: - return FormatTransformer().transform(format_grammar.parse(query)) +def format(query: str, element: Optional[ifcopenshell.entity_instance] = None) -> str: + """Format a query string with optional element context for variable substitution. 
+ + :param query: Format query string (can include {{variable}} placeholders) + :param element: Optional IFC element for variable substitution + :return: Formatted string + + Example: + format("{{z}} / 2", element) # Substitutes element's z value + format("imperial_length({{z}} / 2, 4)", element) # Uses z in calculation + """ + return FormatTransformer(element).transform(format_grammar.parse(query)) def get_element_value(element: ifcopenshell.entity_instance, query: str) -> Any: @@ -347,13 +440,13 @@ def _get_element_value(element: ifcopenshell.entity_instance, keys: list[str]) - elif key == "container": value = ifcopenshell.util.element.get_container(value) elif key == "space": - value = ifcopenshell.util.element.get_container(value, ifc_class="IfcSpace") + value = ifcopenshell.util.element.get_parent(value, ifc_class="IfcSpace") elif key == "storey": - value = ifcopenshell.util.element.get_container(value, ifc_class="IfcBuildingStorey") + value = ifcopenshell.util.element.get_parent(value, ifc_class="IfcBuildingStorey") elif key == "building": - value = ifcopenshell.util.element.get_container(value, ifc_class="IfcBuilding") + value = ifcopenshell.util.element.get_parent(value, ifc_class="IfcBuilding") elif key == "site": - value = ifcopenshell.util.element.get_container(value, ifc_class="IfcSite") + value = ifcopenshell.util.element.get_parent(value, ifc_class="IfcSite") elif key == "parent": value = ifcopenshell.util.element.get_parent(value) elif key in ("types", "occurrences"): @@ -386,7 +479,7 @@ def _get_element_value(element: ifcopenshell.entity_instance, keys: list[str]) - if key in ("x", "y", "z"): value = xyz["xyz".index(key)] else: - enh = ifcopenshell.util.geolocation.auto_xyz2enh(element.file, *xyz) + enh = ifcopenshell.util.geolocation.auto_xyz2enh(element.wrapped_data.file, *xyz) value = enh[("easting", "northing", "elevation").index(key)] else: value = None @@ -569,8 +662,8 @@ def set_predefined_type( element: ifcopenshell.entity_instance, value: 
Union[str, None], *, is_type: bool ) -> None: predefined_type = element.PredefinedType - declaration = element.declaration - entity = declaration + declaration = element.wrapped_data.declaration() + entity = declaration.as_entity() enum_attr = next(attr for attr in entity.attributes() if attr.name() == "PredefinedType") enum_items = ifcopenshell.util.attribute.get_enum_items(enum_attr) @@ -639,7 +732,9 @@ def set_predefined_type( except: # Try to cast data_type = ifcopenshell.util.attribute.get_primitive_type( - element.declaration.attribute_by_index(element.get_argument_index(key)) + element.wrapped_data.declaration() + .as_entity() + .attribute_by_index(element.wrapped_data.get_argument_index(key)) ) if data_type == "string": value = str(value) diff --git a/src/ifcopenshell-python/ifcopenshell/util/sequence.py b/src/ifcopenshell-python/ifcopenshell/util/sequence.py index 51f34b7226a..518d581657a 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/sequence.py +++ b/src/ifcopenshell-python/ifcopenshell/util/sequence.py @@ -17,13 +17,13 @@ # along with IfcOpenShell. If not, see . import datetime -import ifcopenshell.util.date -import ifcopenshell.util.element -from math import floor -from functools import cache -from typing import Union, Literal, Optional from collections.abc import Generator +from functools import cache +from math import floor +from typing import Literal, Optional, Union +import ifcopenshell.util.date +import ifcopenshell.util.element DURATION_TYPE = Literal["ELAPSEDTIME", "WORKTIME", "NOTDEFINED"] RECURRENCE_TYPE = Literal[ diff --git a/src/ifcopenshell-python/ifcopenshell/util/shape.py b/src/ifcopenshell-python/ifcopenshell/util/shape.py index 75c15b69353..7732412861c 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/shape.py +++ b/src/ifcopenshell-python/ifcopenshell/util/shape.py @@ -16,26 +16,36 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
-import shapely -import shapely.ops +from __future__ import annotations + +from math import cos, radians +from typing import TYPE_CHECKING, Literal, Optional, Union + import numpy as np import numpy.typing as npt -import ifcopenshell.ifcopenshell_wrapper as W +import shapely +import shapely.ops + import ifcopenshell.util.element import ifcopenshell.util.placement import ifcopenshell.util.representation -from ifcopenshell.util.shape_builder import VectorType -from math import radians, cos -from ifcopenshell.geom import ShapeElementType -from typing import Optional, Literal, Union -tol = 1e-6 -AXIS_LITERAL = Literal["X", "Y", "Z"] -VECTOR_3D = tuple[float, float, float] +if TYPE_CHECKING: + + import ifcopenshell.ifcopenshell_wrapper as W + from ifcopenshell.geom import ShapeElementType + from ifcopenshell.util.shape_builder import VectorType + + AXIS_LITERAL = Literal["X", "Y", "Z"] + VECTOR_3D = tuple[float, float, float] + +# Used only for typing, but reused by `shape.py` users. MatrixType = npt.NDArray[np.float64] """`npt.NDArray[np.float64]`""" +tol = 1e-6 + # NOTE: See IfcGeomRepresentation.h for W.Triangulation buffer types. # NOTE: For functions that return a single scalar ensure to use .item() to diff --git a/src/ifcopenshell-python/ifcopenshell/util/shape_builder.py b/src/ifcopenshell-python/ifcopenshell/util/shape_builder.py index 0811b8798cd..e53d069a76b 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/shape_builder.py +++ b/src/ifcopenshell-python/ifcopenshell/util/shape_builder.py @@ -17,20 +17,21 @@ # along with IfcOpenShell. If not, see . 
from __future__ import annotations + +import collections.abc +from collections.abc import Sequence +from itertools import chain +from math import atan, cos, degrees, pi, radians, sin, sqrt, tan +from typing import TYPE_CHECKING, Any, Literal, Optional, Union + import numpy as np import numpy.typing as npt -import collections -import collections.abc + import ifcopenshell -import ifcopenshell.api import ifcopenshell.util.element import ifcopenshell.util.placement import ifcopenshell.util.representation import ifcopenshell.util.unit -from math import cos, sin, pi, tan, radians, degrees, atan, sqrt -from typing import Union, Optional, Literal, Any, TYPE_CHECKING -from collections.abc import Sequence -from itertools import chain PRECISION = 1.0e-5 @@ -39,7 +40,7 @@ # NOTE: mathutils is never used at runtime in ifcopenshell, # only for type checking to ensure methods are compatible with # Blender vectors. - from mathutils import Vector + from mathutils import Vector # pyright: ignore[reportMissingImports] # ty:ignore[unresolved-import] # Support both numpy arrays and python sequences as inputs. VectorType = Union[Sequence[float], Vector, np.ndarray] @@ -312,7 +313,7 @@ def polyline( Generate an IfcIndexedPolyCurve based on the provided points. :param points: List of 2d or 3d points - :param closed: Whether polyline should be closed. Default is `False` + :param closed: Whether polyline should be closed. :param position_offset: offset to be applied to all points :param arc_points: Indices of the middle points for arcs. For creating an arc segment, provide 3 points: `arc_start`, `arc_middle` and `arc_end` to `points` and add the `arc_middle` @@ -415,8 +416,9 @@ def get_rectangle_coords(size: VectorType = (1.0, 1.0), position: Optional[Vecto 3 2 0 1 - :param size: rectangle size, could be either 2d or 3d, defaults to `(1,1)` - :param position: rectangle position, default to `None`. + :param size: rectangle size, could be either 2d or 3d. 
+ Use 0 for one of 3d dimensions to create 2d rectangle in 3d space. + :param position: rectangle position. if `position` not specified zero-vector will be used :return: list of rectangle coords """ @@ -441,9 +443,11 @@ def rectangle( """ Generate a rectangle polyline. - :param size: rectangle size, could be either 2d or 3d, defaults to `(1,1)` - :param position: rectangle position, default to `None`. - if `position` not specified zero-vector will be used + :param size: rectangle. + :param position: rectangle position. + + See ``get_rectangle_coords`` for more information. + :return: IfcIndexedPolyCurve """ return self.polyline(self.get_rectangle_coords(size, position), closed=True) @@ -513,11 +517,18 @@ def get_trim_points_from_mask( trim_points_mask: Sequence[int], position_offset: Optional[VectorType] = None, ) -> np.ndarray: - """Handy way to get edge points of the ellipse like shape of a given radiuses. + """Get cardinal-point coordinates of an ellipse by index mask. - Mask points are numerated from 0 to 3 ccw starting from (x_axis_radius/2; 0). + The four cardinal points are numbered 0–3 counter-clockwise starting from the + positive X axis: 0 → ``(x, 0)``, 1 → ``(0, y)``, 2 → ``(-x, 0)``, 3 → ``(0, -y)``. - Example: mask (0, 1, 2, 3) will return points (x, 0), (0, y), (-x, 0), (0, -y) + Example: mask ``(0, 1, 2, 3)`` returns all four points in order. + + :param x_axis_radius: Radius (semi-axis length) along the X axis. + :param y_axis_radius: Radius (semi-axis length) along the Y axis. + :param trim_points_mask: Sequence of cardinal-point indices (0–3) to select. + :param position_offset: Optional 2D offset added to all returned points. + :return: Numpy array of the selected 2D points. """ points = np.array( ( @@ -542,15 +553,23 @@ def create_ellipse_curve( ref_x_direction: VectorType = (1.0, 0.0), trim_points_mask: Sequence[int] = (), ) -> ifcopenshell.entity_instance: - """ - Ellipse trimming points should be specified in counter clockwise order. 
- - For example, if you need to get the part of the ellipse ABOVE y-axis, you need to use mask (0,2). Below y-axis - (2,0) - - For more information about trim_points_mask check builder.get_trim_points_from_mask - - Notion: trimmed ellipse also contains polyline between trim points, meaning IfcTrimmedCurve could be used - for further extrusion. + """Create an IfcEllipse, optionally trimmed to an arc. + + If neither ``trim_points`` nor ``trim_points_mask`` is provided, a full IfcEllipse is returned. + Trimming points must be given in counter-clockwise order. For example, to get the arc + above the Y-axis use mask ``(0, 2)``; below the Y-axis use ``(2, 0)``. + + A trimmed result (IfcTrimmedCurve) includes a closing segment between the trim points, + making it suitable for use as a profile in :meth:`extrude`. + + :param x_axis_radius: Semi-axis length along the local X axis. + :param y_axis_radius: Semi-axis length along the local Y axis. + :param position: 2D centre of the ellipse. + :param trim_points: Explicit pair of 2D trim points. Takes precedence over ``trim_points_mask``. + :param ref_x_direction: Direction of the local X axis. + :param trim_points_mask: Pair of cardinal-point indices (0–3) used when ``trim_points`` is empty. + See :meth:`get_trim_points_from_mask` for index definitions. + :return: IfcEllipse (untrimmed) or IfcTrimmedCurve (trimmed). """ ifc_position = self.create_axis2_placement_2d(position, ref_x_direction) ifc_ellipse = self.file.createIfcEllipse( @@ -681,6 +700,14 @@ def rotate_2d_point( pivot_point: VectorType = (0.0, 0.0), counter_clockwise: bool = False, ) -> np.ndarray: + """Rotate a single 2D point around a pivot. + + :param point_2d: The 2D point to rotate. + :param angle: Rotation angle, in degrees. Defaults to 90. + :param pivot_point: The point to rotate around. + :param counter_clockwise: If True, rotate counter-clockwise. Defaults to clockwise. + :return: Rotated 2D point as a numpy array. 
+ """ angle_rad = radians(angle) * (1 if counter_clockwise else -1) relative_point = np.array(point_2d) - pivot_point relative_point = np_rotation_matrix(angle_rad, 2) @ relative_point @@ -748,7 +775,16 @@ def mirror_2d_point( mirror_axes: VectorType = (1.0, 1.0), mirror_point: VectorType = (0.0, 0.0), ) -> np.ndarray: - """mirror_axes - along which axes mirror will be applied""" + """Mirror a single 2D point across the specified axes. + + :param point_2d: The 2D point to mirror. + :param mirror_axes: Indicates which axes to mirror across. A positive value in a + component means that axis is mirrored (negated relative to ``mirror_point``). + Example: ``(1, 0)`` mirrors across the Y-axis (negates X only), + ``(1, 1)`` mirrors across both axes. + :param mirror_point: Origin of the mirror operation. + :return: Mirrored 2D point as a numpy array. + """ mirror_axes: np.ndarray = np.where(np.array(mirror_axes) > 0, -1, 1) mirror_point: np.ndarray = np.array(mirror_point) relative_point = point_2d - mirror_point @@ -784,7 +820,7 @@ def create_axis2_placement_3d_from_matrix( """ Create IfcAxis2Placement3D from numpy matrix. - :param matrix: 4x4 transformation matrix, defaults to `np.eye(4)` + :param matrix: 4x4 transformation matrix, defaults to ``np.eye(4)`` :return: IfcAxis2Placement3D """ if matrix is None: @@ -794,7 +830,13 @@ def create_axis2_placement_3d_from_matrix( def create_axis2_placement_2d( self, position: VectorType = (0.0, 0.0), x_direction: Optional[VectorType] = None ) -> ifcopenshell.entity_instance: - """Create IfcAxis2Placement2D.""" + """Create IfcAxis2Placement2D. + + :param position: 2D origin of the placement. + :param x_direction: Direction of the local X axis. If not provided, defaults to + the global X axis ``(1, 0)``. 
+ :return: IfcAxis2Placement2D + """ ref_direction = ( self.file.create_entity("IfcDirection", ifc_safe_vector_type(x_direction)) if x_direction else None ) @@ -965,8 +1007,8 @@ def mirror( def sphere(self, radius: float = 1.0, center: VectorType = (0.0, 0.0, 0.0)) -> ifcopenshell.entity_instance: """ - :param radius: radius of the sphere, defaults to 1.0 - :param center: sphere position, defaults to `(0.0, 0.0, 0.0)` + :param radius: radius of the sphere. + :param center: sphere position. :return: IfcSphere """ @@ -996,7 +1038,7 @@ def half_space_solid( ) -> ifcopenshell.entity_instance: """ :param plane: The IfcPlane representing the half space. - :param agreement_flag: False if +Z represents the void + :param agreement_flag: If False (default), the plane normal points toward the **removed** material (the void). The kept region is on the opposite side from the normal. :return: IfcHalfSpaceSolid """ return self.file.createIfcHalfSpaceSolid(plane, AgreementFlag=agreement_flag) @@ -1049,7 +1091,14 @@ def extrude( def create_swept_disk_solid( self, path_curve: ifcopenshell.entity_instance, radius: float ) -> ifcopenshell.entity_instance: - """Create IfcSweptDiskSolid from `path_curve` (must be 3D) and `radius`""" + """Create an IfcSweptDiskSolid — a circular cross-section swept along a 3D path. + + Useful for modelling round pipes, conduits, and cables. + + :param path_curve: A 3D curve entity defining the centreline path. Must have ``Dim == 3``. + :param radius: Radius of the circular disk cross-section. + :return: IfcSweptDiskSolid + """ if path_curve.Dim != 3: raise Exception( f"Path curve for IfcSweptDiskSolid should be 3D to be valid, currently it has {path_curve.Dim} dimensions.\n" @@ -1067,10 +1116,22 @@ def get_representation( ) -> ifcopenshell.entity_instance: """Create IFC representation for the specified context and items. + **All items must belong to the same geometry category.** IFC prohibits + mixing incompatible item types in one representation (e.g. 
+ ``IfcExtrudedAreaSolid`` with ``IfcBlock``, or solids with curves). + When ``representation_type`` is omitted the type is inferred via + :func:`ifcopenshell.util.representation.guess_type`; if the items are + heterogeneous ``guess_type`` returns ``None`` and the representation is + written with no ``RepresentationType``, which fails IFC validation. + Avoid mixing swept-solid primitives (``IfcExtrudedAreaSolid``, + ``IfcRevolvedAreaSolid``) with CSG primitives (``IfcBlock``, + ``IfcSphere``, etc.) or any other category in a single call. + :param context: IfcGeometricRepresentationSubContext - :param items: could be a list or single curve/IfcExtrudedAreaSolid - :param representation_type: Explicitly specified RepresentationType, defaults to `None`. - If not provided it will be guessed from the items types + :param items: A single item or list of items, all of the same geometry + category (e.g. all ``IfcExtrudedAreaSolid``, all ``IfcIndexedPolyCurve``) + :param representation_type: Explicitly specified RepresentationType. + If not provided it will be guessed from the items types. :return: IfcShapeRepresentation """ if not isinstance(items, collections.abc.Iterable): @@ -1092,18 +1153,26 @@ def get_representation( ) def deep_copy(self, element: ifcopenshell.entity_instance) -> ifcopenshell.entity_instance: + """Create a deep copy of an IFC element and all its referenced entities. + + :param element: The IFC entity to copy. + :return: A new independent copy of the element. + """ return ifcopenshell.util.element.copy_deep(self.file, element) # UTILITIES def extrude_kwargs(self, axis: Literal["Y", "X", "Z"]) -> dict[str, tuple[float, float, float]]: - """Shortcut to get kwargs for `ShapeBuilder.extrude` to extrude by some axis. + """Shortcut to get kwargs for :meth:`extrude` to extrude along a principal axis. 
- It assumes you have 2D profile in: - XZ plane for Y axis extrusion, \n - YZ plane for X axis extrusion, \n - XY plane for Z axis extrusion, \n + Assumes the 2D profile lies in the plane perpendicular to the extrusion axis: + XZ plane for Y-axis extrusion, YZ plane for X-axis extrusion, XY plane for Z-axis extrusion. - Extruding by X/Y using other kwargs might break ValidExtrusionDirection.""" + Extruding along X or Y with other kwargs may violate the IFC ValidExtrusionDirection constraint. + + :param axis: The extrusion axis: ``'X'``, ``'Y'``, or ``'Z'``. + :return: A dict with keys ``position_x_axis``, ``position_z_axis``, and ``extrusion_vector`` + suitable for passing as ``**kwargs`` to :meth:`extrude`. + """ if axis == "Y": return { @@ -1127,13 +1196,16 @@ def extrude_kwargs(self, axis: Literal["Y", "X", "Z"]) -> dict[str, tuple[float, def rotate_extrusion_kwargs_by_z( self, kwargs: dict[str, Any], angle: float, counter_clockwise: bool = False ) -> dict[str, VectorType]: - """shortcut to rotate extrusion kwargs by z axis - - `kwargs` expected to have `position_x_axis` and `position_z_axis` keys + """Rotate extrusion kwargs around the Z axis. - `angle` is a rotation value in radians + A shortcut to rotate the ``position_x_axis`` and ``position_z_axis`` values returned by + :meth:`extrude_kwargs` around the Z axis before passing them to :meth:`extrude`. - by default rotation is clockwise, to make it counter clockwise use `counter_clockwise` flag + :param kwargs: A dict with ``position_x_axis`` and ``position_z_axis`` keys, + as returned by :meth:`extrude_kwargs`. The original dict is not mutated. + :param angle: Rotation angle, in radians. + :param counter_clockwise: If True, rotate counter-clockwise. Defaults to clockwise. + :return: A new dict with ``position_x_axis`` and ``position_z_axis`` rotated around Z. 
""" rot = np_rotation_matrix(-angle, 3, "Z") kwargs = kwargs.copy() # prevent mutation of original kwargs @@ -1142,7 +1214,11 @@ def rotate_extrusion_kwargs_by_z( return kwargs def get_polyline_coords(self, polyline: ifcopenshell.entity_instance) -> np.ndarray: - """polyline should be either `IfcIndexedPolyCurve` or `IfcPolyline`""" + """Extract the coordinate array from a polyline entity. + + :param polyline: An ``IfcIndexedPolyCurve`` or ``IfcPolyline`` entity. + :return: Numpy array of the polyline's point coordinates. + """ coords = None if polyline.is_a("IfcIndexedPolyCurve"): coords = np.array(polyline.Points.CoordList) @@ -1153,7 +1229,12 @@ def get_polyline_coords(self, polyline: ifcopenshell.entity_instance) -> np.ndar return coords def set_polyline_coords(self, polyline: ifcopenshell.entity_instance, coords: SequenceOfVectors) -> None: - """polyline should be either `IfcIndexedPolyCurve` or `IfcPolyline`""" + """Update the coordinates of a polyline entity in-place. + + :param polyline: An ``IfcIndexedPolyCurve`` or ``IfcPolyline`` entity. + :param coords: New sequence of point coordinates. Must contain the same number of + points as the original polyline. + """ if polyline.is_a("IfcIndexedPolyCurve"): polyline.Points.CoordList = ifc_safe_vector_type(coords) elif polyline.is_a("IfcPolyline"): @@ -1180,8 +1261,8 @@ def get_simple_2dcurve_data( :param fillets: list of points from `coords` to base fillet on. Example: (1,) :param fillet_radius: list of fillet radius for each of corresponding point form `fillets`. Example: (5.,) Note: `fillet_radius` could be just 1 float value if it's the same for all fillets. - :param closed: boolean whether curve should be closed (whether last point connected to first one). Default: True - :param create_ifc_curve: create IfcIndexedPolyCurve or just return the data. Default: False + :param closed: boolean whether curve should be closed (whether last point connected to first one). 
+ :param create_ifc_curve: create IfcIndexedPolyCurve or just return the data. :return: (points, segments, ifc_curve) for the created simple curve if both points in e are equally far from pt, then v1 is returned. @@ -1292,6 +1373,18 @@ def create_z_profile_lips_curve( WallThickness: float, FilletRadius: float, ) -> ifcopenshell.entity_instance: + """Create a Z-profile (cold-formed steel section) outline curve with lips and fillets. + + All dimensions are in the IFC project's length units. + + :param FirstFlangeWidth: Width of the first (top) flange, measured from the web centreline. + :param SecondFlangeWidth: Width of the second (bottom) flange, measured from the web centreline. + :param Depth: Total depth of the section (web height). + :param Girth: Length of the return lips on each flange. + :param WallThickness: Uniform material thickness. + :param FilletRadius: Inner bend radius at each corner. + :return: IfcIndexedPolyCurve representing the closed Z-profile outline. + """ x1 = FirstFlangeWidth x2 = SecondFlangeWidth y = Depth / 2 @@ -1333,10 +1426,17 @@ def create_z_profile_lips_curve( def create_transition_arc_ifc( self, width: float, height: float, create_ifc_curve: bool = False ) -> tuple[SequenceOfVectors, list[list[int]], Union[ifcopenshell.entity_instance, None]]: - """Create an arc in the rectangle with specified width and height. + """Create an arc fitting inside a rectangle of the given width and height. + + If a single arc cannot span the full width, the longest possible radius is used and + a straight segment is inserted in the middle. - If it's not possible to make a complete arc, create an arc with longest radius possible - and straight segment in the middle. + :param width: Width of the bounding rectangle. + :param height: Height of the bounding rectangle (also the maximum arc radius). + :param create_ifc_curve: If True, also create and return an ``IfcIndexedPolyCurve``. + If False, only return the raw point and segment data. 
+ :return: A tuple ``(points, segments, ifc_curve)`` where ``ifc_curve`` is an + ``IfcIndexedPolyCurve`` when ``create_ifc_curve=True``, otherwise ``None``. """ fillet_size = (width / 2) / height if fillet_size <= 1: @@ -1366,6 +1466,14 @@ def create_transition_arc_ifc( return points, segments, transition_arc def mesh(self, points: SequenceOfVectors, faces: Sequence[Sequence[int]]) -> ifcopenshell.entity_instance: + """Create a tessellated mesh from points and face indices. + + Delegates to :meth:`faceted_brep` for IFC2X3, or :meth:`polygonal_face_set` for IFC4 and later. + + :param points: List of 3D coordinates. + :param faces: List of faces, each face a sequence of zero-based point indices. + :return: IfcFacetedBrep (IFC2X3) or IfcPolygonalFaceSet (IFC4+). + """ if self.file.schema == "IFC2X3": return self.faceted_brep(points, faces) return self.polygonal_face_set(points, faces) @@ -1460,10 +1568,10 @@ def extrude_face_set( :param points: list of points, assuming they form consecutive closed polyline. :param magnitude: extrusion magnitude - :param extrusion_vector: extrusion direction, by default it's extruding by Z+ axis + :param extrusion_vector: extrusion direction. :param offset: offset from the points - :param start_cap: if True, create start cap, by default it's True - :param end_cap: if True, create end cap, by default it's True + :param start_cap: if True, create start cap. + :param end_cap: if True, create end cap. :return: IfcPolygonalFaceSet """ @@ -1719,11 +1827,20 @@ def mep_transition_length( angle: float, profile_offset: VectorType = (0.0, 0.0), verbose: bool = True, - ): - """get the final transition length for two profiles dimensions, angle and XY offset between them, - - the difference from `calculate_transition` - `get_transition_length` is making sure - that length will fit both sides of the transition + ) -> Optional[float]: + """Get the transition length for two profile half-dimensions, an angle, and an XY offset. 
+ + Unlike :meth:`mep_transition_calculate`, this method checks that the resulting length + satisfies the angle constraint from both the start and end profile perspectives. + + :param start_half_dim: Half-dimensions of the start profile as a 3-element array + ``[half_x, half_y, depth]``. For circular profiles ``half_x == half_y == radius``. + :param end_half_dim: Half-dimensions of the end profile in the same format. + :param angle: Maximum allowed transition angle, in degrees. + :param profile_offset: 2D XY offset between the centrelines of the start and end profiles. + :param verbose: If True, print diagnostic values during calculation. + :return: Transition length in project length units, or ``None`` if no valid length exists + for the given angle and offset. """ print = lambda *args, **kwargs: __builtins__["print"](*args, **kwargs) if verbose else None np_X, np_Y = 0, 1 @@ -1784,9 +1901,23 @@ def mep_transition_calculate( angle: Optional[float] = None, verbose: bool = True, ) -> Union[float, None]: - """will return transition length based on the profile dimension differences and offset. - - If `length` is provided will return transition angle""" + """Calculate MEP transition length from angle, or transition angle from length. + + Low-level calculation kernel used by :meth:`mep_transition_length`. Provide either + ``angle`` or ``length`` (not both); the other value is computed and returned. + + :param start_half_dim: Half-dimensions of the start profile ``[half_x, half_y, depth]``. + :param end_half_dim: Half-dimensions of the end profile ``[half_x, half_y, depth]``. + :param offset: 2D XY offset between profile centrelines. + :param diff: Pre-computed absolute difference of start and end half-dimensions (XY only). + Computed from ``start_half_dim`` and ``end_half_dim`` if not provided. + :param end_profile: If True, swap X and Y axes to compute from the end-profile perspective. + :param length: Known transition length. 
If provided, the corresponding angle is returned. + :param angle: Known transition angle, in degrees. If provided, the corresponding length is returned. + :param verbose: If True, print diagnostic values during calculation. + :return: Transition length (if ``angle`` was given) or transition angle in degrees + (if ``length`` was given), or ``None`` if the geometry is not feasible. + """ print = lambda *args, **kwargs: __builtins__["print"](*args, **kwargs) if verbose else None diff --git a/src/ifcopenshell-python/ifcopenshell/util/system.py b/src/ifcopenshell-python/ifcopenshell/util/system.py index 832daefc0d6..39d92070180 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/system.py +++ b/src/ifcopenshell-python/ifcopenshell/util/system.py @@ -17,8 +17,9 @@ # along with IfcOpenShell. If not, see . +from typing import Literal, Optional, Union + import ifcopenshell.util.system -from typing import Optional, Union, Literal group_types: dict[str, tuple[str, ...]] = { "IfcZone": ("IfcZone", "IfcSpace", "IfcSpatialZone"), diff --git a/src/ifcopenshell-python/ifcopenshell/util/type.py b/src/ifcopenshell-python/ifcopenshell/util/type.py index ab588142671..86a1ccc7c94 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/type.py +++ b/src/ifcopenshell-python/ifcopenshell/util/type.py @@ -16,8 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . -import os import json +import os + import ifcopenshell.util.schema cwd = os.path.dirname(os.path.realpath(__file__)) diff --git a/src/ifcopenshell-python/ifcopenshell/util/unit.py b/src/ifcopenshell-python/ifcopenshell/util/unit.py index c45d226128b..7e4686d4e4a 100644 --- a/src/ifcopenshell-python/ifcopenshell/util/unit.py +++ b/src/ifcopenshell-python/ifcopenshell/util/unit.py @@ -16,14 +16,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with IfcOpenShell. If not, see . 
+from collections.abc import Generator from fractions import Fraction from math import pi from typing import Literal, Optional, Union -from collections.abc import Generator import ifcopenshell import ifcopenshell.ifcopenshell_wrapper as ifcopenshell_wrapper -import ifcopenshell.api.unit prefixes = { "EXA": 1e18, @@ -209,6 +208,7 @@ "pound": 0.454, "ton UK": 1016.0469088, "ton US": 907.18474, + "tonne": 1000.0, "lbf": 4.4482216153, "kip": 4448.2216153, "psi": 6894.7572932, @@ -253,6 +253,7 @@ "pound": "MASSUNIT", "ton UK": "MASSUNIT", "ton US": "MASSUNIT", + "tonne": "MASSUNIT", "lbf": "FORCEUNIT", "kip": "FORCEUNIT", "psi": "PRESSUREUNIT", @@ -323,6 +324,7 @@ "pound": "lb", "ton UK": "ton", "ton US": "ton", + "tonne": "t", "lbf": "lbf", "kip": "kip", "psi": "psi", @@ -475,7 +477,7 @@ def get_property_unit( measure_class = None if prop.is_a("IfcPhysicalSimpleQuantity"): - entity = prop.declaration + entity = prop.wrapped_data.declaration().as_entity() measure_class = entity.attribute_by_index(3).type_of_attribute().declared_type().name() elif prop.is_a("IfcPropertySingleValue"): measure_class = prop.NominalValue.is_a() @@ -866,16 +868,16 @@ def iter_element_and_attributes_per_type(ifc_file: ifcopenshell.file, attr_type_ def convert_file_length_units(ifc_file: ifcopenshell.file, target_units: str = "METER") -> ifcopenshell.file: """Converts all units in an IFC file to the specified target units. 
Returns a new file.""" - import ifcopenshell.util.element - import ifcopenshell.util.geolocation import ifcopenshell.api.georeference import ifcopenshell.api.unit + import ifcopenshell.util.element + import ifcopenshell.util.geolocation prefix = get_prefix(target_units) si_unit = get_unit_name(target_units) # Copy all elements from the original file to the patched file - file_patched = ifcopenshell.file.from_string(ifc_file.to_string()) + file_patched = ifcopenshell.file.from_string(ifc_file.wrapped_data.to_string()) old_length = get_project_unit(file_patched, "LENGTHUNIT") if si_unit: From b13795341919ce2708a5f4f35c6bf5331e524c98 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 16:27:06 +1000 Subject: [PATCH 02/37] Local hacks to compile and monkey patch issues in the Python world All AI generated slop. Do NOT trust these "fixes". It's just to get it working on my machine. --- build.sh | 30 ++ cmake/CMakeLists.txt | 3 +- findings.md | 317 ++++++++++++++++++ .../ifcopenshell/__init__.py | 3 + .../ifcopenshell/entity_instance.py | 52 ++- src/ifcparse/spf_header.h | 2 +- src/ifcparse/storage.h | 1 + 7 files changed, 404 insertions(+), 4 deletions(-) create mode 100755 build.sh create mode 100644 findings.md diff --git a/build.sh b/build.sh new file mode 100755 index 00000000000..939c4ae4435 --- /dev/null +++ b/build.sh @@ -0,0 +1,30 @@ +#!/bin/sh +set -e + +mkdir -p build && cd build + +cmake ../cmake \ + -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DPython_EXECUTABLE=/home/dion/Projects/env/bin/python3.11 \ + -DPython_INCLUDE_DIR=/usr/include/python3.11 \ + -DBUILD_IFCPYTHON=ON \ + -DBUILD_IFCGEOM=ON \ + -DBUILD_CONVERT=ON \ + -DBUILD_GEOMSERVER=OFF \ + -DBUILD_EXAMPLES=OFF \ + -DWITH_OPENCASCADE=ON \ + -DWITH_CGAL=ON \ + -DWITH_MANIFOLD=ON \ + -DHDF5_SUPPORT=OFF \ + -DGLTF_SUPPORT=ON \ + -DIFCXML_SUPPORT=OFF \ + -DCOLLADA_SUPPORT=OFF \ + -DSCHEMA_VERSIONS="2x3;4;4x3_add2" \ + -DOCC_INCLUDE_DIR=/usr/include/opencascade \ + 
-DOCC_LIBRARY_DIR=/usr/lib64/opencascade + +ninja + +cp ifcwrap/_ifcopenshell_wrapper*.so ifcwrap/ifcopenshell_wrapper.py \ + ../src/ifcopenshell-python/ifcopenshell/ diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index cbc67041792..8a0518c87f1 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -332,7 +332,8 @@ if(WASM_BUILD) else() # @todo review this, shouldn't this be all possible header-only now? # ... or rewritten using C++17 features? - set(BOOST_COMPONENTS system program_options regex thread date_time iostreams) + # set(BOOST_COMPONENTS system program_options regex thread date_time iostreams) + set(BOOST_COMPONENTS program_options regex thread date_time iostreams) endif() if(USE_MMAP) diff --git a/findings.md b/findings.md new file mode 100644 index 00000000000..ea211e59528 --- /dev/null +++ b/findings.md @@ -0,0 +1,317 @@ +# Build fix: remove `boost_system` from CMake components + +`Boost.System` became header-only in Boost 1.69. Boost 1.90.0 no longer ships a compiled library or CMake config for it, so `find_package(Boost REQUIRED COMPONENTS system ...)` fails. + +## Fix + +`cmake/CMakeLists.txt`: + +```diff +- set(BOOST_COMPONENTS system program_options regex thread date_time iostreams) ++ set(BOOST_COMPONENTS program_options regex thread date_time iostreams) +``` + +The headers are still available; no linking is needed. + +# Build fix: add `template` keyword for dependent template member calls + +Calling a template member function through a dependent expression (e.g. `storage->has_attribute_value(...)` where `storage`'s type depends on a template parameter) requires the `template` keyword to disambiguate from a less-than comparison. + +## Error + +``` +src/ifcparse/IfcParse.cpp:1856:67: error: expected primary-expression before '>' token + 1856 | if (storage->has_attribute_value(attr_index)) { + | ^ +``` + +Six identical errors at lines 1856, 1865, 1896, 1905, 1934, 1943. 
+ +## Fix + +`src/ifcparse/IfcParse.cpp`: + +```diff +-storage->has_attribute_value(attr_index) +storage->template has_attribute_value(attr_index) + +-storage->has_attribute_value(attr_index) +storage->template has_attribute_value(attr_index) +``` + +Applied at all six call sites in `in_memory_file_storage::read_from_stream`. + +# Linker fix: missing explicit template instantiations for `InstanceStreamer` + +`InstanceStreamer` is a class template with methods defined in `IfcParse.cpp`, not the header. Without explicit instantiations, the linker can't find the symbols when the SWIG wrapper loads. + +## Error + +``` +ImportError: undefined symbol: _ZN8IfcParse16InstanceStreamerINS_10FileReaderINS_14FullBufferImplEEEEC1EPS3_PNS_7IfcFileE + (IfcParse::InstanceStreamer<FileReader<FullBufferImpl>>::InstanceStreamer(FileReader<FullBufferImpl>*, IfcFile*)) +``` + +## Fix + +Cannot use `template class InstanceStreamer<...>` because some constructors have `static_assert` guards that reject certain reader types. Instead, instantiate each member function individually per reader type, only including the constructors valid for that type. + +`src/ifcparse/IfcParse.cpp` (after the last `InstanceStreamer` method definition): + +```cpp +// FullBufferImpl +template IfcParse::InstanceStreamer<IfcParse::FileReader<IfcParse::FullBufferImpl>>::InstanceStreamer(IfcParse::IfcFile*); +template IfcParse::InstanceStreamer<IfcParse::FileReader<IfcParse::FullBufferImpl>>::InstanceStreamer(const std::string&, bool, IfcParse::IfcFile*); +template IfcParse::InstanceStreamer<IfcParse::FileReader<IfcParse::FullBufferImpl>>::InstanceStreamer(void*, int, IfcParse::IfcFile*); +template IfcParse::InstanceStreamer<IfcParse::FileReader<IfcParse::FullBufferImpl>>::InstanceStreamer(FileReader<IfcParse::FullBufferImpl>*, IfcParse::IfcFile*); +// ... 
plus ensure_header, initialize_header, hasSemicolon, semicolonCount, +// pushPage, bypassTypes, readInstance + +// PushedSequentialImpl — same pattern, different valid constructors + +// MMapFileReader (ifdef USE_MMAP) — same pattern +``` + +# Linker fix: `FullBufferImpl` missing buffer constructor + +SWIG's `stream_from_string` calls `InstanceStreamer>(void*, int, IfcFile*)`, but the `(void*, int)` constructor previously hit a `static_assert` for `FullBufferImpl` — it only allowed `PushedSequentialImpl`. + +## Error + +``` +ImportError: undefined symbol: _ZN8IfcParse16InstanceStreamerINS_10FileReaderINS_14FullBufferImplEEEEC1EPviPNS_7IfcFileE + (InstanceStreamer>::InstanceStreamer(void*, int, IfcFile*)) +``` + +## Fix + +Three changes to make `FullBufferImpl` support buffer-based and default construction: + +`src/ifcparse/FileReader.h` — add buffer constructor to `FullBufferImpl`: + +```diff + class IFC_PARSE_API FullBufferImpl { + public: + explicit FullBufferImpl(const std::string& fn); ++ FullBufferImpl(void* data, size_t length); +``` + +`src/ifcparse/FileReader.h` — add `FileReader(void*, size_t)` forwarding constructor: + +```diff ++ FileReader(void* data, size_t length) ++ : cursor_(0) { ++ if constexpr (std::is_same_v) { ++ impl_ = std::make_shared(data, length); ++ } else { ++ static_assert(...); ++ } ++ } +``` + +`src/ifcparse/FileReader.cpp` — implement the constructor: + +```cpp +FullBufferImpl::FullBufferImpl(void* data, size_t length) + : buf_(static_cast(data), static_cast(data) + length) + , size_(length) { +} +``` + +`src/ifcparse/IfcParse.cpp` — extend the two `InstanceStreamer` constructors to accept `FullBufferImpl`: + +```diff + // InstanceStreamer(IfcFile*): ++ } else if constexpr (std::is_same_v>) { ++ owned_stream_ = std::make_unique(nullptr, (size_t)0); + + // InstanceStreamer(void*, int, IfcFile*): ++ } else if constexpr (std::is_same_v>) { ++ owned_stream_ = std::make_unique(data, (size_t)length); +``` + +# Runtime fix: segfault in 
`parse_context::push()` due to vector reallocation + +`parse_context_pool` stores nodes in a `std::vector`. During parsing, `load()` takes a `parse_context&` parameter and calls `context.push()`, which calls `pool_->make()`. If the pool's vector reallocates (via `emplace_back`), all existing references into the vector — including the `context` reference held by the caller — become dangling. Subsequent access through the dangling reference causes a segfault. + +Triggered by larger IFC files (e.g. `ISSUE_159_kleine_Wohnung_R22.ifc`, 9.5 MB) that cause enough pool growth to trigger reallocation. + +## Error + +``` +Thread 1 received signal SIGSEGV, Segmentation fault. +0x... in IfcParse::parse_context::push() + #1 in_memory_file_storage::load(...) // context& is dangling after reallocation + #2 in_memory_file_storage::load(...) // parent call + #3 InstanceStreamer::readInstance() +``` + +## Fix + +`src/ifcparse/storage.h` — change the pool container from `std::vector` to `std::deque`, which does not invalidate references on `push_back`/`emplace_back`: + +```diff ++#include + + struct parse_context_pool { +- std::vector nodes_; ++ std::deque nodes_; +``` + +# Runtime fix: `express::Base` comparison operators throw on null/expired instances + +`express::Base::operator<` and `operator==` called `data()`, which throws `std::runtime_error("Trying to access deleted instance reference")` when the internal `weak_ptr` is expired. A default-constructed `express::Base` (the value-type equivalent of a null pointer) always has an expired `weak_ptr`. + +## Why this model triggers it + +The bug requires two conditions to coincide: + +1. A representation is shared by **more than one product** (via `IfcRepresentationMap` / `IfcMappedItem`). +2. At least one of those products has **no material association**, so `get_single_material_association()` returns `express::Base{}` (the null equivalent). 
+ +In `advanced_model.ifc`, Body representations like `#449` (Body/Brep) have a single `IfcRepresentationMap` (`#453`) with 13 `IfcMappedItem` usages, meaning 13 products share the geometry. Some of those products (e.g. `IfcFlowTerminal` instances) have no `IfcRelAssociatesMaterial`, so `get_single_material_association` returns `express::Base{}`. + +Smaller or simpler models don't hit this because either: +- Every representation maps to only 1 product → `reuse_ok_` short-circuits at `products.size() == 1` before reaching the material check. +- Every product has a material association → no null `express::Base` is ever inserted into the set. + +## Exact call sequence + +``` +Iterator::initialize() + try { + mapping::get_representations(reps, filters_) + addRepresentationsFromDefaultContexts(representations) + → collects reps from subcontexts in order: + Axis (#115): 143 reps + Body (#117): 7550 reps + FootPrint (#119): 12 reps + + for (auto representation : representations): + + ── Axis reps (indices 0–142) ────────────────────────── + products_represented_by(rep, rmap) + → OfProductRepresentation: 1 product each + filter_products(products, filters) → 1 product + reuse_ok_(ifcproducts) + → products.size() == 1 → return true ← SHORT-CIRCUIT, no material check + representation_mapped_to(rep) → null (no MappedItem) + → task created. 143 tasks accumulated. + + ── First Body rep #449 (Body/Brep) ──────────────────── + products_represented_by(#449, rmap) + → OfProductRepresentation: empty + → RepresentationMap: 1 map (#453) + → MapUsage: 13 MappedItems → traces through to 13 IfcProducts + filter_products(products, filters) → 13 products + reuse_ok_(ifcproducts) ← CRASH HERE + → products.size() == 1? 
NO (13 products) + → for each product: + find_openings(product) → OK + get_single_material_association(product) + → some products have no IfcRelAssociatesMaterial + → returns express::Base{} (expired weak_ptr) + associated_single_materials.insert(result) + → std::set::insert calls operator< + → operator< calls data() + → data() calls data_.lock() → expired → THROWS + "Trying to access deleted instance reference" + + } catch (const std::exception& e) { + Logger::Error(e) ← exception caught here, get_representations aborted + } + + → reps contains only the 143 Axis tasks created before the throw + → all 143 Axis reps have Curve2D geometry → map(representation) returns null + → no valid elements produced → initialize() returns false +``` + +In the old pointer-based code, `reuse_ok_` used `std::set` and `get_single_material_association` returned `nullptr`. Inserting `nullptr` into a `std::set` is a plain pointer comparison — no dereference, no throw. The refactoring to `std::set` changed the comparison from pointer comparison to `express::Base::operator<`, which unconditionally dereferences through `data()`. + +## Error + +``` +[Error] Trying to access deleted instance reference +[Notice] Created 143 tasks for 143 products ← only Axis reps; all Body reps lost +initialize() returned: False +``` + +## Fix + +`src/ifcparse/express.h` — use `weak_ptr::lock().get()` instead of `data()` so that expired pointers compare as `nullptr` (matching old raw-pointer semantics): + +```diff + bool operator<(const Base& other) const { +- return data() < other.data(); ++ auto a = data_.lock(); ++ auto b = other.data_.lock(); ++ return a.get() < b.get(); + } + + bool operator==(const Base& other) const { +- return data() == other.data(); ++ auto a = data_.lock(); ++ auto b = other.data_.lock(); ++ return a.get() == b.get(); + } +``` + +# Runtime fix: `entity_instance` missing `get_inverse` due to SWIG `%rename` collision + +Accessing inverse attributes (e.g. 
`element.IsDecomposedBy`) on any entity raises `AttributeError: entity instance of type 'IFC2X3.IfcProject' has no attribute 'get_inverse'`. + +## Why + +`entity_instance_mixin.__getattr__` (line 106 of `entity_instance.py`) calls `self.get_inverse(name)` when it detects an inverse attribute. Since the mixin inherits into the SWIG-generated `entity_instance` class (via the `object = custom_base` hack in `IfcParseWrapper.i:936`), `self.get_inverse` must resolve to a method on the SWIG class. + +However, `IfcParseWrapper.i:70` has a global rename: + +``` +%rename("get_inverses_by_declaration") get_inverse; +``` + +This was intended for `ifcopenshell::file::get_inverse` (which takes an entity + declaration and returns instances by reference), but SWIG `%rename` is global — it also renames the `%extend express::Base` method `get_inverse(const std::string& a)` at line 551. So the Python-side `entity_instance` class exposes the method as `get_inverses_by_declaration`, not `get_inverse`. + +The old code (`v0.8.0`) didn't hit this because `__getattr__` called `self.wrapped_data.get_inverse(name)` on an inner `ifcopenshell_wrapper.entity_instance` object — but in that old layout, the inner object was constructed differently and the rename didn't apply the same way (or the method had a different path). In the new mixin approach, `self` **is** the SWIG object, so the rename is directly visible. + +## Fix + +`src/ifcwrap/IfcParseWrapper.i` — override the global rename specifically for `express::Base::get_inverse`, restoring the original name on entity instances: + +```diff ++%rename("get_inverse") express::Base::get_inverse; + %rename("get_inverses_by_declaration") get_inverse; +``` + +Add this line **before** the global rename (or anywhere before the `%extend express::Base` block). 
This scoped rename takes precedence for `express::Base`, so: +- `entity_instance.get_inverse(name)` works as the mixin expects +- `file.get_inverses_by_declaration(...)` keeps its intended name + +## Python-side workaround + +`entity_instance.py:106` — call the method by its SWIG-renamed name: + +```diff +- vs = self.get_inverse(name) ++ vs = self.get_inverses_by_declaration(name) +``` + +# Runtime fix: `entity_instance` class no longer importable from `entity_instance` module + +The class rename from `entity_instance` to `entity_instance_mixin` broke external code that does `from ifcopenshell.entity_instance import entity_instance`. + +## Error + +``` +ImportError: cannot import name 'entity_instance' from 'ifcopenshell.entity_instance' +``` + +Triggered at import time via `ifcopenshell.util.pset` (and likely other modules). + +## Fix + +`src/ifcopenshell-python/ifcopenshell/entity_instance.py` — add a backwards-compatible alias at the bottom of the module: + +```python +entity_instance = entity_instance_mixin +``` diff --git a/src/ifcopenshell-python/ifcopenshell/__init__.py b/src/ifcopenshell-python/ifcopenshell/__init__.py index faafc055605..c9cffd5c6bc 100644 --- a/src/ifcopenshell-python/ifcopenshell/__init__.py +++ b/src/ifcopenshell-python/ifcopenshell/__init__.py @@ -94,6 +94,9 @@ from .file import rocksdb_lazy_instance from . 
import guid from .ifcopenshell_wrapper import entity_instance +from .entity_instance import _patch_swig_comparisons +_patch_swig_comparisons() +del _patch_swig_comparisons from .sql import sqlite, sqlite_entity # explicitly specify available imported symbols diff --git a/src/ifcopenshell-python/ifcopenshell/entity_instance.py b/src/ifcopenshell-python/ifcopenshell/entity_instance.py index 121d0c68843..b7f5d1ac7c6 100644 --- a/src/ifcopenshell-python/ifcopenshell/entity_instance.py +++ b/src/ifcopenshell-python/ifcopenshell/entity_instance.py @@ -103,7 +103,7 @@ def __getattr__(self, name: str) -> Any: idx = self.get_argument_index(name) return self.get_argument(idx) elif attr_cat == INVERSE: - vs = self.get_inverse(name) + vs = self.get_inverses_by_declaration(name) if settings.unpack_non_aggregate_inverses: schema_name = self.is_a(True).split(".")[0] ent: ifcopenshell_wrapper.entity @@ -213,11 +213,17 @@ def __setitem__(self, idx: int, value: T) -> T: return value def __eq__(self, other: entity_instance_mixin) -> bool: - if not isinstance(self, type(other)): + if other is None or not isinstance(other, entity_instance_mixin): return False else: raise NotImplementedError + def __ne__(self, other: entity_instance_mixin) -> bool: + if other is None or not isinstance(other, entity_instance_mixin): + return True + else: + raise NotImplementedError + def is_entity(self) -> bool: """Tests whether the instance is an entity type as opposed to a simple data type. @@ -395,3 +401,45 @@ def get_info_2( assert return_type is dict assert len(ignore) == 0 return ifcopenshell_wrapper.get_info_cpp(self, recursive, include_identifier) + + +# Alias for backwards compatibility — external code imports this name. +entity_instance = entity_instance_mixin + + +# Monkey-patch SWIG's __eq__, __ne__, __lt__ on the generated entity_instance +# class to guard against None / non-entity arguments. 
SWIG generates these +# directly on the class (overriding the mixin), and they pass arguments straight +# to C++ which rejects null references. +# Deferred until after ifcopenshell_wrapper finishes loading to avoid circular import. +_swig_comparisons_patched = False + + +def _patch_swig_comparisons(): + global _swig_comparisons_patched + if _swig_comparisons_patched: + return + _swig_cls = ifcopenshell_wrapper.entity_instance + _orig_eq = _swig_cls.__eq__ + _orig_ne = _swig_cls.__ne__ + _orig_lt = _swig_cls.__lt__ + + def _safe_eq(self, other): + if other is None or not isinstance(other, _swig_cls): + return NotImplemented + return _orig_eq(self, other) + + def _safe_ne(self, other): + if other is None or not isinstance(other, _swig_cls): + return NotImplemented + return _orig_ne(self, other) + + def _safe_lt(self, other): + if other is None or not isinstance(other, _swig_cls): + return NotImplemented + return _orig_lt(self, other) + + _swig_cls.__eq__ = _safe_eq + _swig_cls.__ne__ = _safe_ne + _swig_cls.__lt__ = _safe_lt + _swig_comparisons_patched = True diff --git a/src/ifcparse/spf_header.h b/src/ifcparse/spf_header.h index 9daf4a25e80..efd6304553a 100644 --- a/src/ifcparse/spf_header.h +++ b/src/ifcparse/spf_header.h @@ -30,7 +30,7 @@ class file; class IFC_PARSE_API spf_header { private: - file* file_; + ifcopenshell::file* file_; std::array, 3> header_entities_; diff --git a/src/ifcparse/storage.h b/src/ifcparse/storage.h index 06592b84777..38ada04cd81 100644 --- a/src/ifcparse/storage.h +++ b/src/ifcparse/storage.h @@ -31,6 +31,7 @@ namespace rocksdb { #include #include #include +#include #include #include #include From 92372f4324979001a6620217f0447f10d3b58020 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 16:30:10 +1000 Subject: [PATCH 03/37] Dump of hello world ifc viewer code --- cmake/CMakeLists.txt | 4 + src/ifcviewer/AppSettings.cpp | 57 +++ src/ifcviewer/AppSettings.h | 48 ++ src/ifcviewer/CMakeLists.txt | 61 +++ 
src/ifcviewer/GeometryStreamer.cpp | 285 ++++++++++++ src/ifcviewer/GeometryStreamer.h | 89 ++++ src/ifcviewer/MainWindow.cpp | 270 ++++++++++++ src/ifcviewer/MainWindow.h | 80 ++++ src/ifcviewer/README.md | 129 ++++++ src/ifcviewer/SettingsWindow.cpp | 68 +++ src/ifcviewer/SettingsWindow.h | 46 ++ src/ifcviewer/ViewportWindow.cpp | 674 +++++++++++++++++++++++++++++ src/ifcviewer/ViewportWindow.h | 146 +++++++ src/ifcviewer/main.cpp | 55 +++ 14 files changed, 2012 insertions(+) create mode 100644 src/ifcviewer/AppSettings.cpp create mode 100644 src/ifcviewer/AppSettings.h create mode 100644 src/ifcviewer/CMakeLists.txt create mode 100644 src/ifcviewer/GeometryStreamer.cpp create mode 100644 src/ifcviewer/GeometryStreamer.h create mode 100644 src/ifcviewer/MainWindow.cpp create mode 100644 src/ifcviewer/MainWindow.h create mode 100644 src/ifcviewer/README.md create mode 100644 src/ifcviewer/SettingsWindow.cpp create mode 100644 src/ifcviewer/SettingsWindow.h create mode 100644 src/ifcviewer/ViewportWindow.cpp create mode 100644 src/ifcviewer/ViewportWindow.h create mode 100644 src/ifcviewer/main.cpp diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8a0518c87f1..e06bdc1b45a 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -70,6 +70,7 @@ option(BUILD_EXAMPLES "Build example applications." ON) option(BUILD_GEOMSERVER "Build IfcGeomServer executable (Open CASCADE is required)." ON) option(BUILD_IFCMAX "Build IfcMax, a 3ds Max plug-in, Windows-only." 
OFF) option(BUILD_QTVIEWER "Build IfcOpenShell Qt GUI Viewer" OFF) # QtViewer requires Qt6 +option(BUILD_IFCVIEWER "Build IfcViewer, a high-performance IFC viewer" OFF) # Requires Qt6 + OpenGL 4.5 option(BUILD_PACKAGE "" OFF) option(WITH_OPENCASCADE "Enable geometry interpretation using Open CASCADE" ON) @@ -713,6 +714,9 @@ if(BUILD_IFCGEOM) install(TARGETS ${IFCGEOM_SCHEMA_LIBRARIES} ${kernel_libraries} IfcGeom) endif(BUILD_IFCGEOM) +if(BUILD_IFCVIEWER) + add_subdirectory(../src/ifcviewer ifcviewer) +endif() # Cmake uninstall target if(NOT TARGET uninstall) diff --git a/src/ifcviewer/AppSettings.cpp b/src/ifcviewer/AppSettings.cpp new file mode 100644 index 00000000000..07c5f8c3bc2 --- /dev/null +++ b/src/ifcviewer/AppSettings.cpp @@ -0,0 +1,57 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . 
* + * * + ********************************************************************************/ + +#include "AppSettings.h" + +#include + +namespace { +constexpr const char* kGeometryLibraryKey = "geometry/library"; +constexpr const char* kGeometryLibraryDefault = "hybrid-cgal-simple-opencascade"; +} + +AppSettings& AppSettings::instance() { + static AppSettings inst; + return inst; +} + +AppSettings::AppSettings() { + load(); +} + +QString AppSettings::geometryLibrary() const { + return geometry_library_; +} + +void AppSettings::setGeometryLibrary(const QString& value) { + if (geometry_library_ == value) return; + geometry_library_ = value; + persist(); + emit geometryLibraryChanged(value); +} + +void AppSettings::load() { + QSettings settings; + geometry_library_ = settings.value(kGeometryLibraryKey, kGeometryLibraryDefault).toString(); +} + +void AppSettings::persist() { + QSettings settings; + settings.setValue(kGeometryLibraryKey, geometry_library_); +} diff --git a/src/ifcviewer/AppSettings.h b/src/ifcviewer/AppSettings.h new file mode 100644 index 00000000000..9658c10b955 --- /dev/null +++ b/src/ifcviewer/AppSettings.h @@ -0,0 +1,48 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . 
* + * * + ********************************************************************************/ + +#ifndef APPSETTINGS_H +#define APPSETTINGS_H + +#include +#include + +// Application-wide preferences. Cached in memory, persisted via QSettings to +// the OS-native config location (registry on Windows, plist on macOS, INI on +// Linux). Access via AppSettings::instance(). +class AppSettings : public QObject { + Q_OBJECT +public: + static AppSettings& instance(); + + QString geometryLibrary() const; + void setGeometryLibrary(const QString& value); + +signals: + void geometryLibraryChanged(const QString& value); + +private: + AppSettings(); + void load(); + void persist(); + + QString geometry_library_; +}; + +#endif // APPSETTINGS_H diff --git a/src/ifcviewer/CMakeLists.txt b/src/ifcviewer/CMakeLists.txt new file mode 100644 index 00000000000..9f1c4dac502 --- /dev/null +++ b/src/ifcviewer/CMakeLists.txt @@ -0,0 +1,61 @@ +################################################################################ +# # +# This file is part of IfcOpenShell. # +# # +# IfcOpenShell is free software: you can redistribute it and/or modify # +# it under the terms of the Lesser GNU General Public License as published by # +# the Free Software Foundation, either version 3.0 of the License, or # +# (at your option) any later version. # +# # +# IfcOpenShell is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# Lesser GNU General Public License for more details. # +# # +# You should have received a copy of the Lesser GNU General Public License # +# along with this program. If not, see . # +# # +################################################################################ + +message("Running CMakeLists.txt in /src/ifcviewer") + +set(QT_VERSION 6 CACHE STRING "Qt version") +# IfcViewer always needs OpenGL in addition to Core/Gui/Widgets. 
We don't use +# the CACHE'd QT_COMPONENTS here because it may have been set by another target +# (e.g. qtviewer) without the OpenGL component. +find_package(Qt${QT_VERSION} COMPONENTS Core Gui Widgets OpenGL REQUIRED PATHS ${QT_DIR}) + +find_package(OpenGL REQUIRED) + +file(GLOB IFCVIEWER_CPP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +file(GLOB IFCVIEWER_H_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.h) +set(IFCVIEWER_FILES ${IFCVIEWER_CPP_FILES} ${IFCVIEWER_H_FILES}) + +add_executable(IfcViewer ${IFCVIEWER_FILES}) + +set_target_properties(IfcViewer PROPERTIES + AUTOMOC ON + WIN32_EXECUTABLE ON + MACOSX_BUNDLE ON +) + +target_link_libraries(IfcViewer PRIVATE + IfcGeom + IfcParse + ${kernel_libraries} + ${OpenCASCADE_LIBRARIES} + ${Boost_LIBRARIES} + ${CGAL_LIBRARIES} + Qt${QT_VERSION}::Core + Qt${QT_VERSION}::Gui + Qt${QT_VERSION}::Widgets + Qt${QT_VERSION}::OpenGL + OpenGL::GL +) + +if(UNIX AND NOT APPLE) + find_package(Threads REQUIRED) + target_link_libraries(IfcViewer PRIVATE Threads::Threads) +endif() + +install(TARGETS IfcViewer EXPORT ${IFCOPENSHELL_EXPORT_TARGETS}) diff --git a/src/ifcviewer/GeometryStreamer.cpp b/src/ifcviewer/GeometryStreamer.cpp new file mode 100644 index 00000000000..39698c84e62 --- /dev/null +++ b/src/ifcviewer/GeometryStreamer.cpp @@ -0,0 +1,285 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. 
* + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "GeometryStreamer.h" +#include "AppSettings.h" +#include "../ifcgeom/hybrid_kernel.h" + +#include +#include +#include +#include +#include + +GeometryStreamer::GeometryStreamer(QObject* parent) + : QObject(parent) +{ +} + +GeometryStreamer::~GeometryStreamer() { + cancel(); + if (worker_thread_ && worker_thread_->isRunning()) { + worker_thread_->quit(); + worker_thread_->wait(); + } +} + +void GeometryStreamer::loadFile(const std::string& path, int num_threads) { + if (running_.load()) { + cancel(); + if (worker_thread_ && worker_thread_->isRunning()) { + worker_thread_->quit(); + worker_thread_->wait(); + } + } + + cancel_requested_ = false; + running_ = true; + progress_ = 0; + next_object_id_ = 1; + + { + std::lock_guard lock(elements_mutex_); + pending_elements_.clear(); + } + + if (num_threads <= 0) { + num_threads = std::max(1u, std::thread::hardware_concurrency()); + } + + worker_thread_ = std::make_unique(); + QObject* context = new QObject(); + context->moveToThread(worker_thread_.get()); + + connect(worker_thread_.get(), &QThread::started, context, [this, path, num_threads, context]() { + run(path, num_threads); + context->deleteLater(); + worker_thread_->quit(); + }); + + connect(worker_thread_.get(), &QThread::finished, this, [this]() { + running_ = false; + emit finished(); + }); + + worker_thread_->start(); +} + +void GeometryStreamer::cancel() { + cancel_requested_ = true; +} + +std::vector GeometryStreamer::drainElements() { + std::lock_guard lock(elements_mutex_); + std::vector result; + result.swap(pending_elements_); + return result; +} + +void GeometryStreamer::run(const std::string& path, int num_threads) { + try { + ifc_file_ = std::make_unique(path); + } catch (const std::exception& e) { + emit 
errorOccurred(QString("Failed to parse IFC file: %1").arg(e.what())); + return; + } + + ifcopenshell::geometry::Settings settings; + settings.set("use-world-coords", true); + settings.set("weld-vertices", false); + settings.set("apply-default-materials", true); + + std::unique_ptr iterator; + try { + const std::string geometry_library = + AppSettings::instance().geometryLibrary().toStdString(); + auto kernel = ifcopenshell::geometry::kernels::construct( + ifc_file_.get(), geometry_library, settings); + iterator = std::make_unique( + std::move(kernel), settings, ifc_file_.get(), std::vector(), num_threads); + } catch (const std::exception& e) { + emit errorOccurred(QString("Failed to create geometry iterator: %1").arg(e.what())); + return; + } + + if (!iterator->initialize()) { + emit errorOccurred("No geometry found in IFC file"); + return; + } + + int last_progress = 0; + + do { + if (cancel_requested_.load()) break; + + const IfcGeom::Element* elem = iterator->get(); + if (!elem) continue; + + const auto* tri_elem = dynamic_cast(elem); + if (!tri_elem) continue; + + uint32_t object_id = next_object_id_++; + + // Record element metadata + ElementInfo info; + info.object_id = object_id; + info.ifc_id = tri_elem->id(); + info.guid = tri_elem->guid(); + info.name = tri_elem->name(); + info.type = tri_elem->type(); + info.parent_id = tri_elem->parent_id(); + + { + std::lock_guard lock(elements_mutex_); + pending_elements_.push_back(std::move(info)); + } + + // Convert geometry to upload chunk + UploadChunk chunk = convertElement(tri_elem, object_id); + if (!chunk.indices.empty()) { + emit elementReady(std::move(chunk)); + } + + int p = iterator->progress(); + if (p != last_progress) { + last_progress = p; + progress_ = p; + emit progressChanged(p); + } + } while (iterator->next()); + + progress_ = 100; + emit progressChanged(100); +} + +static MaterialInfo materialFromStyle(const ifcopenshell::geometry::taxonomy::style::ptr& style) { + MaterialInfo m; + if (!style) 
return m; + + const auto& color = style->get_color(); + if (color) { + m.r = static_cast(color.r()); + m.g = static_cast(color.g()); + m.b = static_cast(color.b()); + } + if (!std::isnan(style->transparency)) { + m.a = 1.0f - static_cast(style->transparency); + } + return m; +} + +static inline uint32_t packRGBA8(const MaterialInfo& m) { + auto to_byte = [](float v) -> uint32_t { + float c = std::clamp(v, 0.0f, 1.0f); + return static_cast(c * 255.0f + 0.5f); + }; + uint32_t r = to_byte(m.r); + uint32_t g = to_byte(m.g); + uint32_t b = to_byte(m.b); + uint32_t a = to_byte(m.a); + // Layout in memory (little-endian) reads as bytes [r, g, b, a] which is + // what the GL_UNSIGNED_BYTE * 4 normalized vertex attribute expects. + return r | (g << 8) | (b << 16) | (a << 24); +} + +UploadChunk GeometryStreamer::convertElement(const IfcGeom::TriangulationElement* elem, uint32_t object_id) { + UploadChunk chunk; + chunk.object_id = object_id; + + const auto& geom = elem->geometry(); + const auto& verts = geom.verts(); + const auto& faces = geom.faces(); + const auto& normals = geom.normals(); + const auto& materials = geom.materials(); + const auto& material_ids = geom.material_ids(); + + if (verts.empty() || faces.empty()) return chunk; + + // Encode object_id as float bits for the vertex attribute + float id_as_float; + static_assert(sizeof(float) == sizeof(uint32_t)); + std::memcpy(&id_as_float, &object_id, sizeof(float)); + + const size_t num_verts = verts.size() / 3; + const size_t num_tris = faces.size() / 3; + const bool have_per_tri_material = (material_ids.size() == num_tris); + + // Per-vertex color requires that any vertex shared between triangles with + // *different* materials be split. We dedupe (orig_vert_idx, mat_id) pairs + // so vertices that are only ever used by one material stay shared. 
+ auto make_key = [](uint32_t orig_idx, int mat_id) -> uint64_t { + return (static_cast(orig_idx) << 32) | + static_cast(mat_id); + }; + + std::unordered_map remap; + remap.reserve(num_verts); + + chunk.vertices.reserve(num_verts * 8); + chunk.indices.reserve(faces.size()); + + auto emit_vertex = [&](uint32_t orig_idx, int mat_id) -> uint32_t { + const uint64_t key = make_key(orig_idx, mat_id); + auto it = remap.find(key); + if (it != remap.end()) return it->second; + + const uint32_t new_idx = static_cast(chunk.vertices.size() / 8); + + // pos + chunk.vertices.push_back(static_cast(verts[orig_idx * 3 + 0])); + chunk.vertices.push_back(static_cast(verts[orig_idx * 3 + 1])); + chunk.vertices.push_back(static_cast(verts[orig_idx * 3 + 2])); + + // normal + if (orig_idx * 3 + 2 < normals.size()) { + chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 0])); + chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 1])); + chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 2])); + } else { + chunk.vertices.push_back(0.0f); + chunk.vertices.push_back(1.0f); + chunk.vertices.push_back(0.0f); + } + + // object_id (float bits) + chunk.vertices.push_back(id_as_float); + + // color (packed RGBA8 reinterpreted as float) + MaterialInfo m; + if (mat_id >= 0 && mat_id < static_cast(materials.size())) { + m = materialFromStyle(materials[mat_id]); + } + uint32_t packed = packRGBA8(m); + float packed_as_float; + std::memcpy(&packed_as_float, &packed, sizeof(float)); + chunk.vertices.push_back(packed_as_float); + + remap.emplace(key, new_idx); + return new_idx; + }; + + for (size_t t = 0; t < num_tris; ++t) { + const int mat_id = have_per_tri_material ? 
material_ids[t] : -1; + chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 0]), mat_id)); + chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 1]), mat_id)); + chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 2]), mat_id)); + } + + return chunk; +} diff --git a/src/ifcviewer/GeometryStreamer.h b/src/ifcviewer/GeometryStreamer.h new file mode 100644 index 00000000000..06b6364a244 --- /dev/null +++ b/src/ifcviewer/GeometryStreamer.h @@ -0,0 +1,89 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . 
* + * * + ********************************************************************************/ + +#ifndef GEOMETRYSTREAMER_H +#define GEOMETRYSTREAMER_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../ifcparse/IfcFile.h" +#include "../ifcgeom/Iterator.h" + +#include "ViewportWindow.h" + +struct ElementInfo { + uint32_t object_id; + int ifc_id; + std::string guid; + std::string name; + std::string type; + int parent_id; +}; + +class GeometryStreamer : public QObject { + Q_OBJECT +public: + explicit GeometryStreamer(QObject* parent = nullptr); + ~GeometryStreamer(); + + void loadFile(const std::string& path, int num_threads = 0); + void cancel(); + + bool isRunning() const { return running_.load(); } + int progress() const { return progress_.load(); } + + IfcParse::IfcFile* ifcFile() const { return ifc_file_.get(); } + + // Thread-safe access to discovered elements + std::vector drainElements(); + +signals: + void progressChanged(int percent); + void elementReady(UploadChunk chunk); + void finished(); + void errorOccurred(const QString& message); + +private: + void run(const std::string& path, int num_threads); + + UploadChunk convertElement(const IfcGeom::TriangulationElement* elem, uint32_t object_id); + + std::unique_ptr ifc_file_; + std::unique_ptr worker_thread_; + std::atomic running_{false}; + std::atomic cancel_requested_{false}; + std::atomic progress_{0}; + + std::mutex elements_mutex_; + std::vector pending_elements_; + + // Map from IFC product id to our compact object_id + uint32_t next_object_id_ = 1; // 0 = no object +}; + +#endif // GEOMETRYSTREAMER_H diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp new file mode 100644 index 00000000000..1f32ce0877c --- /dev/null +++ b/src/ifcviewer/MainWindow.cpp @@ -0,0 +1,270 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. 
* + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "MainWindow.h" +#include "SettingsWindow.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +MainWindow::MainWindow(QWidget* parent) + : QMainWindow(parent) +{ + setupUi(); + setupMenus(); + + streamer_ = new GeometryStreamer(this); + connect(streamer_, &GeometryStreamer::progressChanged, this, &MainWindow::onProgressChanged, Qt::QueuedConnection); + connect(streamer_, &GeometryStreamer::elementReady, this, &MainWindow::onElementReady, Qt::QueuedConnection); + connect(streamer_, &GeometryStreamer::finished, this, &MainWindow::onStreamingFinished, Qt::QueuedConnection); + connect(streamer_, &GeometryStreamer::errorOccurred, this, [this](const QString& msg) { + QMessageBox::warning(this, "Error", msg); + }, Qt::QueuedConnection); + + connect(&element_poll_timer_, &QTimer::timeout, this, &MainWindow::pollNewElements); + element_poll_timer_.setInterval(100); + + setWindowTitle("IfcViewer"); + resize(1400, 900); +} + +MainWindow::~MainWindow() {} + +void MainWindow::setupUi() { + // 3D Viewport as central widget + viewport_ = new ViewportWindow(); + viewport_container_ = QWidget::createWindowContainer(viewport_, this); + viewport_container_->setMinimumSize(400, 300); 
+ viewport_container_->setFocusPolicy(Qt::StrongFocus); + setCentralWidget(viewport_container_); + + connect(viewport_, &ViewportWindow::objectPicked, this, &MainWindow::onObjectPicked); + + // Element tree dock + auto* tree_dock = new QDockWidget("Elements", this); + tree_dock->setAllowedAreas(Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea); + element_tree_ = new QTreeWidget(); + element_tree_->setHeaderLabels({"Name", "Type", "GUID"}); + element_tree_->setColumnWidth(0, 200); + element_tree_->setColumnWidth(1, 120); + element_tree_->setSelectionMode(QAbstractItemView::SingleSelection); + connect(element_tree_, &QTreeWidget::itemSelectionChanged, this, &MainWindow::onTreeSelectionChanged); + tree_dock->setWidget(element_tree_); + addDockWidget(Qt::LeftDockWidgetArea, tree_dock); + + // Properties dock + auto* prop_dock = new QDockWidget("Properties", this); + prop_dock->setAllowedAreas(Qt::LeftDockWidgetArea | Qt::RightDockWidgetArea); + property_table_ = new QTableWidget(); + property_table_->setColumnCount(2); + property_table_->setHorizontalHeaderLabels({"Property", "Value"}); + property_table_->horizontalHeader()->setStretchLastSection(true); + property_table_->setEditTriggers(QAbstractItemView::NoEditTriggers); + property_table_->setSelectionBehavior(QAbstractItemView::SelectRows); + prop_dock->setWidget(property_table_); + addDockWidget(Qt::RightDockWidgetArea, prop_dock); + + // Status bar with progress + progress_bar_ = new QProgressBar(); + progress_bar_->setMaximumWidth(200); + progress_bar_->setVisible(false); + status_label_ = new QLabel("Ready"); + statusBar()->addWidget(status_label_, 1); + statusBar()->addPermanentWidget(progress_bar_); +} + +void MainWindow::setupMenus() { + auto* file_menu = menuBar()->addMenu("&File"); + auto* open_action = file_menu->addAction("&Open...", this, &MainWindow::onFileOpen); + open_action->setShortcut(QKeySequence::Open); + file_menu->addAction("&Settings...", this, &MainWindow::onFileSettings); + 
file_menu->addSeparator(); + file_menu->addAction("&Quit", QKeySequence::Quit, qApp, &QApplication::quit); +} + +void MainWindow::onFileOpen() { + QString path = QFileDialog::getOpenFileName(this, "Open IFC File", QString(), "IFC Files (*.ifc *.ifcxml *.ifczip);;All Files (*)"); + if (!path.isEmpty()) { + openFile(path); + } +} + +void MainWindow::onFileSettings() { + if (settings_ == nullptr) { + settings_ = new SettingsWindow(this); + } + settings_->open(); + settings_->activateWindow(); + settings_->raise(); +} + +void MainWindow::openFile(const QString& path) { + viewport_->resetScene(); + element_tree_->clear(); + property_table_->setRowCount(0); + element_map_.clear(); + tree_items_.clear(); + ifc_id_to_object_id_.clear(); + + progress_bar_->setValue(0); + progress_bar_->setVisible(true); + status_label_->setText("Loading: " + path); + + load_timer_.restart(); + element_poll_timer_.start(); + streamer_->loadFile(path.toStdString()); +} + +void MainWindow::onProgressChanged(int percent) { + progress_bar_->setValue(percent); +} + +void MainWindow::onElementReady(UploadChunk chunk) { + viewport_->uploadChunk(chunk); +} + +void MainWindow::onStreamingFinished() { + element_poll_timer_.stop(); + pollNewElements(); // drain remaining + + progress_bar_->setVisible(false); + + qint64 ms = load_timer_.elapsed(); + QString elapsed = (ms >= 1000) + ? 
QString::number(ms / 1000.0, 'f', 2) + " s" + : QString::number(ms) + " ms"; + status_label_->setText(QString("Loaded %1 elements in %2") + .arg(element_map_.size()) + .arg(elapsed)); +} + +void MainWindow::onObjectPicked(uint32_t object_id) { + viewport_->setSelectedObjectId(object_id); + + // Select in tree + auto it = tree_items_.find(object_id); + if (it != tree_items_.end()) { + element_tree_->blockSignals(true); + element_tree_->setCurrentItem(it->second); + element_tree_->blockSignals(false); + } + + populateProperties(object_id); +} + +void MainWindow::onTreeSelectionChanged() { + auto items = element_tree_->selectedItems(); + if (items.isEmpty()) return; + + uint32_t object_id = items.first()->data(0, Qt::UserRole).toUInt(); + viewport_->setSelectedObjectId(object_id); + populateProperties(object_id); +} + +void MainWindow::pollNewElements() { + auto elements = streamer_->drainElements(); + for (auto& info : elements) { + element_map_[info.object_id] = info; + ifc_id_to_object_id_[info.ifc_id] = info.object_id; + + // Find parent tree item + QTreeWidgetItem* parent_item = nullptr; + auto parent_obj_it = ifc_id_to_object_id_.find(info.parent_id); + if (parent_obj_it != ifc_id_to_object_id_.end()) { + auto tree_it = tree_items_.find(parent_obj_it->second); + if (tree_it != tree_items_.end()) { + parent_item = tree_it->second; + } + } + + QString display_name = QString::fromStdString(info.name); + if (display_name.isEmpty()) { + display_name = QString::fromStdString(info.type) + " #" + QString::number(info.ifc_id); + } + + QTreeWidgetItem* item; + if (parent_item) { + item = new QTreeWidgetItem(parent_item); + } else { + item = new QTreeWidgetItem(element_tree_); + } + item->setText(0, display_name); + item->setText(1, QString::fromStdString(info.type)); + item->setText(2, QString::fromStdString(info.guid)); + item->setData(0, Qt::UserRole, info.object_id); + + tree_items_[info.object_id] = item; + } +} + +void MainWindow::populateProperties(uint32_t 
object_id) { + property_table_->setRowCount(0); + if (object_id == 0) return; + + auto it = element_map_.find(object_id); + if (it == element_map_.end()) return; + + const auto& info = it->second; + + auto addRow = [this](const QString& key, const QString& value) { + int row = property_table_->rowCount(); + property_table_->insertRow(row); + property_table_->setItem(row, 0, new QTableWidgetItem(key)); + property_table_->setItem(row, 1, new QTableWidgetItem(value)); + }; + + addRow("IFC ID", QString::number(info.ifc_id)); + addRow("GUID", QString::fromStdString(info.guid)); + addRow("Name", QString::fromStdString(info.name)); + addRow("Type", QString::fromStdString(info.type)); + + // If the file is loaded, try to get property sets + auto* file = streamer_->ifcFile(); + if (!file) return; + + auto* product = file->instance_by_id(info.ifc_id); + if (!product) return; + + // Show all direct attributes + auto& decl = product->declaration(); + if (auto* entity = decl.as_entity()) { + for (size_t i = 0; i < entity->attribute_count(); ++i) { + auto* attr = entity->attribute_by_index(i); + try { + auto val = product->get_attribute_value(i); + if (!val.isNull()) { + std::string str_val; + try { + str_val = static_cast(val); + } catch (...) { + // Not a string-convertible attribute (entity ref, aggregate, etc.) + str_val = "<" + std::string(IfcUtil::ArgumentTypeToString(val.type())) + ">"; + } + addRow(QString::fromStdString(attr->name()), QString::fromStdString(str_val)); + } + } catch (...) {} + } + } +} diff --git a/src/ifcviewer/MainWindow.h b/src/ifcviewer/MainWindow.h new file mode 100644 index 00000000000..d5f4c18a395 --- /dev/null +++ b/src/ifcviewer/MainWindow.h @@ -0,0 +1,80 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. 
* + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef MAINWINDOW_H +#define MAINWINDOW_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ViewportWindow.h" +#include "GeometryStreamer.h" + +class SettingsWindow; + +class MainWindow : public QMainWindow { + Q_OBJECT +public: + explicit MainWindow(QWidget* parent = nullptr); + ~MainWindow(); + + void openFile(const QString& path); + +private slots: + void onFileOpen(); + void onFileSettings(); + void onProgressChanged(int percent); + void onElementReady(UploadChunk chunk); + void onStreamingFinished(); + void onObjectPicked(uint32_t object_id); + void onTreeSelectionChanged(); + void pollNewElements(); + +private: + void setupUi(); + void setupMenus(); + void populateProperties(uint32_t object_id); + + ViewportWindow* viewport_ = nullptr; + SettingsWindow* settings_ = nullptr; + QWidget* viewport_container_ = nullptr; + QTreeWidget* element_tree_ = nullptr; + QTableWidget* property_table_ = nullptr; + QProgressBar* progress_bar_ = nullptr; + QLabel* status_label_ = nullptr; + QTimer element_poll_timer_; + QElapsedTimer load_timer_; + + GeometryStreamer* streamer_ = nullptr; + + // Map object_id -> tree item and element info + std::unordered_map 
element_map_; + std::unordered_map tree_items_; + std::unordered_map ifc_id_to_object_id_; +}; + +#endif // MAINWINDOW_H diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md new file mode 100644 index 00000000000..b9194cefd1a --- /dev/null +++ b/src/ifcviewer/README.md @@ -0,0 +1,129 @@ +# IfcViewer + +A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine with a Qt6 interface and OpenGL 4.5 rendering. + +## Architecture + +``` ++-------------------------------------------+ +| Qt6 Application (MainWindow) | +| +----------+ +--------------------------+| +| | Element | | 3D Viewport || +| | Tree | | (QWindow + OpenGL 4.5) || +| | | | || +| +----------+ | Single VBO/EBO || +| | Property | | DrawElementsBaseVertex || +| | Table | | GPU pick pass || +| +----------+ +--------------------------+| +| | Status / Progress | ++-------------------------------------------+ + ^ ^ + | | + element metadata UploadChunks + | | ++-------------------------------------------+ +| GeometryStreamer (background QThread) | +| IfcGeom::Iterator with N threads | +| (one per CPU core by default) | ++-------------------------------------------+ +``` + +### Key design decisions + +- **QWindow viewport** embedded via `QWidget::createWindowContainer()`. This gives us a raw native surface for OpenGL, bypassing `QOpenGLWidget`'s compositor overhead. +- **One big vertex buffer + index buffer** (64 MB + 32 MB initial). Geometry is appended as it streams in. No per-object VBOs, no rebinding. +- **Interleaved vertex format**: position (3 floats) + normal (3 floats) + object ID (1 float, bitcast uint32) + color (RGBA8 packed into 4 bytes, read as a normalized vec4) = 32 bytes per vertex. +- **GPU object picking**: a second render pass writes object IDs to an R32UI framebuffer. Click reads back one pixel. No CPU-side raycasting. +- **Multi-threaded tessellation**: `IfcGeom::Iterator` runs on a background thread and internally parallelizes geometry conversion across all CPU cores.
+- **Non-blocking streaming**: the iterator emits `UploadChunk` signals via Qt's queued connection. The main thread uploads to the GPU without blocking iteration. +- **World coordinates**: geometry is emitted in world space (`use-world-coords=true`) so no per-object transform matrices are needed on the GPU. + +### Files + +| File | Purpose | +|------|---------| +| `main.cpp` | Application entry point, GL 4.5 surface format, CLI argument parsing | +| `MainWindow.h/cpp` | Qt main window: dockable element tree, property table, status bar, menus | +| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffer management, camera, picking | +| `GeometryStreamer.h/cpp` | Background geometry processing: loads IFC, runs iterator, emits chunks | +| `CMakeLists.txt` | Build configuration | + +## Dependencies + +- **Qt6** (Core, Gui, Widgets, OpenGL) +- **OpenGL 4.5** (GL_ARB_direct_state_access) - available on Windows and Linux; macOS will need a Vulkan/MoltenVK backend (not yet implemented) +- **IfcOpenShell C++ libraries** (IfcParse, IfcGeom, and their dependencies: Open CASCADE, Boost, Eigen3, optionally CGAL) + +## Building + +IfcViewer is built as part of the IfcOpenShell CMake project. You do not need to build everything - disable the targets you don't need. + +### Minimal build (IfcViewer only) + +From the repository root: + +```sh +mkdir build && cd build + +cmake ../cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_IFCVIEWER=ON \ + -DBUILD_CONVERT=OFF \ + -DBUILD_IFCPYTHON=OFF \ + -DBUILD_GEOMSERVER=OFF \ + -DBUILD_DOCUMENTATION=OFF \ + -DBUILD_EXAMPLES=OFF \ + -DCOLLADA_SUPPORT=OFF \ + -DGLTF_SUPPORT=OFF \ + -DHDF5_SUPPORT=OFF + +make -j$(nproc) IfcViewer +``` + +This builds only IfcParse, IfcGeom (with geometry kernels), and IfcViewer itself. All other targets (IfcConvert, Python bindings, serializers, etc.) are skipped. + +If Qt6 is not in a standard location, pass `-DQT_DIR=/path/to/qt6`.
+ +### Full build with IfcViewer enabled + +```sh +cmake ../cmake -DBUILD_IFCVIEWER=ON +make -j$(nproc) +``` + +## Usage + +```sh +# Open a file directly +./IfcViewer model.ifc + +# Or use File -> Open from the menu +./IfcViewer +``` + +### Controls + +| Input | Action | +|-------|--------| +| Middle mouse drag | Orbit camera | +| Shift + middle mouse drag | Pan camera | +| Scroll wheel | Zoom | +| Left click | Select object (highlights in viewport and tree) | + +### Keyboard shortcuts + +| Key | Action | +|-----|--------| +| Ctrl+O | Open file | +| Ctrl+Q | Quit | + +## Roadmap + +- [x] Material color support (per-vertex RGBA8 color taken from the IFC surface style) +- [ ] Buffer growth (reallocate when 64 MB VBO fills up) +- [ ] `glMultiDrawElementsIndirect` for fewer draw calls +- [ ] Vulkan/MoltenVK backend for macOS +- [ ] Spatial tree (BVH) for frustum culling +- [ ] LOD: coarse tessellation during streaming, refine in background +- [ ] Embedded Python scripting console +- [ ] CJK text input support (Qt6 handles this natively) diff --git a/src/ifcviewer/SettingsWindow.cpp b/src/ifcviewer/SettingsWindow.cpp new file mode 100644 index 00000000000..a24f9bc9763 --- /dev/null +++ b/src/ifcviewer/SettingsWindow.cpp @@ -0,0 +1,68 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details.
* + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "SettingsWindow.h" +#include "AppSettings.h" + +#include +#include +#include +#include +#include + +SettingsWindow::SettingsWindow(QWidget *parent) + : QDialog(parent) +{ + setWindowTitle("Settings"); + setupUi(); +} + +void SettingsWindow::setupUi() { + auto* form = new QFormLayout(); + + geometry_library_edit_ = new QLineEdit(this); + geometry_library_edit_->setMinimumWidth(280); + form->addRow("Geometry Library", geometry_library_edit_); + + auto* button_box = new QDialogButtonBox( + QDialogButtonBox::Ok | QDialogButtonBox::Cancel, this); + + auto* root = new QVBoxLayout(this); + root->addLayout(form); + root->addWidget(button_box); + + connect(button_box, &QDialogButtonBox::accepted, this, &SettingsWindow::onAccepted); + connect(button_box, &QDialogButtonBox::rejected, this, &SettingsWindow::reject); +} + +void SettingsWindow::showEvent(QShowEvent* event) { + // Re-sync widgets from the persisted settings every time the dialog is + // shown, so a previous Cancel doesn't leave stale text in the field. + syncFromSettings(); + QDialog::showEvent(event); +} + +void SettingsWindow::syncFromSettings() { + geometry_library_edit_->setText(AppSettings::instance().geometryLibrary()); +} + +void SettingsWindow::onAccepted() { + AppSettings::instance().setGeometryLibrary(geometry_library_edit_->text()); + accept(); +} diff --git a/src/ifcviewer/SettingsWindow.h b/src/ifcviewer/SettingsWindow.h new file mode 100644 index 00000000000..77affe77578 --- /dev/null +++ b/src/ifcviewer/SettingsWindow.h @@ -0,0 +1,46 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. 
* + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef SETTINGSWINDOW_H +#define SETTINGSWINDOW_H + +#include + +class QLineEdit; +class QShowEvent; + +class SettingsWindow : public QDialog { + Q_OBJECT +public: + explicit SettingsWindow(QWidget *parent = nullptr); + +protected: + void showEvent(QShowEvent* event) override; + +private slots: + void onAccepted(); + +private: + void setupUi(); + void syncFromSettings(); + + QLineEdit* geometry_library_edit_ = nullptr; +}; + +#endif diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp new file mode 100644 index 00000000000..99624cb9f54 --- /dev/null +++ b/src/ifcviewer/ViewportWindow.cpp @@ -0,0 +1,674 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. 
* + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "ViewportWindow.h" + +#include +#include +#include +#include +#include + +#include +#include + +static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB +static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB +// Cap buffer growth so a runaway upload can't try to allocate the world. +static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB +static const int VERTEX_STRIDE = 8; // pos(3) + normal(3) + object_id(1) + color(1 packed) + +static const char* MAIN_VERTEX_SHADER = R"( +#version 450 core +layout(location = 0) in vec3 a_position; +layout(location = 1) in vec3 a_normal; +layout(location = 2) in float a_object_id; +layout(location = 3) in vec4 a_color; + +uniform mat4 u_view_projection; +uniform uint u_selected_id; + +out vec3 v_normal; +out vec3 v_position; +out vec4 v_color; +flat out uint v_object_id; +flat out uint v_selected; + +void main() { + gl_Position = u_view_projection * vec4(a_position, 1.0); + v_normal = a_normal; + v_position = a_position; + v_color = a_color; + v_object_id = floatBitsToUint(a_object_id); + v_selected = (v_object_id == u_selected_id) ? 
1u : 0u; +} +)"; + +static const char* MAIN_FRAGMENT_SHADER = R"( +#version 450 core +in vec3 v_normal; +in vec3 v_position; +in vec4 v_color; +flat in uint v_object_id; +flat in uint v_selected; + +uniform vec3 u_light_dir; + +out vec4 frag_color; + +void main() { + vec3 n = normalize(v_normal); + float ndotl = max(dot(n, u_light_dir), 0.0); + float ambient = 0.25; + float diffuse = 0.75 * ndotl; + vec3 color = v_color.rgb * (ambient + diffuse); + + if (v_selected == 1u) { + color = mix(color, vec3(0.2, 0.6, 1.0), 0.5); + } + + frag_color = vec4(color, v_color.a); +} +)"; + +static const char* PICK_VERTEX_SHADER = R"( +#version 450 core +layout(location = 0) in vec3 a_position; +layout(location = 1) in vec3 a_normal; +layout(location = 2) in float a_object_id; + +uniform mat4 u_view_projection; + +flat out uint v_object_id; + +void main() { + gl_Position = u_view_projection * vec4(a_position, 1.0); + v_object_id = floatBitsToUint(a_object_id); +} +)"; + +static const char* PICK_FRAGMENT_SHADER = R"( +#version 450 core +flat in uint v_object_id; + +out uint frag_id; + +void main() { + frag_id = v_object_id; +} +)"; + +static const char* AXIS_VERTEX_SHADER = R"( +#version 450 core +layout(location = 0) in vec3 a_position; +layout(location = 1) in vec3 a_color; + +uniform mat4 u_mvp; + +out vec3 v_color; + +void main() { + gl_Position = u_mvp * vec4(a_position, 1.0); + v_color = a_color; +} +)"; + +static const char* AXIS_FRAGMENT_SHADER = R"( +#version 450 core +in vec3 v_color; +out vec4 frag_color; + +void main() { + frag_color = vec4(v_color, 1.0); +} +)"; + +static GLuint compileShader(QOpenGLFunctions_4_5_Core* gl, GLenum type, const char* source) { + GLuint shader = gl->glCreateShader(type); + gl->glShaderSource(shader, 1, &source, nullptr); + gl->glCompileShader(shader); + GLint ok = 0; + gl->glGetShaderiv(shader, GL_COMPILE_STATUS, &ok); + if (!ok) { + char log[1024]; + gl->glGetShaderInfoLog(shader, sizeof(log), nullptr, log); + qWarning("Shader compile 
error: %s", log); + } + return shader; +} + +static GLuint linkProgram(QOpenGLFunctions_4_5_Core* gl, GLuint vert, GLuint frag) { + GLuint prog = gl->glCreateProgram(); + gl->glAttachShader(prog, vert); + gl->glAttachShader(prog, frag); + gl->glLinkProgram(prog); + GLint ok = 0; + gl->glGetProgramiv(prog, GL_LINK_STATUS, &ok); + if (!ok) { + char log[1024]; + gl->glGetProgramInfoLog(prog, sizeof(log), nullptr, log); + qWarning("Program link error: %s", log); + } + gl->glDeleteShader(vert); + gl->glDeleteShader(frag); + return prog; +} + +ViewportWindow::ViewportWindow(QWindow* parent) + : QWindow(parent) +{ + setSurfaceType(QWindow::OpenGLSurface); + + QSurfaceFormat fmt; + fmt.setVersion(4, 5); + fmt.setProfile(QSurfaceFormat::CoreProfile); + fmt.setDepthBufferSize(24); + fmt.setSwapBehavior(QSurfaceFormat::DoubleBuffer); + fmt.setSamples(4); + setFormat(fmt); + + connect(&render_timer_, &QTimer::timeout, this, [this]() { + if (isExposed()) render(); + }); + render_timer_.setInterval(16); // ~60 fps +} + +ViewportWindow::~ViewportWindow() { + if (context_) { + context_->makeCurrent(this); + if (gl_) { + if (vao_) gl_->glDeleteVertexArrays(1, &vao_); + if (vbo_) gl_->glDeleteBuffers(1, &vbo_); + if (ebo_) gl_->glDeleteBuffers(1, &ebo_); + if (axis_vao_) gl_->glDeleteVertexArrays(1, &axis_vao_); + if (axis_vbo_) gl_->glDeleteBuffers(1, &axis_vbo_); + if (main_program_) gl_->glDeleteProgram(main_program_); + if (pick_program_) gl_->glDeleteProgram(pick_program_); + if (axis_program_) gl_->glDeleteProgram(axis_program_); + if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); + if (pick_color_tex_) gl_->glDeleteTextures(1, &pick_color_tex_); + if (pick_depth_rbo_) gl_->glDeleteRenderbuffers(1, &pick_depth_rbo_); + } + context_->doneCurrent(); + } +} + +void ViewportWindow::initGL() { + if (gl_initialized_) return; + + context_ = new QOpenGLContext(this); + context_->setFormat(requestedFormat()); + if (!context_->create()) { + qFatal("Failed to create OpenGL 
context"); + return; + } + context_->makeCurrent(this); + + gl_ = QOpenGLVersionFunctionsFactory::get(context_); + if (!gl_) { + qWarning("OpenGL 4.5 not available, falling back"); + return; + } + + buildShaders(); + buildAxisGizmo(); + + // Create VAO + gl_->glCreateVertexArrays(1, &vao_); + + // Create VBO with initial capacity + vbo_capacity_ = INITIAL_VBO_SIZE; + gl_->glCreateBuffers(1, &vbo_); + gl_->glNamedBufferStorage(vbo_, vbo_capacity_, nullptr, + GL_DYNAMIC_STORAGE_BIT); + + // Create EBO with initial capacity + ebo_capacity_ = INITIAL_EBO_SIZE; + gl_->glCreateBuffers(1, &ebo_); + gl_->glNamedBufferStorage(ebo_, ebo_capacity_, nullptr, + GL_DYNAMIC_STORAGE_BIT); + + // Vertex layout: pos(3f) + normal(3f) + object_id(1f) + color(4 unorm bytes) + // = 8 floats = 32 bytes per vertex. + gl_->glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, VERTEX_STRIDE * sizeof(float)); + gl_->glVertexArrayElementBuffer(vao_, ebo_); + + // position + gl_->glEnableVertexArrayAttrib(vao_, 0); + gl_->glVertexArrayAttribFormat(vao_, 0, 3, GL_FLOAT, GL_FALSE, 0); + gl_->glVertexArrayAttribBinding(vao_, 0, 0); + + // normal + gl_->glEnableVertexArrayAttrib(vao_, 1); + gl_->glVertexArrayAttribFormat(vao_, 1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float)); + gl_->glVertexArrayAttribBinding(vao_, 1, 0); + + // object_id (passed as float, decoded in shader via floatBitsToUint) + gl_->glEnableVertexArrayAttrib(vao_, 2); + gl_->glVertexArrayAttribFormat(vao_, 2, 1, GL_FLOAT, GL_FALSE, 6 * sizeof(float)); + gl_->glVertexArrayAttribBinding(vao_, 2, 0); + + // color (RGBA8 packed into the 4 bytes at offset 28; normalized to vec4) + gl_->glEnableVertexArrayAttrib(vao_, 3); + gl_->glVertexArrayAttribFormat(vao_, 3, 4, GL_UNSIGNED_BYTE, GL_TRUE, 7 * sizeof(float)); + gl_->glVertexArrayAttribBinding(vao_, 3, 0); + + gl_->glEnable(GL_DEPTH_TEST); + gl_->glEnable(GL_MULTISAMPLE); + gl_->glClearColor(0.18f, 0.20f, 0.22f, 1.0f); + + gl_initialized_ = true; + frame_clock_.start(); + render_timer_.start(); + 
+ emit initialized(); +} + +void ViewportWindow::buildShaders() { + { + GLuint vs = compileShader(gl_, GL_VERTEX_SHADER, MAIN_VERTEX_SHADER); + GLuint fs = compileShader(gl_, GL_FRAGMENT_SHADER, MAIN_FRAGMENT_SHADER); + main_program_ = linkProgram(gl_, vs, fs); + } + { + GLuint vs = compileShader(gl_, GL_VERTEX_SHADER, PICK_VERTEX_SHADER); + GLuint fs = compileShader(gl_, GL_FRAGMENT_SHADER, PICK_FRAGMENT_SHADER); + pick_program_ = linkProgram(gl_, vs, fs); + } + { + GLuint vs = compileShader(gl_, GL_VERTEX_SHADER, AXIS_VERTEX_SHADER); + GLuint fs = compileShader(gl_, GL_FRAGMENT_SHADER, AXIS_FRAGMENT_SHADER); + axis_program_ = linkProgram(gl_, vs, fs); + } +} + +void ViewportWindow::buildAxisGizmo() { + // 3 line segments (X red, Y green, Z blue), 6 vertices, pos(3) + color(3). + static const float axis_data[] = { + // X axis - red + 0.0f, 0.0f, 0.0f, 1.0f, 0.25f, 0.25f, + 1.0f, 0.0f, 0.0f, 1.0f, 0.25f, 0.25f, + // Y axis - green + 0.0f, 0.0f, 0.0f, 0.30f, 0.95f, 0.30f, + 0.0f, 1.0f, 0.0f, 0.30f, 0.95f, 0.30f, + // Z axis - blue + 0.0f, 0.0f, 0.0f, 0.30f, 0.55f, 1.0f, + 0.0f, 0.0f, 1.0f, 0.30f, 0.55f, 1.0f, + }; + + gl_->glCreateVertexArrays(1, &axis_vao_); + gl_->glCreateBuffers(1, &axis_vbo_); + gl_->glNamedBufferStorage(axis_vbo_, sizeof(axis_data), axis_data, 0); + + gl_->glVertexArrayVertexBuffer(axis_vao_, 0, axis_vbo_, 0, 6 * sizeof(float)); + + gl_->glEnableVertexArrayAttrib(axis_vao_, 0); + gl_->glVertexArrayAttribFormat(axis_vao_, 0, 3, GL_FLOAT, GL_FALSE, 0); + gl_->glVertexArrayAttribBinding(axis_vao_, 0, 0); + + gl_->glEnableVertexArrayAttrib(axis_vao_, 1); + gl_->glVertexArrayAttribFormat(axis_vao_, 1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float)); + gl_->glVertexArrayAttribBinding(axis_vao_, 1, 0); +} + +bool ViewportWindow::growVbo(size_t needed_total) { + // Double until it fits, but don't blow past the cap. 
+ size_t new_capacity = vbo_capacity_; + while (new_capacity < needed_total) { + new_capacity *= 2; + } + if (new_capacity > MAX_BUFFER_SIZE) { + qWarning("VBO grow request (%zu MB) exceeds cap (%zu MB)", + new_capacity / (1024 * 1024), MAX_BUFFER_SIZE / (1024 * 1024)); + return false; + } + + GLuint new_vbo = 0; + gl_->glCreateBuffers(1, &new_vbo); + gl_->glNamedBufferStorage(new_vbo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + + if (vbo_used_ > 0) { + gl_->glCopyNamedBufferSubData(vbo_, new_vbo, 0, 0, vbo_used_); + } + + gl_->glDeleteBuffers(1, &vbo_); + vbo_ = new_vbo; + vbo_capacity_ = new_capacity; + + // Rebind on the VAO so subsequent draws see the new buffer. + gl_->glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, VERTEX_STRIDE * sizeof(float)); + + qInfo("VBO grew to %zu MB", vbo_capacity_ / (1024 * 1024)); + return true; +} + +bool ViewportWindow::growEbo(size_t needed_total) { + size_t new_capacity = ebo_capacity_; + while (new_capacity < needed_total) { + new_capacity *= 2; + } + if (new_capacity > MAX_BUFFER_SIZE) { + qWarning("EBO grow request (%zu MB) exceeds cap (%zu MB)", + new_capacity / (1024 * 1024), MAX_BUFFER_SIZE / (1024 * 1024)); + return false; + } + + GLuint new_ebo = 0; + gl_->glCreateBuffers(1, &new_ebo); + gl_->glNamedBufferStorage(new_ebo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + + if (ebo_used_ > 0) { + gl_->glCopyNamedBufferSubData(ebo_, new_ebo, 0, 0, ebo_used_); + } + + gl_->glDeleteBuffers(1, &ebo_); + ebo_ = new_ebo; + ebo_capacity_ = new_capacity; + + gl_->glVertexArrayElementBuffer(vao_, ebo_); + + qInfo("EBO grew to %zu MB", ebo_capacity_ / (1024 * 1024)); + return true; +} + +void ViewportWindow::uploadChunk(const UploadChunk& chunk) { + if (!gl_initialized_) return; + if (chunk.vertices.empty() || chunk.indices.empty()) return; + + context_->makeCurrent(this); + + size_t vb_size = chunk.vertices.size() * sizeof(float); + size_t ib_size = chunk.indices.size() * sizeof(uint32_t); + + if (vbo_used_ + vb_size > 
vbo_capacity_) { + if (!growVbo(vbo_used_ + vb_size)) { + qWarning("VBO at cap, skipping chunk"); + return; + } + } + if (ebo_used_ + ib_size > ebo_capacity_) { + if (!growEbo(ebo_used_ + ib_size)) { + qWarning("EBO at cap, skipping chunk"); + return; + } + } + + uint32_t base_vertex = vertex_count_; + + gl_->glNamedBufferSubData(vbo_, vbo_used_, vb_size, chunk.vertices.data()); + + // Remap chunk-local indices into global indices so the whole EBO can be + // drawn with a single glDrawElements call. + std::vector global_indices(chunk.indices.size()); + for (size_t i = 0; i < chunk.indices.size(); ++i) { + global_indices[i] = chunk.indices[i] + base_vertex; + } + gl_->glNamedBufferSubData(ebo_, ebo_used_, ib_size, global_indices.data()); + + { + std::lock_guard lock(upload_mutex_); + total_index_count_ += static_cast(chunk.indices.size()); + } + + vbo_used_ += vb_size; + ebo_used_ += ib_size; + vertex_count_ += static_cast(chunk.vertices.size() / VERTEX_STRIDE); + total_triangles_ += static_cast(chunk.indices.size() / 3); +} + +void ViewportWindow::resetScene() { + if (!gl_initialized_) return; + + std::lock_guard lock(upload_mutex_); + total_index_count_ = 0; + vbo_used_ = 0; + ebo_used_ = 0; + vertex_count_ = 0; + total_triangles_ = 0; + selected_object_id_ = 0; +} + +void ViewportWindow::setSelectedObjectId(uint32_t id) { + selected_object_id_ = id; +} + +uint32_t ViewportWindow::pickObjectAt(int x, int y) { + if (!gl_initialized_) return 0; + + context_->makeCurrent(this); + + int w = width() * devicePixelRatio(); + int h = height() * devicePixelRatio(); + + // Create/resize pick FBO if needed + if (pick_width_ != w || pick_height_ != h) { + if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); + if (pick_color_tex_) gl_->glDeleteTextures(1, &pick_color_tex_); + if (pick_depth_rbo_) gl_->glDeleteRenderbuffers(1, &pick_depth_rbo_); + + gl_->glCreateFramebuffers(1, &pick_fbo_); + + gl_->glCreateTextures(GL_TEXTURE_2D, 1, &pick_color_tex_); + 
gl_->glTextureStorage2D(pick_color_tex_, 1, GL_R32UI, w, h); + gl_->glNamedFramebufferTexture(pick_fbo_, GL_COLOR_ATTACHMENT0, pick_color_tex_, 0); + + gl_->glCreateRenderbuffers(1, &pick_depth_rbo_); + gl_->glNamedRenderbufferStorage(pick_depth_rbo_, GL_DEPTH_COMPONENT24, w, h); + gl_->glNamedFramebufferRenderbuffer(pick_fbo_, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, pick_depth_rbo_); + + pick_width_ = w; + pick_height_ = h; + } + + renderPickPass(); + + int px = x * devicePixelRatio(); + int py = (height() - y) * devicePixelRatio(); + uint32_t pixel = 0; + gl_->glGetTextureSubImage(pick_color_tex_, 0, px, py, 0, 1, 1, 1, GL_RED_INTEGER, GL_UNSIGNED_INT, sizeof(pixel), &pixel); + + return pixel; +} + +void ViewportWindow::updateCamera() { + float yaw_rad = qDegreesToRadians(camera_yaw_); + float pitch_rad = qDegreesToRadians(camera_pitch_); + + // IFC / Blender convention: X right, Y forward, Z up. + QVector3D eye; + eye.setX(camera_target_.x() + camera_distance_ * cosf(pitch_rad) * cosf(yaw_rad)); + eye.setY(camera_target_.y() + camera_distance_ * cosf(pitch_rad) * sinf(yaw_rad)); + eye.setZ(camera_target_.z() + camera_distance_ * sinf(pitch_rad)); + + view_matrix_.setToIdentity(); + view_matrix_.lookAt(eye, camera_target_, QVector3D(0, 0, 1)); + + proj_matrix_.setToIdentity(); + float aspect = width() > 0 ? 
float(width()) / float(height()) : 1.0f; + proj_matrix_.perspective(45.0f, aspect, 0.1f, camera_distance_ * 10.0f); +} + +void ViewportWindow::render() { + if (!gl_initialized_ || !isExposed()) return; + + context_->makeCurrent(this); + updateCamera(); + + int w = width() * devicePixelRatio(); + int h = height() * devicePixelRatio(); + gl_->glViewport(0, 0, w, h); + gl_->glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + QMatrix4x4 vp = proj_matrix_ * view_matrix_; + + gl_->glUseProgram(main_program_); + gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(main_program_, "u_view_projection"), 1, GL_FALSE, vp.constData()); + gl_->glUniform3f(gl_->glGetUniformLocation(main_program_, "u_light_dir"), 0.3f, 0.5f, 0.8f); + gl_->glUniform1ui(gl_->glGetUniformLocation(main_program_, "u_selected_id"), selected_object_id_); + + gl_->glBindVertexArray(vao_); + + { + std::lock_guard lock(upload_mutex_); + if (total_index_count_ > 0) { + gl_->glDrawElements(GL_TRIANGLES, total_index_count_, GL_UNSIGNED_INT, nullptr); + } + } + + renderAxisGizmo(); + + context_->swapBuffers(this); +} + +void ViewportWindow::renderAxisGizmo() { + if (!axis_program_ || !axis_vao_) return; + + const int dpr = devicePixelRatio(); + const int gizmo_size = 110 * dpr; + const int margin = 10 * dpr; + + gl_->glViewport(margin, margin, gizmo_size, gizmo_size); + gl_->glDisable(GL_DEPTH_TEST); + + // Build a view matrix from the same camera orientation but with a fixed + // close-up distance, so the gizmo rotates with the scene camera. Z-up. 
+ float yaw_rad = qDegreesToRadians(camera_yaw_); + float pitch_rad = qDegreesToRadians(camera_pitch_); + + QVector3D eye_dir; + eye_dir.setX(cosf(pitch_rad) * cosf(yaw_rad)); + eye_dir.setY(cosf(pitch_rad) * sinf(yaw_rad)); + eye_dir.setZ(sinf(pitch_rad)); + + QMatrix4x4 gizmo_view; + gizmo_view.lookAt(eye_dir * 3.0f, QVector3D(0, 0, 0), QVector3D(0, 0, 1)); + + QMatrix4x4 gizmo_proj; + gizmo_proj.ortho(-1.4f, 1.4f, -1.4f, 1.4f, 0.1f, 10.0f); + + QMatrix4x4 mvp = gizmo_proj * gizmo_view; + + gl_->glUseProgram(axis_program_); + gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(axis_program_, "u_mvp"), 1, GL_FALSE, mvp.constData()); + + gl_->glLineWidth(2.5f); // ignored on some core-profile drivers, that's OK + gl_->glBindVertexArray(axis_vao_); + gl_->glDrawArrays(GL_LINES, 0, 6); + + gl_->glEnable(GL_DEPTH_TEST); +} + +void ViewportWindow::renderPickPass() { + gl_->glBindFramebuffer(GL_FRAMEBUFFER, pick_fbo_); + gl_->glViewport(0, 0, pick_width_, pick_height_); + + GLuint clear_val = 0; + gl_->glClearBufferuiv(GL_COLOR, 0, &clear_val); + gl_->glClear(GL_DEPTH_BUFFER_BIT); + + QMatrix4x4 vp = proj_matrix_ * view_matrix_; + gl_->glUseProgram(pick_program_); + gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(pick_program_, "u_view_projection"), 1, GL_FALSE, vp.constData()); + + gl_->glBindVertexArray(vao_); + + { + std::lock_guard lock(upload_mutex_); + if (total_index_count_ > 0) { + gl_->glDrawElements(GL_TRIANGLES, total_index_count_, GL_UNSIGNED_INT, nullptr); + } + } + + gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); +} + +void ViewportWindow::exposeEvent(QExposeEvent*) { + if (isExposed() && !gl_initialized_) { + initGL(); + } +} + +void ViewportWindow::resizeEvent(QResizeEvent*) { + if (gl_initialized_) render(); +} + +bool ViewportWindow::event(QEvent* e) { + switch (e->type()) { + case QEvent::MouseButtonPress: + handleMousePress(static_cast(e)); + return true; + case QEvent::MouseButtonRelease: + handleMouseRelease(static_cast(e)); + return true; + case 
QEvent::MouseMove: + handleMouseMove(static_cast(e)); + return true; + case QEvent::Wheel: + handleWheel(static_cast(e)); + return true; + default: + return QWindow::event(e); + } +} + +void ViewportWindow::handleMousePress(QMouseEvent* e) { + active_button_ = e->button(); + last_mouse_pos_ = e->pos(); +} + +void ViewportWindow::handleMouseRelease(QMouseEvent* e) { + if (active_button_ == Qt::LeftButton && (e->pos() - last_mouse_pos_).manhattanLength() < 5) { + uint32_t id = pickObjectAt(e->pos().x(), e->pos().y()); + selected_object_id_ = id; + emit objectPicked(id); + } + active_button_ = Qt::NoButton; +} + +void ViewportWindow::handleMouseMove(QMouseEvent* e) { + QPoint delta = e->pos() - last_mouse_pos_; + last_mouse_pos_ = e->pos(); + + if (active_button_ == Qt::MiddleButton) { + if (e->modifiers() & Qt::ShiftModifier) { + // Pan in screen space, derived from the Z-up camera basis. + float pan_speed = camera_distance_ * 0.002f; + float yaw_rad = qDegreesToRadians(camera_yaw_); + float pitch_rad = qDegreesToRadians(camera_pitch_); + QVector3D right(-sinf(yaw_rad), cosf(yaw_rad), 0.0f); + QVector3D up( + -sinf(pitch_rad) * cosf(yaw_rad), + -sinf(pitch_rad) * sinf(yaw_rad), + cosf(pitch_rad)); + camera_target_ -= right * delta.x() * pan_speed; + camera_target_ += up * delta.y() * pan_speed; + } else { + // Orbit + camera_yaw_ -= delta.x() * 0.3f; + camera_pitch_ += delta.y() * 0.3f; + camera_pitch_ = qBound(-89.0f, camera_pitch_, 89.0f); + } + } +} + +void ViewportWindow::handleWheel(QWheelEvent* e) { + float factor = e->angleDelta().y() > 0 ? 0.9f : 1.1f; + camera_distance_ *= factor; + camera_distance_ = qMax(0.1f, camera_distance_); +} diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h new file mode 100644 index 00000000000..cb718050c8f --- /dev/null +++ b/src/ifcviewer/ViewportWindow.h @@ -0,0 +1,146 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. 
* + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef VIEWPORTWINDOW_H +#define VIEWPORTWINDOW_H + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct MaterialInfo { + float r = 0.75f, g = 0.75f, b = 0.78f, a = 1.0f; +}; + +struct UploadChunk { + // Interleaved per-vertex layout (8 floats / 32 bytes per vertex): + // pos(3 float) + normal(3 float) + object_id(1 float bitcast from uint) + // + color(1 float holding RGBA8 packed bytes, read on the GPU as + // GL_UNSIGNED_BYTE * 4 normalized). 
+ std::vector vertices; + std::vector indices; // local to this chunk's vertices + uint32_t object_id = 0; +}; + +class ViewportWindow : public QWindow { + Q_OBJECT +public: + explicit ViewportWindow(QWindow* parent = nullptr); + ~ViewportWindow(); + + void uploadChunk(const UploadChunk& chunk); + void resetScene(); + + void setSelectedObjectId(uint32_t id); + uint32_t pickObjectAt(int x, int y); + +signals: + void objectPicked(uint32_t object_id); + void initialized(); + +protected: + void exposeEvent(QExposeEvent* event) override; + void resizeEvent(QResizeEvent* event) override; + bool event(QEvent* event) override; + +private: + void initGL(); + void render(); + void renderPickPass(); + void renderAxisGizmo(); + void updateCamera(); + void buildShaders(); + void buildAxisGizmo(); + bool growVbo(size_t needed_total); + bool growEbo(size_t needed_total); + + // Mouse interaction + void handleMousePress(QMouseEvent* event); + void handleMouseRelease(QMouseEvent* event); + void handleMouseMove(QMouseEvent* event); + void handleWheel(QWheelEvent* event); + + QOpenGLContext* context_ = nullptr; + QOpenGLFunctions_4_5_Core* gl_ = nullptr; + QTimer render_timer_; + QElapsedTimer frame_clock_; + bool gl_initialized_ = false; + + // Shaders + GLuint main_program_ = 0; + GLuint pick_program_ = 0; + GLuint axis_program_ = 0; + + // Axis gizmo (separate VAO/VBO since vertex layout differs from scene) + GLuint axis_vao_ = 0; + GLuint axis_vbo_ = 0; + + // Geometry buffers - one big buffer pair + GLuint vao_ = 0; + GLuint vbo_ = 0; + GLuint ebo_ = 0; + size_t vbo_capacity_ = 0; + size_t ebo_capacity_ = 0; + size_t vbo_used_ = 0; // in bytes + size_t ebo_used_ = 0; // in bytes + uint32_t vertex_count_ = 0; + + // Pick framebuffer + GLuint pick_fbo_ = 0; + GLuint pick_color_tex_ = 0; + GLuint pick_depth_rbo_ = 0; + int pick_width_ = 0; + int pick_height_ = 0; + + // The entire scene is a single mega-batch: per-vertex color removes the + // need to switch materials between draw 
calls. Indices are written into + // the EBO already offset by base_vertex so one glDrawElements covers all. + uint32_t total_index_count_ = 0; + std::mutex upload_mutex_; + + // Camera + QVector3D camera_target_{0, 0, 0}; + float camera_distance_ = 50.0f; + float camera_yaw_ = 45.0f; + float camera_pitch_ = 30.0f; + QMatrix4x4 view_matrix_; + QMatrix4x4 proj_matrix_; + + // Mouse state + Qt::MouseButton active_button_ = Qt::NoButton; + QPoint last_mouse_pos_; + + // Selection + uint32_t selected_object_id_ = 0; + bool pick_requested_ = false; + int pick_x_ = 0, pick_y_ = 0; + + // Stats + uint32_t total_triangles_ = 0; +}; + +#endif // VIEWPORTWINDOW_H diff --git a/src/ifcviewer/main.cpp b/src/ifcviewer/main.cpp new file mode 100644 index 00000000000..3bca693a371 --- /dev/null +++ b/src/ifcviewer/main.cpp @@ -0,0 +1,55 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . 
* + * * + ********************************************************************************/ + +#include +#include +#include + +#include "MainWindow.h" + +int main(int argc, char* argv[]) { + QApplication app(argc, argv); + app.setApplicationName("IfcViewer"); + app.setOrganizationName("IfcOpenShell"); + + // Request OpenGL 4.5 Core globally + QSurfaceFormat fmt; + fmt.setVersion(4, 5); + fmt.setProfile(QSurfaceFormat::CoreProfile); + fmt.setDepthBufferSize(24); + fmt.setSwapBehavior(QSurfaceFormat::DoubleBuffer); + fmt.setSamples(4); + QSurfaceFormat::setDefaultFormat(fmt); + + QCommandLineParser parser; + parser.setApplicationDescription("IfcOpenShell IFC Viewer"); + parser.addHelpOption(); + parser.addPositionalArgument("file", "IFC file to open"); + parser.process(app); + + MainWindow window; + window.show(); + + auto args = parser.positionalArguments(); + if (!args.isEmpty()) { + window.openFile(args.first()); + } + + return app.exec(); +} From 3c2a5035ac7430fa7be5d7cf8ef9b02828897634 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 19:22:56 +1000 Subject: [PATCH 04/37] More hacks to compile --- src/ifcgeom/mapping/IfcOffsetCurveByDistance.cpp | 2 +- src/ifcparse/hierarchy_helper.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ifcgeom/mapping/IfcOffsetCurveByDistance.cpp b/src/ifcgeom/mapping/IfcOffsetCurveByDistance.cpp index 1a0dc34e388..666f4b61189 100644 --- a/src/ifcgeom/mapping/IfcOffsetCurveByDistance.cpp +++ b/src/ifcgeom/mapping/IfcOffsetCurveByDistance.cpp @@ -158,7 +158,7 @@ taxonomy::ptr mapping::map_impl(const IfcSchema::IfcOffsetCurveByDistances& inst // at this point, next == end and prev == end-1 #if defined SCHEMA_HAS_IfcDistanceExpression - double last_distance = (*prev)->DistanceAlong() * length_unit_; + double last_distance = (*prev).DistanceAlong() * length_unit_; #else double last_distance = (double) prev->DistanceAlong().as() * length_unit_; #endif diff --git a/src/ifcparse/hierarchy_helper.cpp 
b/src/ifcparse/hierarchy_helper.cpp index 9d7db7a2d01..1f98c0a9e6d 100644 --- a/src/ifcparse/hierarchy_helper.cpp +++ b/src/ifcparse/hierarchy_helper.cpp @@ -625,7 +625,7 @@ Ifc4x3::IfcStyledItem create_styled_item(ifcopenshell::file* file, const Ifc4x3: #ifdef HAS_SCHEMA_4x3_tc1 Ifc4x3_tc1::IfcStyledItem create_styled_item(ifcopenshell::file* file, const Ifc4x3_tc1::IfcRepresentationItem& item, const Ifc4x3_tc1::IfcPresentationStyle& style) { - auto sitem = file.crefile->createate(); + auto sitem = file->create(); sitem.setItem(item); sitem.setStyles(std::vector{style}); return sitem; From fe498ed332e0ad4119d0a6e004785e8991c7c01a Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 19:23:10 +1000 Subject: [PATCH 05/37] Update ifcviewer to compile with datamodel refactor --- src/ifcviewer/GeometryStreamer.cpp | 4 ++-- src/ifcviewer/GeometryStreamer.h | 6 +++--- src/ifcviewer/MainWindow.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ifcviewer/GeometryStreamer.cpp b/src/ifcviewer/GeometryStreamer.cpp index 39698c84e62..437209c9098 100644 --- a/src/ifcviewer/GeometryStreamer.cpp +++ b/src/ifcviewer/GeometryStreamer.cpp @@ -94,7 +94,7 @@ std::vector GeometryStreamer::drainElements() { void GeometryStreamer::run(const std::string& path, int num_threads) { try { - ifc_file_ = std::make_unique(path); + ifc_file_ = std::make_unique(path); } catch (const std::exception& e) { emit errorOccurred(QString("Failed to parse IFC file: %1").arg(e.what())); return; @@ -112,7 +112,7 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { auto kernel = ifcopenshell::geometry::kernels::construct( ifc_file_.get(), geometry_library, settings); iterator = std::make_unique( - std::move(kernel), settings, ifc_file_.get(), std::vector(), num_threads); + std::move(kernel), settings, ifc_file_.get(), std::vector(), num_threads); } catch (const std::exception& e) { emit errorOccurred(QString("Failed to create geometry iterator: 
%1").arg(e.what())); return; diff --git a/src/ifcviewer/GeometryStreamer.h b/src/ifcviewer/GeometryStreamer.h index 06b6364a244..abd087463cc 100644 --- a/src/ifcviewer/GeometryStreamer.h +++ b/src/ifcviewer/GeometryStreamer.h @@ -31,7 +31,7 @@ #include #include -#include "../ifcparse/IfcFile.h" +#include "../ifcparse/file.h" #include "../ifcgeom/Iterator.h" #include "ViewportWindow.h" @@ -57,7 +57,7 @@ class GeometryStreamer : public QObject { bool isRunning() const { return running_.load(); } int progress() const { return progress_.load(); } - IfcParse::IfcFile* ifcFile() const { return ifc_file_.get(); } + ifcopenshell::file* ifcFile() const { return ifc_file_.get(); } // Thread-safe access to discovered elements std::vector drainElements(); @@ -73,7 +73,7 @@ class GeometryStreamer : public QObject { UploadChunk convertElement(const IfcGeom::TriangulationElement* elem, uint32_t object_id); - std::unique_ptr ifc_file_; + std::unique_ptr ifc_file_; std::unique_ptr worker_thread_; std::atomic running_{false}; std::atomic cancel_requested_{false}; diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 1f32ce0877c..6eede353532 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -244,23 +244,23 @@ void MainWindow::populateProperties(uint32_t object_id) { auto* file = streamer_->ifcFile(); if (!file) return; - auto* product = file->instance_by_id(info.ifc_id); + auto product = file->instance_by_id(info.ifc_id); if (!product) return; // Show all direct attributes - auto& decl = product->declaration(); + auto& decl = product.declaration(); if (auto* entity = decl.as_entity()) { for (size_t i = 0; i < entity->attribute_count(); ++i) { auto* attr = entity->attribute_by_index(i); try { - auto val = product->get_attribute_value(i); + auto val = product.get_attribute_value(i); if (!val.isNull()) { std::string str_val; try { str_val = static_cast(val); } catch (...) 
{ // Not a string-convertible attribute (entity ref, aggregate, etc.) - str_val = "<" + std::string(IfcUtil::ArgumentTypeToString(val.type())) + ">"; + str_val = "<" + std::string(ifcopenshell::argument_type_to_string(val.type())) + ">"; } addRow(QString::fromStdString(attr->name()), QString::fromStdString(str_val)); } From 750cc89964cf3829b3ee31184d3a3f05c6f33338 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 19:50:44 +1000 Subject: [PATCH 06/37] Plan out performance strategy --- src/ifcviewer/README.md | 335 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 329 insertions(+), 6 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index b9194cefd1a..9c6c52560ce 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -117,13 +117,336 @@ make -j$(nproc) | Ctrl+O | Open file | | Ctrl+Q | Quit | +## Performance Strategy + +The viewer targets smooth orbiting at 60 fps on models up to 1 million IFC objects. +Rendering performance is addressed in three phases. Each phase builds on the +previous one, and the system is designed so that smaller models never pay for +optimizations they don't need. + +### Phase 1: Per-Object Frustum Culling (CPU) + +**Status:** Implemented. + +The simplest win: don't draw what's off screen. + +#### Data model + +During `uploadChunk()`, the viewport records a small metadata struct for every +object that enters the GPU buffers: + +```cpp +struct ObjectDrawInfo { + uint32_t index_offset; // byte offset into the shared EBO + uint32_t index_count; // number of indices (triangles * 3) + float aabb_min[3]; // world-space axis-aligned bounding box + float aabb_max[3]; // (computed from vertex positions at upload time) +}; +``` + +This costs 32 bytes per object. For 1M objects that's ~32 MB of CPU-side +metadata — negligible next to the vertex data. + +#### Frustum extraction + +Each frame, before drawing, six clip planes are extracted from the +view-projection matrix (`VP = proj * view`). 
The standard Gribb-Hartmann +method pulls them directly from the matrix rows: + +``` +left = VP[3] + VP[0] +right = VP[3] - VP[0] +bottom = VP[3] + VP[1] +top = VP[3] - VP[1] +near = VP[3] + VP[2] +far = VP[3] - VP[2] +``` + +Each plane is stored as (a, b, c, d) and normalized so that +`a*x + b*y + c*z + d` gives the signed distance from the plane. + +#### AABB-frustum test + +For each object, the AABB is tested against all six planes using the +"p-vertex / n-vertex" method: + +- For each plane, find the AABB corner most in the direction of the plane + normal (the p-vertex). +- If the p-vertex is on the negative side of the plane, the entire AABB is + outside the frustum → cull. +- If any plane culls the object, skip it. + +This test is conservative: it never culls a visible object, but may +occasionally keep an invisible one (when the AABB straddles a frustum corner). +That's fine — false positives just cost a few extra triangles. + +#### Drawing visible objects + +The surviving objects' `(index_count, index_offset)` pairs are passed to +`glMultiDrawElements()` in a single call. This replaces the previous single +`glDrawElements()` that drew everything. The GPU processes only the index +ranges that survived the frustum test. + +Similarly, for the pick pass (which runs less frequently), the same +visibility list is reused — objects culled from the main pass are also culled +from picking. + +#### Performance characteristics + +| Metric | Value | +|--------|-------| +| Per-object cost | ~6 dot products + 6 comparisons per frame | +| 50k objects | ~0.3 ms on a modern CPU core | +| 500k objects | ~3 ms (starts to matter at 60 fps) | +| 1M objects | ~6 ms (too expensive — need phase 2 or 3) | +| Memory overhead | 32 bytes/object | +| Load-time overhead | Near zero (AABB computed during existing upload) | + +Phase 1 is sufficient for models up to ~100k objects. Beyond that, the CPU-side +frustum test becomes a measurable fraction of the frame budget, motivating +phases 2 and 3. 
+ +### Phase 2: Spatial Tiling (optional, for large models) + +For models exceeding ~10k objects, spatial tiling groups nearby objects into +tiles and culls at the tile level rather than per-object. This reduces the +number of frustum tests from N_objects to N_tiles (typically hundreds to low +thousands). + +#### When tiling activates + +Tiling is **optional and non-disruptive**. The system treats a non-tiled model +as the degenerate case of "one tile containing everything" — the rendering loop +always iterates tiles, so no separate code path is needed. + +Tiling activates in one of three ways: + +1. **Preprocessed cache exists**: If a `.ifcview` sidecar file is found next to + the `.ifc` file, the tile structure is loaded from it instantly. The model + uploads geometry in tile order. +2. **Automatic by size**: If the model has more than a configurable threshold of + objects (default 10k), a background task builds the spatial tree after + initial loading completes. Until it finishes, phase 1 culling handles + visibility. +3. **Explicit user action**: A "preprocess for performance" option builds the + spatial tree and saves the sidecar for future loads. + +#### Spatial subdivision + +The world-space bounding box of the entire model is subdivided using a +**loose octree**: + +- The root node covers the scene AABB. +- Each node is split when it contains more than a threshold number of objects + (e.g. 256). +- Objects are assigned to the smallest node that fully contains their AABB. +- "Loose" bounds (inflated by 1.5x) reduce the number of objects that span + multiple nodes. +- Leaf nodes become tiles. + +An octree adapts to non-uniform object density (common in buildings — lots of +detail in MEP risers, sparse in open atriums) better than a uniform grid. + +#### EBO re-sorting + +For tile-level culling to translate into contiguous index ranges, the EBO must +be sorted so that all indices for objects in the same tile are adjacent. 
+ +This happens via **deferred compaction**: + +1. During initial load, geometry uploads in iterator order (fast first frame, + phase 1 culling active). +2. After loading completes, a background thread: + a. Builds the octree from the per-object AABBs (already computed in phase 1). + b. Determines the tile for each object. + c. Computes the new index order (sorted by tile, then by object within tile). + d. Builds a new EBO on the CPU. +3. The main thread uploads the new EBO in one `glNamedBufferSubData` call and + swaps in the tile metadata. One frame of stutter, bounded by EBO upload + time. + +The per-tile metadata: + +```cpp +struct TileInfo { + float aabb_min[3]; // tile bounding box (union of contained AABBs) + float aabb_max[3]; + uint32_t index_offset; // into the re-sorted EBO + uint32_t index_count; // sum of all contained objects' indices + uint32_t object_count; // for stats / debugging +}; +``` + +#### Preprocessed sidecar format + +The `.ifcview` file stores: + +- Octree structure (node hierarchy, split planes). +- Per-object tile assignment (object_id → tile_id mapping). +- Per-tile index order (so the EBO can be built in tile order directly during + upload, skipping the compaction pass entirely). +- File hash of the source `.ifc` (invalidation check). + +This makes second-and-subsequent loads of the same model significantly faster: +the spatial tree doesn't need to be rebuilt, and geometry uploads in tile order +from the start. 
+ +#### Performance characteristics + +| Metric | Value | +|--------|-------| +| Tile count (typical) | 500–5,000 for a large building | +| Per-frame frustum tests | N_tiles instead of N_objects | +| 500k objects, ~2k tiles | ~0.01 ms frustum testing | +| Memory overhead | ~64 bytes/tile + 32 bytes/object (phase 1 metadata retained) | +| Background compaction | 1–5 seconds for 1M objects (single-threaded) | +| Sidecar file size | ~10–50 KB (indices + tree, no geometry) | + +#### Spatial coherence bonus + +Beyond culling, tile-sorted EBOs improve GPU cache performance. When the GPU +rasterizes a tile's triangles, the vertices are contiguous in the VBO, so the +post-transform vertex cache hits more often. This can yield 10–20% rasterization +speedup even when nothing is culled (e.g. zoomed out to see the whole model). + +### Phase 3: GPU-Driven Indirect Draw + +For models with 500k+ objects, even tile-level CPU culling is fast, but the +real bottleneck shifts to draw call submission. Phase 3 moves all per-frame +visibility decisions to the GPU via compute shaders and indirect draw commands. + +#### How it works + +Phase 3 is **approach 2 (GPU-driven indirect draw) layered on top of approach 3 (spatial tiling)**. It does not replace +tiling — it accelerates it. + +1. **Upload phase** (once, at load time): + - Per-tile AABBs are uploaded to a GPU SSBO (`tile_aabbs`). + - One `DrawElementsIndirectCommand` per tile is written to an indirect draw + buffer: + ```c + struct DrawElementsIndirectCommand { + uint count; // tile's total index count + uint instanceCount; // 1 + uint firstIndex; // offset into EBO (in indices, not bytes) + int baseVertex; // 0 (indices are global) + uint baseInstance; // tile_id (read in shader via gl_BaseInstance; equals gl_DrawID here) + }; + ``` + - A "template" copy of the indirect buffer is kept so the compute shader + can reset culled commands each frame without re-uploading from CPU. + +2. **Cull phase** (every frame, on the GPU): + - The CPU uploads 6 frustum plane vec4s as a uniform or small UBO. 
+ - A compute shader dispatches `ceil(N_tiles / 64)` workgroups: + ```glsl + layout(local_size_x = 64) in; + + void main() { + uint tile_id = gl_GlobalInvocationID.x; + if (tile_id >= tile_count) return; + + // Copy from template (resets any previously zeroed commands) + commands[tile_id] = template_commands[tile_id]; + + // Frustum test + if (!aabb_vs_frustum(tile_aabbs[tile_id], frustum_planes)) { + commands[tile_id].count = 0; // culled: GPU skips zero-count draws + } + } + ``` + - A memory barrier ensures the indirect buffer is visible to the draw stage. + +3. **Draw phase** (every frame): + - One call: `glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, + nullptr, N_tiles, 0)`. + - The GPU reads the indirect buffer, skips tiles with `count == 0`, and + draws the rest. Zero CPU-side per-object or per-tile work. + +#### What the CPU does per frame + +1. Upload 6 vec4 frustum planes (96 bytes). +2. Dispatch one compute shader. +3. Issue one `glMultiDrawElementsIndirect`. +4. Swap buffers. + +That's it. The CPU frame time is essentially constant regardless of model size. + +#### Future extensions (enabled by this architecture) + +Once the compute-based cull pass exists, it's straightforward to add: + +- **Hierarchical-Z occlusion culling**: render a coarse depth buffer from the + previous frame, then test tile AABBs against it in the compute shader. Tiles + fully behind closer geometry get culled. This handles interior-heavy BIM + models well (most rooms are occluded from any given viewpoint). +- **Distance-based LOD**: the compute shader can select different index ranges + (coarse vs. fine tessellation) per tile based on distance to camera. +- **Contribution culling**: tiles whose screen-space projection is below a + pixel threshold get `count = 0`. Removes distant small objects. 
+ +#### Performance characteristics + +| Metric | Value | +|--------|-------| +| CPU per-frame work | ~0.01 ms (constant, independent of model size) | +| GPU compute dispatch | ~0.02 ms for 2k tiles | +| Draw call overhead | 1 indirect multi-draw call | +| GPU memory overhead | ~48 bytes/tile (AABB SSBO) + 20 bytes/tile (indirect commands) × 2 (template + live) | +| Total for 2k tiles | ~176 KB GPU memory | +| Implementation complexity | High (compute shaders, SSBOs, memory barriers, indirect draw) | + +#### When to use + +Phase 3 is worthwhile when: + +- The model has 500k+ objects (CPU frustum testing > 3 ms). +- Smooth 60 fps orbiting is required during interaction. +- The GPU has compute shader support (OpenGL 4.3+, which is guaranteed since + the viewer requires 4.5). + +For models under 100k objects, phase 1 alone is sufficient. For 100k–500k, +phase 2 (tiling) keeps CPU culling under 1 ms. Phase 3 is the final step that +makes the CPU frame time constant. + +### Summary + +``` +Model size Active phases CPU cull cost Draw calls +───────────── ────────────── ────────────── ────────── +< 10k objects Phase 1 ~0.06 ms 1 multi-draw +10k–100k Phase 1 ~0.6 ms 1 multi-draw +100k–500k Phase 1 + 2 ~0.01 ms 1 multi-draw +500k–1M+ Phase 1 + 2 + 3 ~0 (GPU) 1 indirect multi-draw +``` + +The load path: + +``` +open(model.ifc): + ├─ sidecar exists? + │ ├─ yes: load tile tree from .ifcview + │ │ upload geometry in tile order + │ │ (skip background compaction) + │ └─ no: upload geometry in iterator order (fast first frame) + │ phase 1 culling active immediately + │ if object_count > threshold: + │ background: build octree, re-sort EBO, save .ifcview + │ on completion: swap in tile structure + └─ rendering: + ├─ phase 3 available? 
→ compute cull + indirect multi-draw + └─ else → CPU frustum test + glMultiDrawElements +``` + ## Roadmap -- [ ] Material color support (currently renders default grey per batch) -- [ ] Buffer growth (reallocate when 64 MB VBO fills up) -- [ ] `glMultiDrawElementsIndirect` for fewer draw calls +- [x] Material color support (per-vertex RGBA8) +- [x] Buffer growth (dynamic VBO/EBO resizing up to 4 GB) +- [x] Per-object frustum culling (phase 1) +- [ ] Spatial tiling with octree (phase 2) +- [ ] GPU-driven indirect draw (phase 3) +- [ ] Preprocessed `.ifcview` sidecar for fast re-loads +- [ ] Hierarchical-Z occlusion culling +- [ ] Distance-based LOD selection - [ ] Vulkan/MoltenVK backend for macOS -- [ ] Spatial tree (BVH) for frustum culling -- [ ] LOD: coarse tessellation during streaming, refine in background - [ ] Embedded Python scripting console -- [ ] CJK text input support (Qt6 handles this natively) From 8bd1dee43be71a2d949631089cdd9e28e7c5630a Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 19:50:46 +1000 Subject: [PATCH 07/37] Per-object frustum culling with glMultiDrawElements Track per-object AABB and index range during upload. Each frame, extract frustum planes from the view-projection matrix and cull objects whose AABB is entirely outside any plane. Draw only visible objects via glMultiDrawElements. Document the three-phase rendering performance strategy in README.md. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 104 ++++++++++++++++++++++++++++--- src/ifcviewer/ViewportWindow.h | 17 ++++- 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 99624cb9f54..414b9889fa5 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -26,7 +26,9 @@ #include #include +#include #include +#include static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB @@ -421,9 +423,31 @@ void ViewportWindow::uploadChunk(const UploadChunk& chunk) { } gl_->glNamedBufferSubData(ebo_, ebo_used_, ib_size, global_indices.data()); + // Compute AABB from vertex positions in this chunk. + ObjectDrawInfo info; + info.index_offset = static_cast(ebo_used_); + info.index_count = static_cast(chunk.indices.size()); + + const size_t num_verts = chunk.vertices.size() / VERTEX_STRIDE; + if (num_verts > 0) { + info.aabb_min[0] = info.aabb_min[1] = info.aabb_min[2] = std::numeric_limits::max(); + info.aabb_max[0] = info.aabb_max[1] = info.aabb_max[2] = -std::numeric_limits::max(); + for (size_t v = 0; v < num_verts; ++v) { + const float* pos = &chunk.vertices[v * VERTEX_STRIDE]; + for (int a = 0; a < 3; ++a) { + if (pos[a] < info.aabb_min[a]) info.aabb_min[a] = pos[a]; + if (pos[a] > info.aabb_max[a]) info.aabb_max[a] = pos[a]; + } + } + } else { + info.aabb_min[0] = info.aabb_min[1] = info.aabb_min[2] = 0.0f; + info.aabb_max[0] = info.aabb_max[1] = info.aabb_max[2] = 0.0f; + } + { std::lock_guard lock(upload_mutex_); total_index_count_ += static_cast(chunk.indices.size()); + object_draw_info_.push_back(info); } vbo_used_ += vb_size; @@ -442,6 +466,7 @@ void ViewportWindow::resetScene() { vertex_count_ = 0; total_triangles_ = 0; selected_object_id_ = 0; + object_draw_info_.clear(); } void ViewportWindow::setSelectedObjectId(uint32_t id) { @@ -504,6 +529,63 @@ void 
ViewportWindow::updateCamera() { proj_matrix_.perspective(45.0f, aspect, 0.1f, camera_distance_ * 10.0f); } +void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { + visible_counts_.clear(); + visible_offsets_.clear(); + + std::lock_guard lock(upload_mutex_); + if (object_draw_info_.empty()) return; + + // Extract 6 frustum planes from the view-projection matrix. + // Each plane is (a, b, c, d) where ax + by + cz + d >= 0 is inside. + // QMatrix4x4 is stored column-major; operator(row, col) gives element. + float planes[6][4]; + for (int i = 0; i < 4; ++i) { + planes[0][i] = vp(3, i) + vp(0, i); // left + planes[1][i] = vp(3, i) - vp(0, i); // right + planes[2][i] = vp(3, i) + vp(1, i); // bottom + planes[3][i] = vp(3, i) - vp(1, i); // top + planes[4][i] = vp(3, i) + vp(2, i); // near + planes[5][i] = vp(3, i) - vp(2, i); // far + } + // Normalize planes. + for (int p = 0; p < 6; ++p) { + float len = std::sqrt(planes[p][0] * planes[p][0] + + planes[p][1] * planes[p][1] + + planes[p][2] * planes[p][2]); + if (len > 0.0f) { + float inv = 1.0f / len; + planes[p][0] *= inv; + planes[p][1] *= inv; + planes[p][2] *= inv; + planes[p][3] *= inv; + } + } + + visible_counts_.reserve(object_draw_info_.size()); + visible_offsets_.reserve(object_draw_info_.size()); + + for (const auto& obj : object_draw_info_) { + bool visible = true; + for (int p = 0; p < 6; ++p) { + // p-vertex: the AABB corner most in the direction of the plane normal. + float px = planes[p][0] >= 0.0f ? obj.aabb_max[0] : obj.aabb_min[0]; + float py = planes[p][1] >= 0.0f ? obj.aabb_max[1] : obj.aabb_min[1]; + float pz = planes[p][2] >= 0.0f ? 
obj.aabb_max[2] : obj.aabb_min[2]; + float dist = planes[p][0] * px + planes[p][1] * py + planes[p][2] * pz + planes[p][3]; + if (dist < 0.0f) { + visible = false; + break; + } + } + if (visible) { + visible_counts_.push_back(static_cast(obj.index_count)); + visible_offsets_.push_back(reinterpret_cast( + static_cast(obj.index_offset))); + } + } +} + void ViewportWindow::render() { if (!gl_initialized_ || !isExposed()) return; @@ -524,11 +606,12 @@ void ViewportWindow::render() { gl_->glBindVertexArray(vao_); - { - std::lock_guard lock(upload_mutex_); - if (total_index_count_ > 0) { - gl_->glDrawElements(GL_TRIANGLES, total_index_count_, GL_UNSIGNED_INT, nullptr); - } + buildVisibleList(vp); + if (!visible_counts_.empty()) { + gl_->glMultiDrawElements(GL_TRIANGLES, + visible_counts_.data(), GL_UNSIGNED_INT, + visible_offsets_.data(), + static_cast(visible_counts_.size())); } renderAxisGizmo(); @@ -588,11 +671,12 @@ void ViewportWindow::renderPickPass() { gl_->glBindVertexArray(vao_); - { - std::lock_guard lock(upload_mutex_); - if (total_index_count_ > 0) { - gl_->glDrawElements(GL_TRIANGLES, total_index_count_, GL_UNSIGNED_INT, nullptr); - } + // Reuse the visible list from the most recent render() call. 
+ if (!visible_counts_.empty()) { + gl_->glMultiDrawElements(GL_TRIANGLES, + visible_counts_.data(), GL_UNSIGNED_INT, + visible_offsets_.data(), + static_cast(visible_counts_.size())); } gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index cb718050c8f..363158b16f7 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -36,6 +36,13 @@ struct MaterialInfo { float r = 0.75f, g = 0.75f, b = 0.78f, a = 1.0f; }; +struct ObjectDrawInfo { + uint32_t index_offset; // byte offset into EBO + uint32_t index_count; // number of indices + float aabb_min[3]; // world-space AABB + float aabb_max[3]; +}; + struct UploadChunk { // Interleaved per-vertex layout (8 floats / 32 bytes per vertex): // pos(3 float) + normal(3 float) + object_id(1 float bitcast from uint) @@ -77,6 +84,7 @@ class ViewportWindow : public QWindow { void buildAxisGizmo(); bool growVbo(size_t needed_total); bool growEbo(size_t needed_total); + void buildVisibleList(const QMatrix4x4& vp); // Mouse interaction void handleMousePress(QMouseEvent* event); @@ -116,12 +124,15 @@ class ViewportWindow : public QWindow { int pick_width_ = 0; int pick_height_ = 0; - // The entire scene is a single mega-batch: per-vertex color removes the - // need to switch materials between draw calls. Indices are written into - // the EBO already offset by base_vertex so one glDrawElements covers all. + // Per-object draw metadata for frustum culling. + std::vector object_draw_info_; uint32_t total_index_count_ = 0; std::mutex upload_mutex_; + // Scratch buffers reused each frame to avoid allocation. 
+ std::vector visible_counts_; + std::vector visible_offsets_; + // Camera QVector3D camera_target_{0, 0, 0}; float camera_distance_ = 50.0f; From 6a001840bcb5a4e68f58ca1f1b2dad485a9845ac Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 20:05:50 +1000 Subject: [PATCH 08/37] Add performance stats overlay in status bar Show FPS, frame time, visible/total objects, and visible/total triangles in the status bar. Toggled via Settings > Show Performance Stats, persisted in app settings. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/AppSettings.cpp | 14 ++++++++++++++ src/ifcviewer/AppSettings.h | 5 +++++ src/ifcviewer/MainWindow.cpp | 21 +++++++++++++++++++++ src/ifcviewer/MainWindow.h | 1 + src/ifcviewer/SettingsWindow.cpp | 6 ++++++ src/ifcviewer/SettingsWindow.h | 2 ++ src/ifcviewer/ViewportWindow.cpp | 21 +++++++++++++++++++++ src/ifcviewer/ViewportWindow.h | 14 ++++++++++++++ 8 files changed, 84 insertions(+) diff --git a/src/ifcviewer/AppSettings.cpp b/src/ifcviewer/AppSettings.cpp index 07c5f8c3bc2..af1edfa36f6 100644 --- a/src/ifcviewer/AppSettings.cpp +++ b/src/ifcviewer/AppSettings.cpp @@ -24,6 +24,7 @@ namespace { constexpr const char* kGeometryLibraryKey = "geometry/library"; constexpr const char* kGeometryLibraryDefault = "hybrid-cgal-simple-opencascade"; +constexpr const char* kShowStatsKey = "viewport/show_stats"; } AppSettings& AppSettings::instance() { @@ -46,12 +47,25 @@ void AppSettings::setGeometryLibrary(const QString& value) { emit geometryLibraryChanged(value); } +bool AppSettings::showStats() const { + return show_stats_; +} + +void AppSettings::setShowStats(bool value) { + if (show_stats_ == value) return; + show_stats_ = value; + persist(); + emit showStatsChanged(value); +} + void AppSettings::load() { QSettings settings; geometry_library_ = settings.value(kGeometryLibraryKey, kGeometryLibraryDefault).toString(); + show_stats_ = settings.value(kShowStatsKey, false).toBool(); } void AppSettings::persist() { QSettings settings; 
settings.setValue(kGeometryLibraryKey, geometry_library_); + settings.setValue(kShowStatsKey, show_stats_); } diff --git a/src/ifcviewer/AppSettings.h b/src/ifcviewer/AppSettings.h index 9658c10b955..f70062475c6 100644 --- a/src/ifcviewer/AppSettings.h +++ b/src/ifcviewer/AppSettings.h @@ -34,8 +34,12 @@ class AppSettings : public QObject { QString geometryLibrary() const; void setGeometryLibrary(const QString& value); + bool showStats() const; + void setShowStats(bool value); + signals: void geometryLibraryChanged(const QString& value); + void showStatsChanged(bool value); private: AppSettings(); @@ -43,6 +47,7 @@ class AppSettings : public QObject { void persist(); QString geometry_library_; + bool show_stats_ = false; }; #endif // APPSETTINGS_H diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 6eede353532..4abd929b0b8 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -18,6 +18,7 @@ ********************************************************************************/ #include "MainWindow.h" +#include "AppSettings.h" #include "SettingsWindow.h" #include @@ -43,6 +44,23 @@ MainWindow::MainWindow(QWidget* parent) QMessageBox::warning(this, "Error", msg); }, Qt::QueuedConnection); + connect(viewport_, &ViewportWindow::frameStatsUpdated, this, [this](const ViewportWindow::FrameStats& s) { + if (!stats_label_->isVisible()) return; + stats_label_->setText( + QString("%1 fps | %2 ms | %3/%4 obj | %5/%6 tri") + .arg(s.fps, 0, 'f', 1) + .arg(s.frame_time_ms, 0, 'f', 1) + .arg(s.visible_objects) + .arg(s.total_objects) + .arg(s.visible_triangles) + .arg(s.total_triangles)); + }); + + connect(&AppSettings::instance(), &AppSettings::showStatsChanged, this, [this](bool show) { + stats_label_->setVisible(show); + if (!show) stats_label_->clear(); + }); + connect(&element_poll_timer_, &QTimer::timeout, this, &MainWindow::pollNewElements); element_poll_timer_.setInterval(100); @@ -91,7 +109,10 @@ void MainWindow::setupUi() { 
progress_bar_->setMaximumWidth(200); progress_bar_->setVisible(false); status_label_ = new QLabel("Ready"); + stats_label_ = new QLabel(); + stats_label_->setVisible(AppSettings::instance().showStats()); statusBar()->addWidget(status_label_, 1); + statusBar()->addPermanentWidget(stats_label_); statusBar()->addPermanentWidget(progress_bar_); } diff --git a/src/ifcviewer/MainWindow.h b/src/ifcviewer/MainWindow.h index d5f4c18a395..bbec6ce83de 100644 --- a/src/ifcviewer/MainWindow.h +++ b/src/ifcviewer/MainWindow.h @@ -66,6 +66,7 @@ private slots: QTableWidget* property_table_ = nullptr; QProgressBar* progress_bar_ = nullptr; QLabel* status_label_ = nullptr; + QLabel* stats_label_ = nullptr; QTimer element_poll_timer_; QElapsedTimer load_timer_; diff --git a/src/ifcviewer/SettingsWindow.cpp b/src/ifcviewer/SettingsWindow.cpp index a24f9bc9763..c4ebddc650e 100644 --- a/src/ifcviewer/SettingsWindow.cpp +++ b/src/ifcviewer/SettingsWindow.cpp @@ -20,6 +20,7 @@ #include "SettingsWindow.h" #include "AppSettings.h" +#include #include #include #include @@ -40,6 +41,9 @@ void SettingsWindow::setupUi() { geometry_library_edit_->setMinimumWidth(280); form->addRow("Geometry Library", geometry_library_edit_); + show_stats_check_ = new QCheckBox(this); + form->addRow("Show Performance Stats", show_stats_check_); + auto* button_box = new QDialogButtonBox( QDialogButtonBox::Ok | QDialogButtonBox::Cancel, this); @@ -60,9 +64,11 @@ void SettingsWindow::showEvent(QShowEvent* event) { void SettingsWindow::syncFromSettings() { geometry_library_edit_->setText(AppSettings::instance().geometryLibrary()); + show_stats_check_->setChecked(AppSettings::instance().showStats()); } void SettingsWindow::onAccepted() { AppSettings::instance().setGeometryLibrary(geometry_library_edit_->text()); + AppSettings::instance().setShowStats(show_stats_check_->isChecked()); accept(); } diff --git a/src/ifcviewer/SettingsWindow.h b/src/ifcviewer/SettingsWindow.h index 77affe77578..ea55252682e 100644 --- 
a/src/ifcviewer/SettingsWindow.h +++ b/src/ifcviewer/SettingsWindow.h @@ -22,6 +22,7 @@ #include +class QCheckBox; class QLineEdit; class QShowEvent; @@ -41,6 +42,7 @@ private slots: void syncFromSettings(); QLineEdit* geometry_library_edit_ = nullptr; + QCheckBox* show_stats_check_ = nullptr; }; #endif diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 414b9889fa5..1ebe988554a 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -532,6 +532,7 @@ void ViewportWindow::updateCamera() { void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { visible_counts_.clear(); visible_offsets_.clear(); + visible_triangles_ = 0; std::lock_guard lock(upload_mutex_); if (object_draw_info_.empty()) return; @@ -582,6 +583,7 @@ void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { visible_counts_.push_back(static_cast(obj.index_count)); visible_offsets_.push_back(reinterpret_cast( static_cast(obj.index_offset))); + visible_triangles_ += obj.index_count / 3; } } } @@ -617,6 +619,25 @@ void ViewportWindow::render() { renderAxisGizmo(); context_->swapBuffers(this); + + // Compute FPS (updated once per second to avoid flicker). 
+ float dt = frame_clock_.restart() / 1000.0f; + accumulated_time_ += dt; + frame_count_++; + if (accumulated_time_ >= 1.0f) { + last_fps_ = static_cast(frame_count_) / accumulated_time_; + frame_count_ = 0; + accumulated_time_ = 0.0f; + + FrameStats stats; + stats.fps = last_fps_; + stats.frame_time_ms = 1000.0f / last_fps_; + stats.total_objects = static_cast(object_draw_info_.size()); + stats.visible_objects = static_cast(visible_counts_.size()); + stats.total_triangles = total_triangles_; + stats.visible_triangles = visible_triangles_; + emit frameStatsUpdated(stats); + } } void ViewportWindow::renderAxisGizmo() { diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 363158b16f7..58a63343212 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -65,9 +65,19 @@ class ViewportWindow : public QWindow { void setSelectedObjectId(uint32_t id); uint32_t pickObjectAt(int x, int y); + struct FrameStats { + float fps; + float frame_time_ms; + uint32_t total_objects; + uint32_t visible_objects; + uint32_t total_triangles; + uint32_t visible_triangles; + }; + signals: void objectPicked(uint32_t object_id); void initialized(); + void frameStatsUpdated(const ViewportWindow::FrameStats& stats); protected: void exposeEvent(QExposeEvent* event) override; @@ -152,6 +162,10 @@ class ViewportWindow : public QWindow { // Stats uint32_t total_triangles_ = 0; + uint32_t visible_triangles_ = 0; + int frame_count_ = 0; + float accumulated_time_ = 0.0f; + float last_fps_ = 0.0f; }; #endif // VIEWPORTWINDOW_H From 2ffd2da540f22828dd7ccde07c8b08ada4ac0ffe Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 20:49:39 +1000 Subject: [PATCH 09/37] Multi-model project support with sequential loading Introduce ModelHandle and per-model GeometryStreamers so multiple IFC files can be loaded simultaneously. Object IDs are globally unique (monotonically increasing across models). File picker is now multiselect. 
Each model gets a top-level tree node. Property lookup uses the correct model's ifcopenshell::file. ViewportWindow supports hide/show/remove per model via model_id filtering in the frustum cull pass. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/GeometryStreamer.cpp | 7 +- src/ifcviewer/GeometryStreamer.h | 7 +- src/ifcviewer/MainWindow.cpp | 134 +++++++++++++++++++++-------- src/ifcviewer/MainWindow.h | 31 ++++++- src/ifcviewer/ViewportWindow.cpp | 22 +++++ src/ifcviewer/ViewportWindow.h | 9 ++ src/ifcviewer/main.cpp | 4 +- 7 files changed, 167 insertions(+), 47 deletions(-) diff --git a/src/ifcviewer/GeometryStreamer.cpp b/src/ifcviewer/GeometryStreamer.cpp index 437209c9098..7235bced9f8 100644 --- a/src/ifcviewer/GeometryStreamer.cpp +++ b/src/ifcviewer/GeometryStreamer.cpp @@ -40,7 +40,7 @@ GeometryStreamer::~GeometryStreamer() { } } -void GeometryStreamer::loadFile(const std::string& path, int num_threads) { +void GeometryStreamer::loadFile(const std::string& path, uint32_t start_object_id, uint32_t model_id, int num_threads) { if (running_.load()) { cancel(); if (worker_thread_ && worker_thread_->isRunning()) { @@ -52,7 +52,8 @@ void GeometryStreamer::loadFile(const std::string& path, int num_threads) { cancel_requested_ = false; running_ = true; progress_ = 0; - next_object_id_ = 1; + next_object_id_ = start_object_id; + model_id_ = model_id; { std::lock_guard lock(elements_mutex_); @@ -139,6 +140,7 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { // Record element metadata ElementInfo info; info.object_id = object_id; + info.model_id = model_id_; info.ifc_id = tri_elem->id(); info.guid = tri_elem->guid(); info.name = tri_elem->name(); @@ -201,6 +203,7 @@ static inline uint32_t packRGBA8(const MaterialInfo& m) { UploadChunk GeometryStreamer::convertElement(const IfcGeom::TriangulationElement* elem, uint32_t object_id) { UploadChunk chunk; chunk.object_id = object_id; + chunk.model_id = model_id_; const auto& geom = 
elem->geometry(); const auto& verts = geom.verts(); diff --git a/src/ifcviewer/GeometryStreamer.h b/src/ifcviewer/GeometryStreamer.h index abd087463cc..0d49a12ca70 100644 --- a/src/ifcviewer/GeometryStreamer.h +++ b/src/ifcviewer/GeometryStreamer.h @@ -38,6 +38,7 @@ struct ElementInfo { uint32_t object_id; + uint32_t model_id; int ifc_id; std::string guid; std::string name; @@ -51,11 +52,13 @@ class GeometryStreamer : public QObject { explicit GeometryStreamer(QObject* parent = nullptr); ~GeometryStreamer(); - void loadFile(const std::string& path, int num_threads = 0); + void loadFile(const std::string& path, uint32_t start_object_id, uint32_t model_id, int num_threads = 0); void cancel(); bool isRunning() const { return running_.load(); } int progress() const { return progress_.load(); } + uint32_t lastObjectId() const { return next_object_id_; } + uint32_t modelId() const { return model_id_; } ifcopenshell::file* ifcFile() const { return ifc_file_.get(); } @@ -82,8 +85,8 @@ class GeometryStreamer : public QObject { std::mutex elements_mutex_; std::vector pending_elements_; - // Map from IFC product id to our compact object_id uint32_t next_object_id_ = 1; // 0 = no object + uint32_t model_id_ = 0; }; #endif // GEOMETRYSTREAMER_H diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 4abd929b0b8..3b4e58fbacc 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -36,14 +37,6 @@ MainWindow::MainWindow(QWidget* parent) setupUi(); setupMenus(); - streamer_ = new GeometryStreamer(this); - connect(streamer_, &GeometryStreamer::progressChanged, this, &MainWindow::onProgressChanged, Qt::QueuedConnection); - connect(streamer_, &GeometryStreamer::elementReady, this, &MainWindow::onElementReady, Qt::QueuedConnection); - connect(streamer_, &GeometryStreamer::finished, this, &MainWindow::onStreamingFinished, Qt::QueuedConnection); - 
connect(streamer_, &GeometryStreamer::errorOccurred, this, [this](const QString& msg) { - QMessageBox::warning(this, "Error", msg); - }, Qt::QueuedConnection); - connect(viewport_, &ViewportWindow::frameStatsUpdated, this, [this](const ViewportWindow::FrameStats& s) { if (!stats_label_->isVisible()) return; stats_label_->setText( @@ -118,7 +111,7 @@ void MainWindow::setupUi() { void MainWindow::setupMenus() { auto* file_menu = menuBar()->addMenu("&File"); - auto* open_action = file_menu->addAction("&Open...", this, &MainWindow::onFileOpen); + auto* open_action = file_menu->addAction("&Add Files...", this, &MainWindow::onFileOpen); open_action->setShortcut(QKeySequence::Open); file_menu->addAction("&Settings...", this, &MainWindow::onFileSettings); file_menu->addSeparator(); @@ -126,9 +119,11 @@ void MainWindow::setupMenus() { } void MainWindow::onFileOpen() { - QString path = QFileDialog::getOpenFileName(this, "Open IFC File", QString(), "IFC Files (*.ifc *.ifcxml *.ifczip);;All Files (*)"); - if (!path.isEmpty()) { - openFile(path); + QStringList paths = QFileDialog::getOpenFileNames( + this, "Add IFC Files", QString(), + "IFC Files (*.ifc *.ifcxml *.ifczip);;All Files (*)"); + if (!paths.isEmpty()) { + addFiles(paths); } } @@ -141,21 +136,64 @@ void MainWindow::onFileSettings() { settings_->raise(); } -void MainWindow::openFile(const QString& path) { - viewport_->resetScene(); - element_tree_->clear(); - property_table_->setRowCount(0); - element_map_.clear(); - tree_items_.clear(); - ifc_id_to_object_id_.clear(); +void MainWindow::addFiles(const QStringList& paths) { + for (const auto& path : paths) { + ModelId id = next_model_id_++; + + ModelHandle handle; + handle.id = id; + handle.file_path = path; + handle.display_name = QFileInfo(path).fileName(); + handle.streamer = new GeometryStreamer(this); + + // Create top-level tree item for this model + auto* root = new QTreeWidgetItem(element_tree_); + root->setText(0, handle.display_name); + root->setText(1, "IFC 
Model"); + root->setData(0, Qt::UserRole, static_cast(0)); // 0 = not a pickable object + handle.tree_root = root; + + models_[id] = handle; + load_queue_.push_back(id); + } + + if (loading_model_id_ == 0) { + startNextLoad(); + } +} + +void MainWindow::connectStreamer(GeometryStreamer* streamer) { + connect(streamer, &GeometryStreamer::progressChanged, + this, &MainWindow::onProgressChanged, Qt::QueuedConnection); + connect(streamer, &GeometryStreamer::elementReady, + this, &MainWindow::onElementReady, Qt::QueuedConnection); + connect(streamer, &GeometryStreamer::finished, + this, &MainWindow::onStreamingFinished, Qt::QueuedConnection); + connect(streamer, &GeometryStreamer::errorOccurred, this, [this](const QString& msg) { + QMessageBox::warning(this, "Error", msg); + }, Qt::QueuedConnection); +} + +void MainWindow::startNextLoad() { + if (load_queue_.empty()) { + loading_model_id_ = 0; + return; + } + + loading_model_id_ = load_queue_.front(); + load_queue_.pop_front(); + + auto& model = models_[loading_model_id_]; + connectStreamer(model.streamer); progress_bar_->setValue(0); progress_bar_->setVisible(true); - status_label_->setText("Loading: " + path); + status_label_->setText("Loading: " + model.display_name); load_timer_.restart(); element_poll_timer_.start(); - streamer_->loadFile(path.toStdString()); + model.streamer->loadFile( + model.file_path.toStdString(), next_object_id_, loading_model_id_); } void MainWindow::onProgressChanged(int percent) { @@ -170,15 +208,30 @@ void MainWindow::onStreamingFinished() { element_poll_timer_.stop(); pollNewElements(); // drain remaining + // Update next_object_id_ from the streamer that just finished. + if (loading_model_id_ != 0) { + auto it = models_.find(loading_model_id_); + if (it != models_.end()) { + next_object_id_ = it->second.streamer->lastObjectId(); + } + } + progress_bar_->setVisible(false); qint64 ms = load_timer_.elapsed(); QString elapsed = (ms >= 1000) ? 
QString::number(ms / 1000.0, 'f', 2) + " s" : QString::number(ms) + " ms"; - status_label_->setText(QString("Loaded %1 elements in %2") - .arg(element_map_.size()) + + size_t total_elements = element_map_.size(); + size_t num_models = models_.size(); + status_label_->setText(QString("%1 elements across %2 model(s) — last loaded in %3") + .arg(total_elements) + .arg(num_models) .arg(elapsed)); + + // Start next model if queued. + startNextLoad(); } void MainWindow::onObjectPicked(uint32_t object_id) { @@ -205,15 +258,23 @@ void MainWindow::onTreeSelectionChanged() { } void MainWindow::pollNewElements() { - auto elements = streamer_->drainElements(); + if (loading_model_id_ == 0) return; + + auto it = models_.find(loading_model_id_); + if (it == models_.end()) return; + + auto& model = it->second; + auto elements = model.streamer->drainElements(); + for (auto& info : elements) { element_map_[info.object_id] = info; - ifc_id_to_object_id_[info.ifc_id] = info.object_id; + scoped_ifc_id_to_object_id_[scopedKey(info.model_id, info.ifc_id)] = info.object_id; - // Find parent tree item - QTreeWidgetItem* parent_item = nullptr; - auto parent_obj_it = ifc_id_to_object_id_.find(info.parent_id); - if (parent_obj_it != ifc_id_to_object_id_.end()) { + // Find parent tree item (scoped to this model) + QTreeWidgetItem* parent_item = model.tree_root; + auto parent_obj_it = scoped_ifc_id_to_object_id_.find( + scopedKey(info.model_id, info.parent_id)); + if (parent_obj_it != scoped_ifc_id_to_object_id_.end()) { auto tree_it = tree_items_.find(parent_obj_it->second); if (tree_it != tree_items_.end()) { parent_item = tree_it->second; @@ -225,12 +286,7 @@ void MainWindow::pollNewElements() { display_name = QString::fromStdString(info.type) + " #" + QString::number(info.ifc_id); } - QTreeWidgetItem* item; - if (parent_item) { - item = new QTreeWidgetItem(parent_item); - } else { - item = new QTreeWidgetItem(element_tree_); - } + auto* item = new QTreeWidgetItem(parent_item); 
item->setText(0, display_name); item->setText(1, QString::fromStdString(info.type)); item->setText(2, QString::fromStdString(info.guid)); @@ -261,8 +317,11 @@ void MainWindow::populateProperties(uint32_t object_id) { addRow("Name", QString::fromStdString(info.name)); addRow("Type", QString::fromStdString(info.type)); - // If the file is loaded, try to get property sets - auto* file = streamer_->ifcFile(); + // Find the correct model's file for property lookup + auto model_it = models_.find(info.model_id); + if (model_it == models_.end()) return; + + auto* file = model_it->second.streamer->ifcFile(); if (!file) return; auto product = file->instance_by_id(info.ifc_id); @@ -280,7 +339,6 @@ void MainWindow::populateProperties(uint32_t object_id) { try { str_val = static_cast(val); } catch (...) { - // Not a string-convertible attribute (entity ref, aggregate, etc.) str_val = "<" + std::string(ifcopenshell::argument_type_to_string(val.type())) + ">"; } addRow(QString::fromStdString(attr->name()), QString::fromStdString(str_val)); diff --git a/src/ifcviewer/MainWindow.h b/src/ifcviewer/MainWindow.h index bbec6ce83de..e9bcc37cb6b 100644 --- a/src/ifcviewer/MainWindow.h +++ b/src/ifcviewer/MainWindow.h @@ -29,6 +29,8 @@ #include #include +#include +#include #include #include "ViewportWindow.h" @@ -36,13 +38,24 @@ class SettingsWindow; +using ModelId = uint32_t; + +struct ModelHandle { + ModelId id = 0; + QString file_path; + QString display_name; + GeometryStreamer* streamer = nullptr; + QTreeWidgetItem* tree_root = nullptr; + bool visible = true; +}; + class MainWindow : public QMainWindow { Q_OBJECT public: explicit MainWindow(QWidget* parent = nullptr); ~MainWindow(); - void openFile(const QString& path); + void addFiles(const QStringList& paths); private slots: void onFileOpen(); @@ -58,6 +71,8 @@ private slots: void setupUi(); void setupMenus(); void populateProperties(uint32_t object_id); + void startNextLoad(); + void connectStreamer(GeometryStreamer* streamer); 
ViewportWindow* viewport_ = nullptr; SettingsWindow* settings_ = nullptr; @@ -70,12 +85,22 @@ private slots: QTimer element_poll_timer_; QElapsedTimer load_timer_; - GeometryStreamer* streamer_ = nullptr; + // Multi-model state + std::map models_; + ModelId next_model_id_ = 1; + uint32_t next_object_id_ = 1; // monotonically increasing across all models + std::deque load_queue_; + ModelId loading_model_id_ = 0; // Map object_id -> tree item and element info std::unordered_map element_map_; std::unordered_map tree_items_; - std::unordered_map ifc_id_to_object_id_; + // Scoped (model_id, ifc_id) -> object_id + std::unordered_map scoped_ifc_id_to_object_id_; + + static uint64_t scopedKey(uint32_t model_id, int ifc_id) { + return (static_cast(model_id) << 32) | static_cast(ifc_id); + } }; #endif // MAINWINDOW_H diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 1ebe988554a..4217c997423 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -427,6 +427,7 @@ void ViewportWindow::uploadChunk(const UploadChunk& chunk) { ObjectDrawInfo info; info.index_offset = static_cast(ebo_used_); info.index_count = static_cast(chunk.indices.size()); + info.model_id = chunk.model_id; const size_t num_verts = chunk.vertices.size() / VERTEX_STRIDE; if (num_verts > 0) { @@ -467,6 +468,23 @@ void ViewportWindow::resetScene() { total_triangles_ = 0; selected_object_id_ = 0; object_draw_info_.clear(); + hidden_models_.clear(); + removed_models_.clear(); +} + +void ViewportWindow::hideModel(uint32_t model_id) { + std::lock_guard lock(upload_mutex_); + hidden_models_.insert(model_id); +} + +void ViewportWindow::showModel(uint32_t model_id) { + std::lock_guard lock(upload_mutex_); + hidden_models_.erase(model_id); +} + +void ViewportWindow::removeModel(uint32_t model_id) { + std::lock_guard lock(upload_mutex_); + removed_models_.insert(model_id); } void ViewportWindow::setSelectedObjectId(uint32_t id) { @@ -567,6 +585,10 @@ 
void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { visible_offsets_.reserve(object_draw_info_.size()); for (const auto& obj : object_draw_info_) { + // Skip hidden or removed models. + if (hidden_models_.count(obj.model_id) || removed_models_.count(obj.model_id)) + continue; + bool visible = true; for (int p = 0; p < 6; ++p) { // p-vertex: the AABB corner most in the direction of the plane normal. diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 58a63343212..fda82a1db5e 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -39,6 +40,7 @@ struct MaterialInfo { struct ObjectDrawInfo { uint32_t index_offset; // byte offset into EBO uint32_t index_count; // number of indices + uint32_t model_id; // which model this object belongs to float aabb_min[3]; // world-space AABB float aabb_max[3]; }; @@ -51,6 +53,7 @@ struct UploadChunk { std::vector vertices; std::vector indices; // local to this chunk's vertices uint32_t object_id = 0; + uint32_t model_id = 0; }; class ViewportWindow : public QWindow { @@ -62,6 +65,10 @@ class ViewportWindow : public QWindow { void uploadChunk(const UploadChunk& chunk); void resetScene(); + void hideModel(uint32_t model_id); + void showModel(uint32_t model_id); + void removeModel(uint32_t model_id); + void setSelectedObjectId(uint32_t id); uint32_t pickObjectAt(int x, int y); @@ -136,6 +143,8 @@ class ViewportWindow : public QWindow { // Per-object draw metadata for frustum culling. 
std::vector object_draw_info_; + std::unordered_set hidden_models_; + std::unordered_set removed_models_; uint32_t total_index_count_ = 0; std::mutex upload_mutex_; diff --git a/src/ifcviewer/main.cpp b/src/ifcviewer/main.cpp index 3bca693a371..a5bb487db80 100644 --- a/src/ifcviewer/main.cpp +++ b/src/ifcviewer/main.cpp @@ -40,7 +40,7 @@ int main(int argc, char* argv[]) { QCommandLineParser parser; parser.setApplicationDescription("IfcOpenShell IFC Viewer"); parser.addHelpOption(); - parser.addPositionalArgument("file", "IFC file to open"); + parser.addPositionalArgument("files", "IFC file(s) to open", "[files...]"); parser.process(app); MainWindow window; @@ -48,7 +48,7 @@ int main(int argc, char* argv[]) { auto args = parser.positionalArguments(); if (!args.isEmpty()) { - window.openFile(args.first()); + window.addFiles(args); } return app.exec(); From 49b70e8276d0244d619452f4fce58cabb3c24c92 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sat, 11 Apr 2026 20:55:24 +1000 Subject: [PATCH 10/37] Update README for multi-model support and frustum culling Reflect current architecture: per-model streamers, glMultiDrawElements with frustum culling, 32-byte vertex format with color, multiselect file picker, settings/stats files. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 9c6c52560ce..d2fb084a416 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -10,21 +10,22 @@ A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine | +----------+ +--------------------------+| | | Element | | 3D Viewport || | | Tree | | (QWindow + OpenGL 4.5) || -| | | | || -| +----------+ | Single VBO/EBO || -| | Property | | DrawElementsBaseVertex || +| | (per- | | || +| | model) | | Single VBO/EBO || +| +----------+ | glMultiDrawElements || +| | Property | | frustum culling || | | Table | | GPU pick pass || | +----------+ +--------------------------+| -| | Status / Progress | +| | Status / Progress / Stats | +-------------------------------------------+ ^ ^ | | element metadata UploadChunks | | +-------------------------------------------+ -| GeometryStreamer (background QThread) | +| GeometryStreamer (one per loaded model) | | IfcGeom::Iterator with N threads | -| (one per CPU core by default) | +| (models loaded sequentially) | +-------------------------------------------+ ``` @@ -32,8 +33,10 @@ A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine - **QWindow viewport** embedded via `QWidget::createWindowContainer()`. This gives us a raw native surface for OpenGL, bypassing `QOpenGLWidget`'s compositor overhead. - **One big vertex buffer + index buffer** (64 MB + 32 MB initial). Geometry is appended as it streams in. No per-object VBOs, no rebinding. -- **Interleaved vertex format**: position (3 floats) + normal (3 floats) + object ID (1 float, bitcast uint32) = 28 bytes per vertex. +- **Interleaved vertex format**: position (3 floats) + normal (3 floats) + object ID (1 float, bitcast uint32) + color (RGBA8 packed into 1 float) = 32 bytes per vertex. 
+- **Per-object frustum culling**: each object's AABB is tested against 6 frustum planes each frame. Only visible objects are drawn via `glMultiDrawElements`. - **GPU object picking**: a second render pass writes object IDs to an R32UI framebuffer. Click reads back one pixel. No CPU-side raycasting. +- **Multi-model support**: multiple IFC files can be loaded simultaneously. Each model gets its own `GeometryStreamer` (owning the `ifcopenshell::file` for property lookup). Models are loaded sequentially; geometry from all models coexists in the shared VBO/EBO. Per-model visibility toggle and removal are supported. - **Multi-threaded tessellation**: `IfcGeom::Iterator` runs on a background thread and internally parallelizes geometry conversion across all CPU cores. - **Non-blocking streaming**: the iterator emits `UploadChunk` signals via Qt's queued connection. The main thread uploads to the GPU without blocking iteration. - **World coordinates**: geometry is emitted in world space (`use-world-coords=true`) so no per-object transform matrices are needed on the GPU. 
@@ -43,9 +46,11 @@ A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine | File | Purpose | |------|---------| | `main.cpp` | Application entry point, GL 4.5 surface format, CLI argument parsing | -| `MainWindow.h/cpp` | Qt main window: dockable element tree, property table, status bar, menus | -| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffer management, camera, picking | -| `GeometryStreamer.h/cpp` | Background geometry processing: loads IFC, runs iterator, emits chunks | +| `MainWindow.h/cpp` | Qt main window: multi-model project management, element tree, property table, status bar | +| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffer management, camera, frustum culling, picking | +| `GeometryStreamer.h/cpp` | Background geometry processing: loads IFC, runs iterator, emits chunks (one per model) | +| `AppSettings.h/cpp` | Persisted application preferences (geometry library, show stats) | +| `SettingsWindow.h/cpp` | Settings dialog UI | | `CMakeLists.txt` | Build configuration | ## Dependencies @@ -94,10 +99,10 @@ make -j$(nproc) ## Usage ```sh -# Open a file directly -./IfcViewer model.ifc +# Open one or more files from the command line +./IfcViewer arch.ifc struct.ifc mep.ifc -# Or use File -> Open from the menu +# Or use File -> Add Files from the menu (supports multiselect) ./IfcViewer ``` @@ -114,7 +119,7 @@ make -j$(nproc) | Key | Action | |-----|--------| -| Ctrl+O | Open file | +| Ctrl+O | Add files | | Ctrl+Q | Quit | ## Performance Strategy From 4d5256b427593dc232aaa8ad962fa836d46ce524 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 09:09:32 +1000 Subject: [PATCH 11/37] BVH frustum culling, sidecar cache, per-model buffers, progressive upload Phase 2 performance: BVH acceleration with median-split build, per-model trees, and EBO re-sorting for GPU cache coherence. 
Raw binary .ifcview sidecar stores full geometry + BVH for instant subsequent loads (skip tessellation entirely). Per-model GPU buffers (VAO/VBO/EBO per model) eliminate cross-model buffer copies on growth. Sidecar reads happen on a background thread. Bulk GPU uploads are progressive (48 MB/frame chunks) so the viewport stays interactive while multi-GB models stream in. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/BvhAccel.cpp | 226 +++++++++++ src/ifcviewer/BvhAccel.h | 75 ++++ src/ifcviewer/MainWindow.cpp | 178 ++++++++- src/ifcviewer/MainWindow.h | 7 + src/ifcviewer/README.md | 349 +++++++++++------ src/ifcviewer/SidecarCache.cpp | 196 ++++++++++ src/ifcviewer/SidecarCache.h | 76 ++++ src/ifcviewer/ViewportWindow.cpp | 631 +++++++++++++++++++++++-------- src/ifcviewer/ViewportWindow.h | 116 ++++-- 9 files changed, 1534 insertions(+), 320 deletions(-) create mode 100644 src/ifcviewer/BvhAccel.cpp create mode 100644 src/ifcviewer/BvhAccel.h create mode 100644 src/ifcviewer/SidecarCache.cpp create mode 100644 src/ifcviewer/SidecarCache.h diff --git a/src/ifcviewer/BvhAccel.cpp b/src/ifcviewer/BvhAccel.cpp new file mode 100644 index 00000000000..e0b232a283c --- /dev/null +++ b/src/ifcviewer/BvhAccel.cpp @@ -0,0 +1,226 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. 
* + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "BvhAccel.h" + +#include +#include +#include +#include +#include + +namespace { + +struct Centroid { + float x, y, z; +}; + +Centroid computeCentroid(const ObjectDrawInfo& obj) { + return { + (obj.aabb_min[0] + obj.aabb_max[0]) * 0.5f, + (obj.aabb_min[1] + obj.aabb_max[1]) * 0.5f, + (obj.aabb_min[2] + obj.aabb_max[2]) * 0.5f + }; +} + +void computeAABB(const std::vector& draw_info, + const uint32_t* indices, uint32_t count, + float out_min[3], float out_max[3]) { + out_min[0] = out_min[1] = out_min[2] = std::numeric_limits::max(); + out_max[0] = out_max[1] = out_max[2] = -std::numeric_limits::max(); + for (uint32_t i = 0; i < count; ++i) { + const auto& obj = draw_info[indices[i]]; + for (int a = 0; a < 3; ++a) { + if (obj.aabb_min[a] < out_min[a]) out_min[a] = obj.aabb_min[a]; + if (obj.aabb_max[a] > out_max[a]) out_max[a] = obj.aabb_max[a]; + } + } +} + +// Recursive BVH builder. Writes nodes in pre-order DFS into mbvh.nodes. +// object_indices[start..start+count) are the indices to partition. +void buildRecursive(ModelBvh& mbvh, + const std::vector& draw_info, + uint32_t start, uint32_t count) { + uint32_t node_idx = static_cast(mbvh.nodes.size()); + mbvh.nodes.emplace_back(); + BvhNode& node = mbvh.nodes[node_idx]; + + computeAABB(draw_info, &mbvh.object_indices[start], count, + node.aabb_min, node.aabb_max); + + if (count <= BVH_MAX_LEAF_SIZE) { + node.right_or_first = start; + node.count = static_cast(count); + node.axis = 0; + return; + } + + // Find longest axis of node AABB. 
+ float extent[3] = { + node.aabb_max[0] - node.aabb_min[0], + node.aabb_max[1] - node.aabb_min[1], + node.aabb_max[2] - node.aabb_min[2] + }; + int axis = 0; + if (extent[1] > extent[axis]) axis = 1; + if (extent[2] > extent[axis]) axis = 2; + + // Partition at median centroid on the chosen axis. + uint32_t mid = count / 2; + std::nth_element( + mbvh.object_indices.begin() + start, + mbvh.object_indices.begin() + start + mid, + mbvh.object_indices.begin() + start + count, + [&](uint32_t a, uint32_t b) { + Centroid ca = computeCentroid(draw_info[a]); + Centroid cb = computeCentroid(draw_info[b]); + return (&ca.x)[axis] < (&cb.x)[axis]; + }); + + node.count = 0; // interior + node.axis = static_cast(axis); + + // Left child is always node_idx + 1 (implicit in pre-order DFS). + // Build left subtree first. Note: &node is invalidated after this call + // because the vector may reallocate. + buildRecursive(mbvh, draw_info, start, mid); + + // Right child is the next node written after the entire left subtree. + uint32_t right_child_idx = static_cast(mbvh.nodes.size()); + buildRecursive(mbvh, draw_info, start + mid, count - mid); + + // Patch the right child index (left is implicit = node_idx + 1). + mbvh.nodes[node_idx].right_or_first = right_child_idx; +} + +} // anonymous namespace + +ModelBvh buildModelBvh(const std::vector& draw_info, + const std::vector& model_object_indices, + uint32_t model_id) { + ModelBvh mbvh; + mbvh.model_id = model_id; + mbvh.object_indices = model_object_indices; + + uint32_t count = static_cast(model_object_indices.size()); + if (count == 0) return mbvh; + + // Reserve a rough estimate: ~2*n nodes for a balanced binary tree. + mbvh.nodes.reserve(count * 2); + + buildRecursive(mbvh, draw_info, 0, count); + + // Verify: every object appears exactly once in the leaves. 
+ assert(!mbvh.nodes.empty()); + + return mbvh; +} + +std::shared_ptr buildBvhSet(const std::vector& draw_info) { + auto bvh_set = std::make_shared(); + + // Group object indices by model_id. + std::unordered_map> model_objects; + for (uint32_t i = 0; i < static_cast(draw_info.size()); ++i) { + model_objects[draw_info[i].model_id].push_back(i); + } + + // Build per-model BVHs. + for (auto& [model_id, obj_indices] : model_objects) { + if (obj_indices.size() < BVH_MIN_OBJECTS) continue; + + ModelBvh mbvh = buildModelBvh(draw_info, obj_indices, model_id); + bvh_set->bvh_model_ids.insert(model_id); + bvh_set->models[model_id] = std::move(mbvh); + } + + return bvh_set; +} + +EboReorderResult reorderEbo(const BvhSet& bvh_set, + const std::vector& draw_info, + const std::vector& original_ebo) { + EboReorderResult result; + result.reordered_draw_info = draw_info; // copy; we'll update offsets + result.reordered_ebo.reserve(original_ebo.size()); + + // Track which draw_info entries have been placed. + std::vector placed(draw_info.size(), false); + + for (const auto& [model_id, mbvh] : bvh_set.models) { + // DFS traversal of BVH to visit leaves in order. + uint32_t stack[64]; + int sp = 0; + stack[sp++] = 0; + + while (sp > 0) { + uint32_t ni = stack[--sp]; + const BvhNode& node = mbvh.nodes[ni]; + + if (node.count > 0) { + // Leaf: emit objects in order. + for (uint32_t i = 0; i < node.count; ++i) { + uint32_t oi = mbvh.object_indices[node.right_or_first + i]; + if (placed[oi]) continue; + placed[oi] = true; + + const auto& old_info = draw_info[oi]; + uint32_t new_offset = static_cast( + result.reordered_ebo.size() * sizeof(uint32_t)); + + // Copy indices from original EBO. 
+ uint32_t idx_start = old_info.index_offset / sizeof(uint32_t); + uint32_t idx_count = old_info.index_count; + for (uint32_t j = 0; j < idx_count; ++j) { + result.reordered_ebo.push_back(original_ebo[idx_start + j]); + } + + result.reordered_draw_info[oi].index_offset = new_offset; + } + } else { + // Interior: push left (=ni+1) last so it's processed first. + stack[sp++] = node.right_or_first; // right child + stack[sp++] = ni + 1; // left child + } + } + } + + // Append non-BVH objects (models too small for BVH). + for (uint32_t oi = 0; oi < static_cast(draw_info.size()); ++oi) { + if (placed[oi]) continue; + placed[oi] = true; + + const auto& old_info = draw_info[oi]; + uint32_t new_offset = static_cast( + result.reordered_ebo.size() * sizeof(uint32_t)); + + uint32_t idx_start = old_info.index_offset / sizeof(uint32_t); + uint32_t idx_count = old_info.index_count; + for (uint32_t j = 0; j < idx_count; ++j) { + result.reordered_ebo.push_back(original_ebo[idx_start + j]); + } + + result.reordered_draw_info[oi].index_offset = new_offset; + } + + assert(result.reordered_ebo.size() == original_ebo.size()); + + return result; +} diff --git a/src/ifcviewer/BvhAccel.h b/src/ifcviewer/BvhAccel.h new file mode 100644 index 00000000000..21c57c2712a --- /dev/null +++ b/src/ifcviewer/BvhAccel.h @@ -0,0 +1,75 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef BVHACCEL_H +#define BVHACCEL_H + +#include +#include +#include +#include +#include + +struct ObjectDrawInfo { + uint32_t index_offset; // byte offset into EBO + uint32_t index_count; // number of indices + uint32_t model_id; // which model this object belongs to + float aabb_min[3]; // world-space AABB + float aabb_max[3]; +}; + +static constexpr uint32_t BVH_MAX_LEAF_SIZE = 8; +static constexpr uint32_t BVH_MIN_OBJECTS = 32; + +struct BvhNode { + float aabb_min[3]; + float aabb_max[3]; + uint32_t right_or_first; // interior: right child index (left is always this_index+1); leaf: first object index + uint16_t count; // 0 = interior; >0 = leaf with this many objects + uint16_t axis; // split axis (0/1/2) for interior; unused for leaf +}; +static_assert(sizeof(BvhNode) == 32, "BvhNode must be 32 bytes for cache alignment and sidecar format"); + +struct ModelBvh { + uint32_t model_id = 0; + std::vector nodes; + std::vector object_indices; // indices into object_draw_info_ +}; + +struct BvhSet { + std::unordered_map models; + std::unordered_set bvh_model_ids; +}; + +struct EboReorderResult { + std::vector reordered_ebo; + std::vector reordered_draw_info; +}; + +// Build BVH trees for all models in the given draw info snapshot. +// Only builds the tree structure; does not touch EBO data. +std::shared_ptr buildBvhSet(const std::vector& draw_info); + +// Reorder the EBO so objects within each BVH leaf are contiguous. +// Must be called with the CURRENT run's EBO and draw_info (not cached). 
+EboReorderResult reorderEbo(const BvhSet& bvh_set, + const std::vector& draw_info, + const std::vector& original_ebo); + +#endif // BVHACCEL_H diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 3b4e58fbacc..b5ee3581c44 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -20,6 +20,7 @@ #include "MainWindow.h" #include "AppSettings.h" #include "SettingsWindow.h" +#include "SidecarCache.h" #include #include @@ -61,7 +62,14 @@ MainWindow::MainWindow(QWidget* parent) resize(1400, 900); } -MainWindow::~MainWindow() {} +MainWindow::~MainWindow() { + joinSidecarThread(); +} + +void MainWindow::joinSidecarThread() { + if (sidecar_read_thread_.joinable()) + sidecar_read_thread_.join(); +} void MainWindow::setupUi() { // 3D Viewport as central widget @@ -158,7 +166,7 @@ void MainWindow::addFiles(const QStringList& paths) { } if (loading_model_id_ == 0) { - startNextLoad(); + QTimer::singleShot(0, this, &MainWindow::startNextLoad); } } @@ -184,16 +192,133 @@ void MainWindow::startNextLoad() { load_queue_.pop_front(); auto& model = models_[loading_model_id_]; - connectStreamer(model.streamer); - progress_bar_->setValue(0); - progress_bar_->setVisible(true); + load_timer_.restart(); status_label_->setText("Loading: " + model.display_name); - load_timer_.restart(); - element_poll_timer_.start(); - model.streamer->loadFile( - model.file_path.toStdString(), next_object_id_, loading_model_id_); + // Try sidecar on a background thread so the UI stays responsive. 
+ std::string ifc_path = model.file_path.toStdString(); + uint64_t file_size = static_cast(QFileInfo(model.file_path).size()); + ModelId mid = loading_model_id_; + + joinSidecarThread(); + sidecar_read_thread_ = std::thread([this, ifc_path, file_size, mid]() { + QElapsedTimer rt; rt.start(); + auto cached = readSidecar(ifc_path, file_size); + qDebug(" Sidecar read: %lld ms (%s)", rt.elapsed(), ifc_path.c_str()); + auto result = std::make_shared>(std::move(cached)); + QMetaObject::invokeMethod(this, [this, mid, result]() { + if (*result && !(*result)->draw_info.empty()) { + applySidecarData(mid, std::move(**result)); + } else { + // No sidecar — fall back to streaming from IFC. + auto it = models_.find(mid); + if (it == models_.end()) return; + auto& m = it->second; + connectStreamer(m.streamer); + progress_bar_->setValue(0); + progress_bar_->setVisible(true); + status_label_->setText("Loading: " + m.display_name); + element_poll_timer_.start(); + m.streamer->loadFile( + m.file_path.toStdString(), next_object_id_, loading_model_id_); + } + }, Qt::QueuedConnection); + }); +} + +void MainWindow::applySidecarData(ModelId mid, SidecarData data) { + auto it = models_.find(mid); + if (it == models_.end()) return; + auto& model = it->second; + + QElapsedTimer t; + + qDebug("Sidecar hit: %s (%zu objects, %zu verts, %zu indices, %.1f MB)", + model.file_path.toStdString().c_str(), data.draw_info.size(), + data.vertices.size() / 8, data.indices.size(), + (data.vertices.size() * 4 + data.indices.size() * 4) / (1024.0 * 1024.0)); + + // GL upload — fast, single buffer copy. + t.start(); + viewport_->uploadBulk(mid, data.vertices, data.indices, + data.draw_info, std::move(data.bvh_set)); + qDebug(" GL upload: %lld ms", t.elapsed()); + + // Update next_object_id_ past all objects in this model. 
+ for (const auto& elem : data.elements) { + if (elem.object_id >= next_object_id_) + next_object_id_ = elem.object_id + 1; + } + + // Suppress per-item layout recalcs while building the tree. + t.restart(); + element_tree_->setUpdatesEnabled(false); + populateTreeFromSidecar(model, data.elements, data.string_table); + element_tree_->setUpdatesEnabled(true); + qDebug(" Tree build: %lld ms (%zu elements)", t.elapsed(), data.elements.size()); + + progress_bar_->setVisible(false); + + qint64 ms = load_timer_.elapsed(); + QString elapsed = (ms >= 1000) + ? QString::number(ms / 1000.0, 'f', 2) + " s" + : QString::number(ms) + " ms"; + + status_label_->setText(QString("%1 elements across %2 model(s) — loaded from cache in %3") + .arg(element_map_.size()) + .arg(models_.size()) + .arg(elapsed)); + + loading_model_id_ = 0; + QTimer::singleShot(0, this, &MainWindow::startNextLoad); +} + +void MainWindow::populateTreeFromSidecar(ModelHandle& model, + const std::vector& elements, + const std::string& stbl) { + auto str = [&](uint32_t offset, uint32_t length) -> std::string { + if (length == 0 || offset + length > stbl.size()) return {}; + return stbl.substr(offset, length); + }; + + for (const auto& pe : elements) { + ElementInfo info; + info.object_id = pe.object_id; + info.model_id = pe.model_id; + info.ifc_id = pe.ifc_id; + info.parent_id = pe.parent_id; + info.guid = str(pe.guid_offset, pe.guid_length); + info.name = str(pe.name_offset, pe.name_length); + info.type = str(pe.type_offset, pe.type_length); + + element_map_[info.object_id] = info; + scoped_ifc_id_to_object_id_[scopedKey(info.model_id, info.ifc_id)] = info.object_id; + + // Find parent tree item. 
+ QTreeWidgetItem* parent_item = model.tree_root; + auto parent_obj_it = scoped_ifc_id_to_object_id_.find( + scopedKey(info.model_id, info.parent_id)); + if (parent_obj_it != scoped_ifc_id_to_object_id_.end()) { + auto tree_it = tree_items_.find(parent_obj_it->second); + if (tree_it != tree_items_.end()) { + parent_item = tree_it->second; + } + } + + QString display_name = QString::fromStdString(info.name); + if (display_name.isEmpty()) { + display_name = QString::fromStdString(info.type) + " #" + QString::number(info.ifc_id); + } + + auto* item = new QTreeWidgetItem(parent_item); + item->setText(0, display_name); + item->setText(1, QString::fromStdString(info.type)); + item->setText(2, QString::fromStdString(info.guid)); + item->setData(0, Qt::UserRole, info.object_id); + + tree_items_[info.object_id] = item; + } } void MainWindow::onProgressChanged(int percent) { @@ -230,6 +355,41 @@ void MainWindow::onStreamingFinished() { .arg(num_models) .arg(elapsed)); + // Build BVH and write sidecar (geometry + metadata + BVH). + if (loading_model_id_ != 0) { + auto it = models_.find(loading_model_id_); + if (it != models_.end()) { + std::string ifc_path = it->second.file_path.toStdString(); + QFileInfo fi(it->second.file_path); + uint64_t file_size = static_cast(fi.size()); + + // Pack element info for the sidecar (only this model's elements). 
+ std::vector packed; + std::string stbl; + for (const auto& [oid, info] : element_map_) { + if (info.model_id != loading_model_id_) continue; + PackedElementInfo pe; + pe.object_id = info.object_id; + pe.model_id = info.model_id; + pe.ifc_id = info.ifc_id; + pe.parent_id = info.parent_id; + pe.guid_offset = static_cast(stbl.size()); + pe.guid_length = static_cast(info.guid.size()); + stbl += info.guid; + pe.name_offset = static_cast(stbl.size()); + pe.name_length = static_cast(info.name.size()); + stbl += info.name; + pe.type_offset = static_cast(stbl.size()); + pe.type_length = static_cast(info.type.size()); + stbl += info.type; + packed.push_back(pe); + } + + viewport_->buildBvhAsync(loading_model_id_, ifc_path, file_size, + std::move(packed), std::move(stbl)); + } + } + // Start next model if queued. startNextLoad(); } diff --git a/src/ifcviewer/MainWindow.h b/src/ifcviewer/MainWindow.h index e9bcc37cb6b..f60da70b75d 100644 --- a/src/ifcviewer/MainWindow.h +++ b/src/ifcviewer/MainWindow.h @@ -31,6 +31,7 @@ #include #include +#include #include #include "ViewportWindow.h" @@ -72,6 +73,11 @@ private slots: void setupMenus(); void populateProperties(uint32_t object_id); void startNextLoad(); + void applySidecarData(ModelId mid, SidecarData data); + void joinSidecarThread(); + void populateTreeFromSidecar(ModelHandle& model, + const std::vector& elements, + const std::string& string_table); void connectStreamer(GeometryStreamer* streamer); ViewportWindow* viewport_ = nullptr; @@ -91,6 +97,7 @@ private slots: uint32_t next_object_id_ = 1; // monotonically increasing across all models std::deque load_queue_; ModelId loading_model_id_ = 0; + std::thread sidecar_read_thread_; // Map object_id -> tree item and element info std::unordered_map element_map_; diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index d2fb084a416..d0122d63c83 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -11,16 +11,16 @@ A high-performance native IFC viewer 
built on IfcOpenShell's C++ geometry engine | | Element | | 3D Viewport || | | Tree | | (QWindow + OpenGL 4.5) || | | (per- | | || -| | model) | | Single VBO/EBO || +| | model) | | Per-model VAO/VBO/EBO || | +----------+ | glMultiDrawElements || -| | Property | | frustum culling || +| | Property | | BVH frustum culling || | | Table | | GPU pick pass || | +----------+ +--------------------------+| | | Status / Progress / Stats | +-------------------------------------------+ ^ ^ | | - element metadata UploadChunks + element metadata UploadChunks / Sidecar | | +-------------------------------------------+ | GeometryStreamer (one per loaded model) | @@ -32,11 +32,13 @@ A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine ### Key design decisions - **QWindow viewport** embedded via `QWidget::createWindowContainer()`. This gives us a raw native surface for OpenGL, bypassing `QOpenGLWidget`'s compositor overhead. -- **One big vertex buffer + index buffer** (64 MB + 32 MB initial). Geometry is appended as it streams in. No per-object VBOs, no rebinding. +- **Per-model GPU buffers**: each loaded model gets its own VAO/VBO/EBO. No shared buffer, no cross-model copies on growth. Removing a model frees its GPU memory immediately. - **Interleaved vertex format**: position (3 floats) + normal (3 floats) + object ID (1 float, bitcast uint32) + color (RGBA8 packed into 1 float) = 32 bytes per vertex. -- **Per-object frustum culling**: each object's AABB is tested against 6 frustum planes each frame. Only visible objects are drawn via `glMultiDrawElements`. +- **Progressive GPU upload**: bulk sidecar loads allocate empty GPU buffers, then stream data in 48 MB chunks per frame. VBO uploads first (no objects visible), then EBO (objects appear progressively as their index range lands). The viewport stays interactive throughout — you can orbit already-loaded models while new ones stream in. 
+- **Non-blocking sidecar loading**: sidecar files are read on a background thread. The heavy disk I/O (potentially gigabytes) never blocks the render loop. Only the final GPU upload and tree population happen on the main thread. +- **BVH frustum culling**: per-model BVH trees cull entire subtrees of objects in one frustum test, reducing per-frame cost from O(N) to O(log N). Falls back to linear scan during progressive upload; BVH activates once the model is fully loaded. - **GPU object picking**: a second render pass writes object IDs to an R32UI framebuffer. Click reads back one pixel. No CPU-side raycasting. -- **Multi-model support**: multiple IFC files can be loaded simultaneously. Each model gets its own `GeometryStreamer` (owning the `ifcopenshell::file` for property lookup). Models are loaded sequentially; geometry from all models coexists in the shared VBO/EBO. Per-model visibility toggle and removal are supported. +- **Multi-model support**: multiple IFC files can be loaded simultaneously. Each model gets its own `GeometryStreamer` (owning the `ifcopenshell::file` for property lookup). Models are loaded sequentially. Per-model visibility toggle and removal are supported. - **Multi-threaded tessellation**: `IfcGeom::Iterator` runs on a background thread and internally parallelizes geometry conversion across all CPU cores. - **Non-blocking streaming**: the iterator emits `UploadChunk` signals via Qt's queued connection. The main thread uploads to the GPU without blocking iteration. - **World coordinates**: geometry is emitted in world space (`use-world-coords=true`) so no per-object transform matrices are needed on the GPU. 
@@ -47,8 +49,10 @@ A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine |------|---------| | `main.cpp` | Application entry point, GL 4.5 surface format, CLI argument parsing | | `MainWindow.h/cpp` | Qt main window: multi-model project management, element tree, property table, status bar | -| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffer management, camera, frustum culling, picking | +| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffer management, camera, frustum culling, BVH traversal, picking | | `GeometryStreamer.h/cpp` | Background geometry processing: loads IFC, runs iterator, emits chunks (one per model) | +| `BvhAccel.h/cpp` | BVH construction (median-split), per-model trees, EBO reordering | +| `SidecarCache.h/cpp` | Raw binary `.ifcview` sidecar read/write | | `AppSettings.h/cpp` | Persisted application preferences (geometry library, show stats) | | `SettingsWindow.h/cpp` | Settings dialog UI | | `CMakeLists.txt` | Build configuration | @@ -142,8 +146,9 @@ object that enters the GPU buffers: ```cpp struct ObjectDrawInfo { - uint32_t index_offset; // byte offset into the shared EBO + uint32_t index_offset; // byte offset into the model's EBO uint32_t index_count; // number of indices (triangles * 3) + uint32_t model_id; // which model this object belongs to float aabb_min[3]; // world-space axis-aligned bounding box float aabb_max[3]; // (computed from vertex positions at upload time) }; @@ -211,108 +216,207 @@ Phase 1 is sufficient for models up to ~100k objects. Beyond that, the CPU-side frustum test becomes a measurable fraction of the frame budget, motivating phase 3. -### Phase 2: Spatial Tiling (optional, for large models) +### Phase 2: BVH Acceleration (optional, for large models) -For models exceeding ~10k objects, spatial tiling groups nearby objects into -tiles and culls at the tile level rather than per-object. 
This reduces the -number of frustum tests from N_objects to N_tiles (typically hundreds to low -thousands). +**Status:** Implemented. -#### When tiling activates +For models exceeding ~100 objects, a bounding volume hierarchy (BVH) groups +nearby objects into a binary tree and culls entire subtrees in one frustum +test. This reduces the number of AABB-frustum tests from O(N_objects) to +O(log N) in the best case (camera zoomed into a corner) and adds only a small, +constant-factor overhead in the common case where most of the model is on screen. -Tiling is **optional and non-disruptive**. The system treats a non-tiled model -as the degenerate case of "one tile containing everything" — the rendering loop -always iterates tiles, so no separate code path is needed. +A BVH was chosen over an octree because BIM data is spatially non-uniform — +dense MEP risers in one zone, sparse open atriums in another. An octree +subdivides space uniformly, wasting nodes on empty regions and creating deep +chains in dense ones. A BVH adapts its splits to the actual object +distribution, producing balanced trees regardless of density variation. -Tiling activates in one of three ways: +#### When the BVH activates -1. **Preprocessed cache exists**: If a `.ifcview` sidecar file is found next to - the `.ifc` file, the tile structure is loaded from it instantly. The model - uploads geometry in tile order. -2. **Automatic by size**: If the model has more than a configurable threshold of - objects (default 10k), a background task builds the spatial tree after - initial loading completes. Until it finishes, phase 1 culling handles - visibility. -3. **Explicit user action**: A "preprocess for performance" option builds the - spatial tree and saves the sidecar for future loads. +The BVH is **optional and non-disruptive**. Until it is built, phase 1's +linear scan handles all culling. The rendering loop checks for an active BVH +and falls back to the linear scan for any model that doesn't have one. 
-#### Spatial subdivision +The BVH activates in one of two ways: -The world-space bounding box of the entire model is subdivided using a -**loose octree**: +1. **Sidecar cache exists**: If a `.ifcview` file is found next to the `.ifc` + file, the BVH is loaded from it instantly (raw memory read, no parsing). + The model uses BVH culling from the first frame after loading. +2. **Automatic build**: After streaming finishes, a background thread builds + the BVH from the per-object AABBs already computed in phase 1. Until it + completes, phase 1 culling handles visibility. On completion, the render + thread picks up the BVH on the next frame. The sidecar is written for + future loads. -- The root node covers the scene AABB. -- Each node is split when it contains more than a threshold number of objects - (e.g. 256). -- Objects are assigned to the smallest node that fully contains their AABB. -- "Loose" bounds (inflated by 1.5x) reduce the number of objects that span - multiple nodes. -- Leaf nodes become tiles. +Models with fewer than 32 objects skip the BVH entirely — the overhead of tree +traversal is worse than a linear scan at that scale. -An octree adapts to non-uniform object density (common in buildings — lots of -detail in MEP risers, sparse in open atriums) better than a uniform grid. +#### BVH node layout -#### EBO re-sorting +Each node is 32 bytes, so two nodes fit in one 64-byte cache line: -For tile-level culling to translate into contiguous index ranges, the EBO must -be sorted so that all indices for objects in the same tile are adjacent. 
+```cpp +struct BvhNode { + float aabb_min[3]; // world-space bounding box (12 bytes) + float aabb_max[3]; // (12 bytes) + uint32_t right_or_first; // interior: right child index; leaf: first object index (4 bytes) + uint16_t count; // 0 = interior node; >0 = leaf with this many objects (2 bytes) + uint16_t axis; // split axis for interior (0=x, 1=y, 2=z); unused for leaf (2 bytes) +}; +``` -This happens via **deferred compaction**: +Interior nodes store the right child index; the left child is always the +node immediately after it in the array (implicit in pre-order DFS layout, no +pointer needed). Leaf nodes reference a contiguous range in a sorted +object-index array. -1. During initial load, geometry uploads in iterator order (fast first frame, - phase 1 culling active). -2. After loading completes, a background thread: - a. Builds the octree from the per-object AABBs (already computed in phase 1). - b. Determines the tile for each object. - c. Computes the new index order (sorted by tile, then by object within tile). - d. Builds a new EBO on the CPU. -3. The main thread uploads the new EBO in one `glNamedBufferSubData` call and - swaps in the tile metadata. One frame of stutter, bounded by EBO upload - time. +The BVH is stored as a flat `std::vector<BvhNode>` in pre-order DFS layout. +This means a depth-first traversal (which is what frustum culling does) reads +memory sequentially, maximizing prefetch and cache-line utilization. -The per-tile metadata: +#### Build algorithm: object-median split + +1. Compute the centroid of each object's AABB. +2. Find the longest axis of the current node's bounding box. +3. Use `std::nth_element` to partition objects at the median centroid on that + axis. This is O(n) — no full sort needed. +4. Recurse on each half. Terminate when the node contains ≤ 8 objects (leaf). +5. Write nodes into the flat array in pre-order DFS. + +Total build time is O(n log n). For 100k objects this is well under 100 ms on +a single core. 
+ +SAH (Surface Area Heuristic) is the gold standard for ray-tracing BVHs, but +for frustum culling — where we test 6 planes and early-out entire subtrees — +the quality difference vs. median split is negligible. Median split is simpler +and produces reliably balanced trees. + +#### Frustum traversal + +The traversal uses an explicit, fixed-size node stack kept in a local array (no +heap allocation, no recursion): + +``` +stack[64] = {0} // start at root; depth 64 handles billions of objects +while stack not empty: + node = nodes[stack.pop()] + if node AABB outside frustum: continue // cull entire subtree + if leaf: + for each object in node: + if object AABB in frustum: emit to visible list + else: + push right child, push left child // left processed first (DFS) +``` + +When the camera is zoomed into a corner of the model, the traversal skips +large portions of the tree after testing only a handful of interior nodes. +When zoomed out to see everything, the traversal visits all leaves but the +overhead of the interior-node tests is small relative to the leaf work. + +#### Per-model BVH + +Each loaded model gets its own BVH. During frustum culling, the outer loop +iterates over models (skipping hidden/removed ones); the inner loop traverses +that model's BVH. This means hiding or removing a model is free — just skip +its BVH, no tree modification needed. ```cpp -struct TileInfo { - float aabb_min[3]; // tile bounding box (union of contained AABBs) - float aabb_max[3]; - uint32_t index_offset; // into the re-sorted EBO - uint32_t index_count; // sum of all contained objects' indices - uint32_t object_count; // for stats / debugging +struct ModelBvh { + uint32_t model_id; + std::vector<BvhNode> nodes; // flat BVH node array + std::vector<uint32_t> object_indices; // indices into object_draw_info_ }; ``` -#### Preprocessed sidecar format +#### EBO re-sorting + +For BVH culling to maximise GPU cache performance, the EBO is re-sorted so +that objects in the same BVH leaf are contiguous. 
This happens via **deferred +compaction**: + +1. During initial load, geometry uploads in iterator order (fast first frame, + phase 1 culling active). +2. After the BVH build completes on the background thread: + a. Walk the BVH leaves in DFS order. + b. For each object in each leaf, copy its index data to a new EBO buffer, + updating `ObjectDrawInfo::index_offset` accordingly. + c. Package the reordered EBO + updated draw info as a `BvhBuildResult`. +3. The render thread picks up the result on the next frame: one + `glNamedBufferSubData` call to re-upload the EBO, then swap in the new + draw info and activate the BVH. One frame of stutter, bounded by EBO + upload time (~5 ms for 32 MB). + +#### Async build and render-thread handoff + +The BVH build must not stall the render loop: + +1. `buildBvhAsync()` snapshots `object_draw_info_` under the upload mutex, + then launches a `std::thread`. +2. The thread builds the BVH and reordered EBO, then stores the result in a + `pending_bvh_result_` pointer under a separate mutex. +3. At the top of each `render()` call, `applyBvhResult()` checks for a + pending result. If found, it re-uploads the EBO (requires GL context), + swaps the draw info, and activates the BVH. +4. Until the BVH is ready, phase 1's linear scan runs every frame as before. + +#### Preprocessed sidecar format (`.ifcview`) + +The sidecar is a raw memory dump (Blender `.blend`-style) — no serialization +format, no parsing. It stores everything needed to display the model without +re-tessellating: vertex data, index data, per-object metadata, element tree +info, and the BVH. Loading is just `fread` into vectors → GPU upload → +render. The expensive `IfcGeom::Iterator` tessellation is skipped entirely. + +The IFC file is still parsed on demand (in background) for detailed property +lookup; the sidecar provides the basic properties (name, type, GUID) +immediately. 
-The `.ifcview` file stores: +``` +SidecarHeader (16 bytes: magic, version, endian, reserved) +uint64_t source_file_size + +uint32_t + float[] vertex data (interleaved, 8 floats/vertex) +uint32_t + uint32_t[] index data (global indices, ready for EBO) +uint32_t + ObjectDrawInfo[] per-object draw metadata +uint32_t + PackedElementInfo[] element tree records (fixed-size) +uint32_t + char[] string table (concatenated UTF-8: guid, name, type) + +uint32_t num_bvh_models +per model: + uint32_t model_id + uint32_t + BvhNode[] BVH node array + uint32_t + uint32_t[] object indices +``` -- Octree structure (node hierarchy, split planes). -- Per-object tile assignment (object_id → tile_id mapping). -- Per-tile index order (so the EBO can be built in tile order directly during - upload, skipping the compaction pass entirely). -- File hash of the source `.ifc` (invalidation check). +Staleness check: `source_file_size` is compared against the actual IFC file +size. If mismatched, the sidecar is stale and is rebuilt. This is cheap and +sufficient for a local cache (no hash computation on multi-GB files). -This makes second-and-subsequent loads of the same model significantly faster: -the spatial tree doesn't need to be rebuilt, and geometry uploads in tile order -from the start. +Endianness: if the marker reads back as `0x01020304`, the file was written on +the same architecture — just `fread` the structs directly. Otherwise, reject +the sidecar and rebuild. 
#### Performance characteristics | Metric | Value | |--------|-------| -| Tile count (typical) | 500–5,000 for a large building | -| Per-frame frustum tests | N_tiles instead of N_objects | -| 500k objects, ~2k tiles | ~0.01 ms frustum testing | -| Memory overhead | ~64 bytes/tile + 32 bytes/object (phase 1 metadata retained) | -| Background compaction | 1–5 seconds for 1M objects (single-threaded) | -| Sidecar file size | ~10–50 KB (indices + tree, no geometry) | +| BVH build time (100k objects) | < 100 ms (single-threaded, background) | +| Per-frame traversal (100k objects, 50% visible) | ~0.1 ms | +| Per-frame traversal (100k objects, 5% visible) | ~0.02 ms | +| Memory overhead | 32 bytes/node + 4 bytes/object index (~1.5× object count) | +| EBO reorder (one-time) | 1–5 ms upload for 32 MB EBO | +| Sidecar file size | ~same as geometry data (vertices + indices + metadata) | +| Sidecar read time | bounded by disk I/O (~500 ms for 640 MB, ~2 s for 2.8 GB from NVMe) | +| GPU upload time | progressive: ~48 MB/frame (~1 s for 2.8 GB at 60 fps, non-blocking) | #### Spatial coherence bonus -Beyond culling, tile-sorted EBOs improve GPU cache performance. When the GPU -rasterizes a tile's triangles, the vertices are contiguous in the VBO, so the -post-transform vertex cache hits more often. This can yield 10–20% rasterization -speedup even when nothing is culled (e.g. zoomed out to see the whole model). +Beyond culling, BVH-leaf-sorted EBOs improve GPU cache performance. When the +GPU rasterizes a leaf's triangles, the vertices are close together in the VBO, +so the post-transform vertex cache hits more often. This can yield 10–20% +rasterization speedup even when nothing is culled (e.g. zoomed out to see the +whole model). ### Phase 3: GPU-Driven Indirect Draw @@ -322,20 +426,20 @@ visibility decisions to the GPU via compute shaders and indirect draw commands. #### How it works -Phase 3 is **approach 2 layered on top of approach 3**. 
It does not replace -tiling — it accelerates it. +Phase 3 builds on the BVH from phase 2. It does not replace the BVH — it +moves the per-frame traversal to the GPU. 1. **Upload phase** (once, at load time): - - Per-tile AABBs are uploaded to a GPU SSBO (`tile_aabbs`). - - One `DrawElementsIndirectCommand` per tile is written to an indirect draw - buffer: + - Per-leaf AABBs from the BVH are uploaded to a GPU SSBO (`leaf_aabbs`). + - One `DrawElementsIndirectCommand` per BVH leaf is written to an indirect + draw buffer: ```c struct DrawElementsIndirectCommand { - uint count; // tile's total index count + uint count; // leaf's total index count uint instanceCount; // 1 - uint firstIndex; // offset into EBO + uint firstIndex; // offset into EBO (from BVH leaf order) uint baseVertex; // 0 (indices are global) - uint baseInstance; // tile_id (available in shader via gl_DrawID) + uint baseInstance; // leaf_id (available in shader via gl_DrawID) }; ``` - A "template" copy of the indirect buffer is kept so the compute shader @@ -343,20 +447,20 @@ tiling — it accelerates it. 2. **Cull phase** (every frame, on the GPU): - The CPU uploads 6 frustum plane vec4s as a uniform or small UBO. 
- - A compute shader dispatches `ceil(N_tiles / 64)` workgroups: + - A compute shader dispatches `ceil(N_leaves / 64)` workgroups: ```glsl layout(local_size_x = 64) in; void main() { - uint tile_id = gl_GlobalInvocationID.x; - if (tile_id >= tile_count) return; + uint leaf_id = gl_GlobalInvocationID.x; + if (leaf_id >= leaf_count) return; // Copy from template (resets any previously zeroed commands) - commands[tile_id] = template_commands[tile_id]; + commands[leaf_id] = template_commands[leaf_id]; // Frustum test - if (!aabb_vs_frustum(tile_aabbs[tile_id], frustum_planes)) { - commands[tile_id].count = 0; // culled: GPU skips zero-count draws + if (!aabb_vs_frustum(leaf_aabbs[leaf_id], frustum_planes)) { + commands[leaf_id].count = 0; // culled: GPU skips zero-count draws } } ``` @@ -364,7 +468,7 @@ tiling — it accelerates it. 3. **Draw phase** (every frame): - One call: `glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, - nullptr, N_tiles, 0)`. + nullptr, N_leaves, 0)`. - The GPU reads the indirect buffer, skips tiles with `count == 0`, and draws the rest. Zero CPU-side per-object or per-tile work. @@ -382,12 +486,12 @@ That's it. The CPU frame time is essentially constant regardless of model size. Once the compute-based cull pass exists, it's straightforward to add: - **Hierarchical-Z occlusion culling**: render a coarse depth buffer from the - previous frame, then test tile AABBs against it in the compute shader. Tiles - fully behind closer geometry get culled. This handles interior-heavy BIM - models well (most rooms are occluded from any given viewpoint). + previous frame, then test BVH leaf AABBs against it in the compute shader. + Leaves fully behind closer geometry get culled. This handles interior-heavy + BIM models well (most rooms are occluded from any given viewpoint). - **Distance-based LOD**: the compute shader can select different index ranges - (coarse vs. fine tessellation) per tile based on distance to camera. 
-- **Contribution culling**: tiles whose screen-space projection is below a + (coarse vs. fine tessellation) per leaf based on distance to camera. +- **Contribution culling**: leaves whose screen-space projection is below a pixel threshold get `count = 0`. Removes distant small objects. #### Performance characteristics @@ -395,10 +499,10 @@ Once the compute-based cull pass exists, it's straightforward to add: | Metric | Value | |--------|-------| | CPU per-frame work | ~0.01 ms (constant, independent of model size) | -| GPU compute dispatch | ~0.02 ms for 2k tiles | +| GPU compute dispatch | ~0.02 ms for 2k leaves | | Draw call overhead | 1 indirect multi-draw call | -| GPU memory overhead | ~48 bytes/tile (AABB SSBO) + 20 bytes/tile (indirect commands) × 2 (template + live) | -| Total for 2k tiles | ~176 KB GPU memory | +| GPU memory overhead | ~48 bytes/leaf (AABB SSBO) + 20 bytes/leaf (indirect commands) × 2 (template + live) | +| Total for 2k leaves | ~176 KB GPU memory | | Implementation complexity | High (compute shaders, SSBOs, memory barriers, indirect draw) | #### When to use @@ -411,8 +515,8 @@ Phase 3 is worthwhile when: the viewer requires 4.5). For models under 100k objects, phase 1 alone is sufficient. For 100k–500k, -phase 2 (tiling) keeps CPU culling under 1 ms. Phase 3 is the final step that -makes the CPU frame time constant. +phase 2 (BVH) keeps CPU culling well under 1 ms. Phase 3 is the final step +that makes the CPU frame time constant. ### Summary @@ -429,28 +533,33 @@ The load path: ``` open(model.ifc): - ├─ sidecar exists? - │ ├─ yes: load tile tree from .ifcview - │ │ upload geometry in tile order - │ │ (skip background compaction) - │ └─ no: upload geometry in iterator order (fast first frame) - │ phase 1 culling active immediately - │ if object_count > threshold: - │ background: build octree, re-sort EBO, save .ifcview - │ on completion: swap in tile structure - └─ rendering: - ├─ phase 3 available? 
→ compute cull + indirect multi-draw - └─ else → CPU frustum test + glMultiDrawElements + ├─ sidecar exists (.ifcview)? + │ ├─ yes: background thread reads sidecar file (non-blocking I/O) + │ │ → allocate per-model VAO/VBO/EBO (empty, exact size) + │ │ → progressive GPU upload: 48 MB/frame VBO, then EBO + │ │ → objects appear as EBO chunks land + │ │ → BVH activates once fully loaded + │ │ → viewport interactive throughout + │ └─ no: stream from IFC via GeometryStreamer + │ → uploadChunk() appends to per-model buffers (immediately drawable) + │ → phase 1 linear-scan culling active from first chunk + │ → on completion: background BVH build, re-sort EBO, save .ifcview + └─ rendering (per model, per frame): + ├─ phase 3 available? → compute cull + indirect multi-draw + ├─ BVH available? → BVH traversal + glMultiDrawElements + └─ else / progressive → linear scan of active objects + glMultiDrawElements ``` ## Roadmap - [x] Material color support (per-vertex RGBA8) -- [x] Buffer growth (dynamic VBO/EBO resizing up to 4 GB) +- [x] Per-model GPU buffers (VAO/VBO/EBO per model, no cross-model copies) - [x] Per-object frustum culling (phase 1) -- [ ] Spatial tiling with octree (phase 2) +- [x] BVH acceleration with per-model trees (phase 2) +- [x] Raw binary `.ifcview` sidecar cache (full geometry + BVH, Blender-style) +- [x] Non-blocking sidecar loading (background thread I/O) +- [x] Progressive GPU upload (48 MB/frame chunked VBO/EBO transfer) - [ ] GPU-driven indirect draw (phase 3) -- [ ] Preprocessed `.ifcview` sidecar for fast re-loads - [ ] Hierarchical-Z occlusion culling - [ ] Distance-based LOD selection - [ ] Vulkan/MoltenVK backend for macOS diff --git a/src/ifcviewer/SidecarCache.cpp b/src/ifcviewer/SidecarCache.cpp new file mode 100644 index 00000000000..d77095c9223 --- /dev/null +++ b/src/ifcviewer/SidecarCache.cpp @@ -0,0 +1,196 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. 
* + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "SidecarCache.h" + +#include +#include + +// Binary layout (all multi-byte fields native-endian): +// +// SidecarHeader (16 bytes) +// uint64_t source_file_size +// +// uint32_t num_vertices (count of floats) +// float[num_vertices] vertex data +// +// uint32_t num_indices +// uint32_t[num_indices] index data +// +// uint32_t num_draw_infos +// ObjectDrawInfo[N] draw info array +// +// uint32_t num_elements +// PackedElementInfo[N] element records +// uint32_t string_table_bytes +// char[string_table_bytes] +// +// uint32_t num_bvh_models +// for each model: +// uint32_t model_id +// uint32_t num_nodes +// BvhNode[num_nodes] +// uint32_t num_object_indices +// uint32_t[num_object_indices] + +struct SidecarHeader { + uint32_t magic; + uint32_t version; + uint32_t endian; + uint32_t reserved; +}; + +static std::string sidecarPath(const std::string& ifc_path) { + return ifc_path + ".ifcview"; +} + +template +static bool writeVec(FILE* f, const std::vector& v) { + uint32_t n = static_cast(v.size()); + if (fwrite(&n, 4, 1, f) != 1) return false; + if (n > 0 && fwrite(v.data(), sizeof(T), n, f) != n) return false; + return true; +} + +template +static bool readVec(FILE* f, std::vector& v) { + uint32_t 
n; + if (fread(&n, 4, 1, f) != 1) return false; + v.resize(n); + if (n > 0 && fread(v.data(), sizeof(T), n, f) != n) return false; + return true; +} + +bool writeSidecar(const std::string& ifc_path, + const SidecarData& data, + uint64_t ifc_file_size) { + std::string path = sidecarPath(ifc_path); + FILE* f = fopen(path.c_str(), "wb"); + if (!f) return false; + + // Header + SidecarHeader hdr = { SIDECAR_MAGIC, SIDECAR_VERSION, SIDECAR_ENDIAN, 0 }; + fwrite(&hdr, sizeof(hdr), 1, f); + fwrite(&ifc_file_size, 8, 1, f); + + // Geometry + if (!writeVec(f, data.vertices)) { fclose(f); return false; } + if (!writeVec(f, data.indices)) { fclose(f); return false; } + + // Draw info + if (!writeVec(f, data.draw_info)) { fclose(f); return false; } + + // Elements + string table + if (!writeVec(f, data.elements)) { fclose(f); return false; } + uint32_t stbl_len = static_cast(data.string_table.size()); + fwrite(&stbl_len, 4, 1, f); + if (stbl_len > 0) fwrite(data.string_table.data(), 1, stbl_len, f); + + // BVH + uint32_t num_bvh_models = data.bvh_set + ? 
static_cast(data.bvh_set->models.size()) : 0; + fwrite(&num_bvh_models, 4, 1, f); + + if (data.bvh_set) { + for (const auto& [model_id, mbvh] : data.bvh_set->models) { + fwrite(&model_id, 4, 1, f); + + uint32_t nn = static_cast(mbvh.nodes.size()); + fwrite(&nn, 4, 1, f); + if (nn > 0) fwrite(mbvh.nodes.data(), sizeof(BvhNode), nn, f); + + uint32_t no = static_cast(mbvh.object_indices.size()); + fwrite(&no, 4, 1, f); + if (no > 0) fwrite(mbvh.object_indices.data(), 4, no, f); + } + } + + fclose(f); + return true; +} + +std::optional readSidecar(const std::string& ifc_path, + uint64_t ifc_file_size) { + std::string path = sidecarPath(ifc_path); + FILE* f = fopen(path.c_str(), "rb"); + if (!f) return std::nullopt; + + auto fail = [&]() -> std::optional { fclose(f); return std::nullopt; }; + + // Header + SidecarHeader hdr; + if (fread(&hdr, sizeof(hdr), 1, f) != 1) return fail(); + if (hdr.magic != SIDECAR_MAGIC || + hdr.version != SIDECAR_VERSION || + hdr.endian != SIDECAR_ENDIAN) return fail(); + + uint64_t stored_size; + if (fread(&stored_size, 8, 1, f) != 1) return fail(); + if (stored_size != ifc_file_size) return fail(); + + SidecarData data; + + // Geometry + if (!readVec(f, data.vertices)) return fail(); + if (!readVec(f, data.indices)) return fail(); + + // Draw info + if (!readVec(f, data.draw_info)) return fail(); + + // Elements + string table + if (!readVec(f, data.elements)) return fail(); + uint32_t stbl_len; + if (fread(&stbl_len, 4, 1, f) != 1) return fail(); + data.string_table.resize(stbl_len); + if (stbl_len > 0 && fread(data.string_table.data(), 1, stbl_len, f) != stbl_len) + return fail(); + + // BVH + uint32_t num_bvh_models; + if (fread(&num_bvh_models, 4, 1, f) != 1) return fail(); + + if (num_bvh_models > 0) { + data.bvh_set = std::make_shared(); + for (uint32_t m = 0; m < num_bvh_models; ++m) { + uint32_t model_id; + if (fread(&model_id, 4, 1, f) != 1) return fail(); + + ModelBvh mbvh; + mbvh.model_id = model_id; + + uint32_t nn; + if 
(fread(&nn, 4, 1, f) != 1) return fail(); + mbvh.nodes.resize(nn); + if (nn > 0 && fread(mbvh.nodes.data(), sizeof(BvhNode), nn, f) != nn) + return fail(); + + uint32_t no; + if (fread(&no, 4, 1, f) != 1) return fail(); + mbvh.object_indices.resize(no); + if (no > 0 && fread(mbvh.object_indices.data(), 4, no, f) != no) + return fail(); + + data.bvh_set->bvh_model_ids.insert(model_id); + data.bvh_set->models[model_id] = std::move(mbvh); + } + } + + fclose(f); + return data; +} diff --git a/src/ifcviewer/SidecarCache.h b/src/ifcviewer/SidecarCache.h new file mode 100644 index 00000000000..49c36dba15a --- /dev/null +++ b/src/ifcviewer/SidecarCache.h @@ -0,0 +1,76 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef SIDECARCACHE_H +#define SIDECARCACHE_H + +#include "BvhAccel.h" + +#include +#include +#include +#include + +static constexpr uint32_t SIDECAR_MAGIC = 0x49465657; // "IFVW" +static constexpr uint32_t SIDECAR_VERSION = 3; +static constexpr uint32_t SIDECAR_ENDIAN = 0x01020304; + +// Fixed-size element record for the sidecar. Strings are stored as +// (offset, length) pairs into a separate string table. 
+struct PackedElementInfo { + uint32_t object_id; + uint32_t model_id; + int32_t ifc_id; + int32_t parent_id; + uint32_t guid_offset; + uint32_t guid_length; + uint32_t name_offset; + uint32_t name_length; + uint32_t type_offset; + uint32_t type_length; +}; + +// Everything the viewer needs to display a model without tessellating. +struct SidecarData { + // GPU geometry (ready to upload as-is) + std::vector vertices; // interleaved, 8 floats per vertex + std::vector indices; // global (already remapped) + + // Per-object metadata + std::vector draw_info; + + // Element tree metadata + std::vector elements; + std::string string_table; // concatenated UTF-8 + + // BVH acceleration + std::shared_ptr bvh_set; +}; + +// Write a full sidecar next to the IFC file. +// Returns true on success. +bool writeSidecar(const std::string& ifc_path, + const SidecarData& data, + uint64_t ifc_file_size); + +// Read a sidecar. Returns nullopt on any failure (missing, stale, corrupt). +std::optional readSidecar(const std::string& ifc_path, + uint64_t ifc_file_size); + +#endif // SIDECARCACHE_H diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 4217c997423..ae50f6dc44a 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -18,6 +18,7 @@ ********************************************************************************/ #include "ViewportWindow.h" +#include "SidecarCache.h" #include #include @@ -32,7 +33,6 @@ static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB -// Cap buffer growth so a runaway upload can't try to allocate the world. 
static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB static const int VERTEX_STRIDE = 8; // pos(3) + normal(3) + object_id(1) + color(1 packed) @@ -192,12 +192,16 @@ ViewportWindow::ViewportWindow(QWindow* parent) } ViewportWindow::~ViewportWindow() { + if (bvh_build_thread_.joinable()) + bvh_build_thread_.join(); if (context_) { context_->makeCurrent(this); if (gl_) { - if (vao_) gl_->glDeleteVertexArrays(1, &vao_); - if (vbo_) gl_->glDeleteBuffers(1, &vbo_); - if (ebo_) gl_->glDeleteBuffers(1, &ebo_); + for (auto& [mid, m] : models_gpu_) { + if (m.vao) gl_->glDeleteVertexArrays(1, &m.vao); + if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); + if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); + } if (axis_vao_) gl_->glDeleteVertexArrays(1, &axis_vao_); if (axis_vbo_) gl_->glDeleteBuffers(1, &axis_vbo_); if (main_program_) gl_->glDeleteProgram(main_program_); @@ -231,55 +235,40 @@ void ViewportWindow::initGL() { buildShaders(); buildAxisGizmo(); - // Create VAO - gl_->glCreateVertexArrays(1, &vao_); + gl_->glEnable(GL_DEPTH_TEST); + gl_->glEnable(GL_MULTISAMPLE); + gl_->glClearColor(0.18f, 0.20f, 0.22f, 1.0f); - // Create VBO with initial capacity - vbo_capacity_ = INITIAL_VBO_SIZE; - gl_->glCreateBuffers(1, &vbo_); - gl_->glNamedBufferStorage(vbo_, vbo_capacity_, nullptr, - GL_DYNAMIC_STORAGE_BIT); + gl_initialized_ = true; + frame_clock_.start(); + render_timer_.start(); - // Create EBO with initial capacity - ebo_capacity_ = INITIAL_EBO_SIZE; - gl_->glCreateBuffers(1, &ebo_); - gl_->glNamedBufferStorage(ebo_, ebo_capacity_, nullptr, - GL_DYNAMIC_STORAGE_BIT); + emit initialized(); +} - // Vertex layout: pos(3f) + normal(3f) + object_id(1f) + color(4 unorm bytes) - // = 8 floats = 32 bytes per vertex. 
- gl_->glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, VERTEX_STRIDE * sizeof(float)); - gl_->glVertexArrayElementBuffer(vao_, ebo_); +void ViewportWindow::setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo) { + gl_->glVertexArrayVertexBuffer(vao, 0, vbo, 0, VERTEX_STRIDE * sizeof(float)); + gl_->glVertexArrayElementBuffer(vao, ebo); // position - gl_->glEnableVertexArrayAttrib(vao_, 0); - gl_->glVertexArrayAttribFormat(vao_, 0, 3, GL_FLOAT, GL_FALSE, 0); - gl_->glVertexArrayAttribBinding(vao_, 0, 0); + gl_->glEnableVertexArrayAttrib(vao, 0); + gl_->glVertexArrayAttribFormat(vao, 0, 3, GL_FLOAT, GL_FALSE, 0); + gl_->glVertexArrayAttribBinding(vao, 0, 0); // normal - gl_->glEnableVertexArrayAttrib(vao_, 1); - gl_->glVertexArrayAttribFormat(vao_, 1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float)); - gl_->glVertexArrayAttribBinding(vao_, 1, 0); + gl_->glEnableVertexArrayAttrib(vao, 1); + gl_->glVertexArrayAttribFormat(vao, 1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float)); + gl_->glVertexArrayAttribBinding(vao, 1, 0); // object_id (passed as float, decoded in shader via floatBitsToUint) - gl_->glEnableVertexArrayAttrib(vao_, 2); - gl_->glVertexArrayAttribFormat(vao_, 2, 1, GL_FLOAT, GL_FALSE, 6 * sizeof(float)); - gl_->glVertexArrayAttribBinding(vao_, 2, 0); + gl_->glEnableVertexArrayAttrib(vao, 2); + gl_->glVertexArrayAttribFormat(vao, 2, 1, GL_FLOAT, GL_FALSE, 6 * sizeof(float)); + gl_->glVertexArrayAttribBinding(vao, 2, 0); // color (RGBA8 packed into the 4 bytes at offset 28; normalized to vec4) - gl_->glEnableVertexArrayAttrib(vao_, 3); - gl_->glVertexArrayAttribFormat(vao_, 3, 4, GL_UNSIGNED_BYTE, GL_TRUE, 7 * sizeof(float)); - gl_->glVertexArrayAttribBinding(vao_, 3, 0); - - gl_->glEnable(GL_DEPTH_TEST); - gl_->glEnable(GL_MULTISAMPLE); - gl_->glClearColor(0.18f, 0.20f, 0.22f, 1.0f); - - gl_initialized_ = true; - frame_clock_.start(); - render_timer_.start(); - - emit initialized(); + gl_->glEnableVertexArrayAttrib(vao, 3); + gl_->glVertexArrayAttribFormat(vao, 3, 4, 
GL_UNSIGNED_BYTE, GL_TRUE, 7 * sizeof(float)); + gl_->glVertexArrayAttribBinding(vao, 3, 0); } void ViewportWindow::buildShaders() { @@ -301,15 +290,11 @@ void ViewportWindow::buildShaders() { } void ViewportWindow::buildAxisGizmo() { - // 3 line segments (X red, Y green, Z blue), 6 vertices, pos(3) + color(3). static const float axis_data[] = { - // X axis - red 0.0f, 0.0f, 0.0f, 1.0f, 0.25f, 0.25f, 1.0f, 0.0f, 0.0f, 1.0f, 0.25f, 0.25f, - // Y axis - green 0.0f, 0.0f, 0.0f, 0.30f, 0.95f, 0.30f, 0.0f, 1.0f, 0.0f, 0.30f, 0.95f, 0.30f, - // Z axis - blue 0.0f, 0.0f, 0.0f, 0.30f, 0.55f, 1.0f, 0.0f, 0.0f, 1.0f, 0.30f, 0.55f, 1.0f, }; @@ -329,15 +314,11 @@ void ViewportWindow::buildAxisGizmo() { gl_->glVertexArrayAttribBinding(axis_vao_, 1, 0); } -bool ViewportWindow::growVbo(size_t needed_total) { - // Double until it fits, but don't blow past the cap. - size_t new_capacity = vbo_capacity_; - while (new_capacity < needed_total) { - new_capacity *= 2; - } +bool ViewportWindow::growModelVbo(ModelGpuData& m, size_t needed_total) { + size_t new_capacity = m.vbo_capacity; + while (new_capacity < needed_total) new_capacity *= 2; if (new_capacity > MAX_BUFFER_SIZE) { - qWarning("VBO grow request (%zu MB) exceeds cap (%zu MB)", - new_capacity / (1024 * 1024), MAX_BUFFER_SIZE / (1024 * 1024)); + qWarning("VBO grow request (%zu MB) exceeds cap", new_capacity / (1024 * 1024)); return false; } @@ -345,29 +326,25 @@ bool ViewportWindow::growVbo(size_t needed_total) { gl_->glCreateBuffers(1, &new_vbo); gl_->glNamedBufferStorage(new_vbo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); - if (vbo_used_ > 0) { - gl_->glCopyNamedBufferSubData(vbo_, new_vbo, 0, 0, vbo_used_); + if (m.vbo_used > 0) { + gl_->glCopyNamedBufferSubData(m.vbo, new_vbo, 0, 0, m.vbo_used); } - gl_->glDeleteBuffers(1, &vbo_); - vbo_ = new_vbo; - vbo_capacity_ = new_capacity; + gl_->glDeleteBuffers(1, &m.vbo); + m.vbo = new_vbo; + m.vbo_capacity = new_capacity; - // Rebind on the VAO so subsequent draws see the new 
buffer. - gl_->glVertexArrayVertexBuffer(vao_, 0, vbo_, 0, VERTEX_STRIDE * sizeof(float)); + gl_->glVertexArrayVertexBuffer(m.vao, 0, m.vbo, 0, VERTEX_STRIDE * sizeof(float)); - qInfo("VBO grew to %zu MB", vbo_capacity_ / (1024 * 1024)); + qInfo("Model VBO grew to %zu MB", m.vbo_capacity / (1024 * 1024)); return true; } -bool ViewportWindow::growEbo(size_t needed_total) { - size_t new_capacity = ebo_capacity_; - while (new_capacity < needed_total) { - new_capacity *= 2; - } +bool ViewportWindow::growModelEbo(ModelGpuData& m, size_t needed_total) { + size_t new_capacity = m.ebo_capacity; + while (new_capacity < needed_total) new_capacity *= 2; if (new_capacity > MAX_BUFFER_SIZE) { - qWarning("EBO grow request (%zu MB) exceeds cap (%zu MB)", - new_capacity / (1024 * 1024), MAX_BUFFER_SIZE / (1024 * 1024)); + qWarning("EBO grow request (%zu MB) exceeds cap", new_capacity / (1024 * 1024)); return false; } @@ -375,17 +352,17 @@ bool ViewportWindow::growEbo(size_t needed_total) { gl_->glCreateBuffers(1, &new_ebo); gl_->glNamedBufferStorage(new_ebo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); - if (ebo_used_ > 0) { - gl_->glCopyNamedBufferSubData(ebo_, new_ebo, 0, 0, ebo_used_); + if (m.ebo_used > 0) { + gl_->glCopyNamedBufferSubData(m.ebo, new_ebo, 0, 0, m.ebo_used); } - gl_->glDeleteBuffers(1, &ebo_); - ebo_ = new_ebo; - ebo_capacity_ = new_capacity; + gl_->glDeleteBuffers(1, &m.ebo); + m.ebo = new_ebo; + m.ebo_capacity = new_capacity; - gl_->glVertexArrayElementBuffer(vao_, ebo_); + gl_->glVertexArrayElementBuffer(m.vao, m.ebo); - qInfo("EBO grew to %zu MB", ebo_capacity_ / (1024 * 1024)); + qInfo("Model EBO grew to %zu MB", m.ebo_capacity / (1024 * 1024)); return true; } @@ -395,37 +372,55 @@ void ViewportWindow::uploadChunk(const UploadChunk& chunk) { context_->makeCurrent(this); + // Get or create per-model GPU data. 
+ auto it = models_gpu_.find(chunk.model_id); + if (it == models_gpu_.end()) { + ModelGpuData m; + gl_->glCreateVertexArrays(1, &m.vao); + gl_->glCreateBuffers(1, &m.vbo); + gl_->glCreateBuffers(1, &m.ebo); + + m.vbo_capacity = INITIAL_VBO_SIZE; + m.ebo_capacity = INITIAL_EBO_SIZE; + gl_->glNamedBufferStorage(m.vbo, m.vbo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + gl_->glNamedBufferStorage(m.ebo, m.ebo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + + setupVaoLayout(m.vao, m.vbo, m.ebo); + it = models_gpu_.emplace(chunk.model_id, std::move(m)).first; + } + + auto& mgpu = it->second; + size_t vb_size = chunk.vertices.size() * sizeof(float); size_t ib_size = chunk.indices.size() * sizeof(uint32_t); - if (vbo_used_ + vb_size > vbo_capacity_) { - if (!growVbo(vbo_used_ + vb_size)) { + if (mgpu.vbo_used + vb_size > mgpu.vbo_capacity) { + if (!growModelVbo(mgpu, mgpu.vbo_used + vb_size)) { qWarning("VBO at cap, skipping chunk"); return; } } - if (ebo_used_ + ib_size > ebo_capacity_) { - if (!growEbo(ebo_used_ + ib_size)) { + if (mgpu.ebo_used + ib_size > mgpu.ebo_capacity) { + if (!growModelEbo(mgpu, mgpu.ebo_used + ib_size)) { qWarning("EBO at cap, skipping chunk"); return; } } - uint32_t base_vertex = vertex_count_; + uint32_t base_vertex = mgpu.vertex_count; - gl_->glNamedBufferSubData(vbo_, vbo_used_, vb_size, chunk.vertices.data()); + gl_->glNamedBufferSubData(mgpu.vbo, mgpu.vbo_used, vb_size, chunk.vertices.data()); - // Remap chunk-local indices into global indices so the whole EBO can be - // drawn with a single glDrawElements call. + // Remap chunk-local indices into model-local global indices. std::vector global_indices(chunk.indices.size()); for (size_t i = 0; i < chunk.indices.size(); ++i) { global_indices[i] = chunk.indices[i] + base_vertex; } - gl_->glNamedBufferSubData(ebo_, ebo_used_, ib_size, global_indices.data()); + gl_->glNamedBufferSubData(mgpu.ebo, mgpu.ebo_used, ib_size, global_indices.data()); // Compute AABB from vertex positions in this chunk. 
ObjectDrawInfo info; - info.index_offset = static_cast(ebo_used_); + info.index_offset = static_cast(mgpu.ebo_used); info.index_count = static_cast(chunk.indices.size()); info.model_id = chunk.model_id; @@ -445,46 +440,301 @@ void ViewportWindow::uploadChunk(const UploadChunk& chunk) { info.aabb_max[0] = info.aabb_max[1] = info.aabb_max[2] = 0.0f; } - { - std::lock_guard lock(upload_mutex_); - total_index_count_ += static_cast(chunk.indices.size()); - object_draw_info_.push_back(info); - } + mgpu.draw_info.push_back(info); + mgpu.active_draw_count = static_cast(mgpu.draw_info.size()); // immediately drawable + mgpu.vbo_used += vb_size; + mgpu.ebo_used += ib_size; + mgpu.vertex_count += static_cast(num_verts); + mgpu.total_triangles += static_cast(chunk.indices.size() / 3); +} - vbo_used_ += vb_size; - ebo_used_ += ib_size; - vertex_count_ += static_cast(chunk.vertices.size() / VERTEX_STRIDE); - total_triangles_ += static_cast(chunk.indices.size() / 3); +void ViewportWindow::uploadBulk(uint32_t model_id, + std::vector vertices, + std::vector indices, + const std::vector& draw_info, + std::shared_ptr bvh_set) { + if (!gl_initialized_) return; + if (vertices.empty() || indices.empty()) return; + + context_->makeCurrent(this); + + size_t vb_size = vertices.size() * sizeof(float); + size_t ib_size = indices.size() * sizeof(uint32_t); + + // Allocate empty buffers at exact size — no data uploaded yet. 
+ ModelGpuData m; + gl_->glCreateVertexArrays(1, &m.vao); + gl_->glCreateBuffers(1, &m.vbo); + gl_->glCreateBuffers(1, &m.ebo); + + m.vbo_capacity = vb_size; + m.ebo_capacity = ib_size; + gl_->glNamedBufferStorage(m.vbo, vb_size, nullptr, GL_DYNAMIC_STORAGE_BIT); + gl_->glNamedBufferStorage(m.ebo, ib_size, nullptr, GL_DYNAMIC_STORAGE_BIT); + + setupVaoLayout(m.vao, m.vbo, m.ebo); + + m.vbo_used = vb_size; + m.ebo_used = ib_size; + m.vertex_count = static_cast(vertices.size() / VERTEX_STRIDE); + m.draw_info = draw_info; + m.active_draw_count = 0; // nothing drawable yet + + uint32_t total_tri = 0; + for (const auto& di : draw_info) total_tri += di.index_count / 3; + m.total_triangles = total_tri; + + // Delete old model data if re-uploading. + auto it = models_gpu_.find(model_id); + if (it != models_gpu_.end()) { + gl_->glDeleteVertexArrays(1, &it->second.vao); + gl_->glDeleteBuffers(1, &it->second.vbo); + gl_->glDeleteBuffers(1, &it->second.ebo); + } + models_gpu_[model_id] = std::move(m); + + // Queue progressive upload — data will stream in over subsequent frames. 
+ PendingUpload pu; + pu.model_id = model_id; + pu.vertices = std::move(vertices); + pu.indices = std::move(indices); + pu.bvh_set = std::move(bvh_set); + pending_uploads_.push_back(std::move(pu)); + + qDebug("Bulk upload queued: model %u, %zu vertices, %zu indices, %zu objects", + model_id, vertices.size() / VERTEX_STRIDE, indices.size(), draw_info.size()); } void ViewportWindow::resetScene() { if (!gl_initialized_) return; - std::lock_guard lock(upload_mutex_); - total_index_count_ = 0; - vbo_used_ = 0; - ebo_used_ = 0; - vertex_count_ = 0; - total_triangles_ = 0; + if (bvh_build_thread_.joinable()) + bvh_build_thread_.join(); + + context_->makeCurrent(this); + for (auto& [mid, m] : models_gpu_) { + if (m.vao) gl_->glDeleteVertexArrays(1, &m.vao); + if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); + if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); + } + models_gpu_.clear(); + model_bvhs_.clear(); + pending_uploads_.clear(); selected_object_id_ = 0; - object_draw_info_.clear(); - hidden_models_.clear(); - removed_models_.clear(); + { + std::lock_guard bvh_lock(bvh_result_mutex_); + pending_bvh_.reset(); + } +} + +static const size_t UPLOAD_CHUNK_BYTES = 48 * 1024 * 1024; // 48 MB per frame + +void ViewportWindow::processPendingUploads() { + if (pending_uploads_.empty()) return; + + auto& pu = pending_uploads_.front(); + auto it = models_gpu_.find(pu.model_id); + if (it == models_gpu_.end()) { + pending_uploads_.pop_front(); + return; + } + auto& mgpu = it->second; + + size_t vbo_total = pu.vertices.size() * sizeof(float); + size_t ebo_total = pu.indices.size() * sizeof(uint32_t); + + // Phase 1: Upload VBO in chunks. 
+ if (pu.vbo_uploaded < vbo_total) { + size_t remaining = vbo_total - pu.vbo_uploaded; + size_t chunk = std::min(remaining, UPLOAD_CHUNK_BYTES); + gl_->glNamedBufferSubData(mgpu.vbo, pu.vbo_uploaded, chunk, + reinterpret_cast(pu.vertices.data()) + pu.vbo_uploaded); + pu.vbo_uploaded += chunk; + + if (pu.vbo_uploaded >= vbo_total) { + // VBO done — free CPU memory. + pu.vertices.clear(); + pu.vertices.shrink_to_fit(); + } + return; // yield to render loop + } + + // Phase 2: Upload EBO in chunks. Objects become drawable as their range lands. + if (pu.ebo_uploaded < ebo_total) { + size_t remaining = ebo_total - pu.ebo_uploaded; + size_t chunk = std::min(remaining, UPLOAD_CHUNK_BYTES); + gl_->glNamedBufferSubData(mgpu.ebo, pu.ebo_uploaded, chunk, + reinterpret_cast(pu.indices.data()) + pu.ebo_uploaded); + pu.ebo_uploaded += chunk; + + // Advance active_draw_count: activate objects whose EBO range is fully uploaded. + while (mgpu.active_draw_count < mgpu.draw_info.size()) { + const auto& obj = mgpu.draw_info[mgpu.active_draw_count]; + size_t obj_end = obj.index_offset + obj.index_count * sizeof(uint32_t); + if (obj_end <= pu.ebo_uploaded) + mgpu.active_draw_count++; + else + break; + } + + if (pu.ebo_uploaded >= ebo_total) { + // EBO done — free CPU memory. + pu.indices.clear(); + pu.indices.shrink_to_fit(); + } else { + return; // yield to render loop + } + } + + // Fully uploaded — activate BVH if present. 
+ mgpu.active_draw_count = static_cast(mgpu.draw_info.size()); + if (pu.bvh_set) { + model_bvhs_[pu.model_id] = std::move(pu.bvh_set); + } + + qDebug("Progressive upload complete: model %u", pu.model_id); + pending_uploads_.pop_front(); } void ViewportWindow::hideModel(uint32_t model_id) { - std::lock_guard lock(upload_mutex_); - hidden_models_.insert(model_id); + auto it = models_gpu_.find(model_id); + if (it != models_gpu_.end()) it->second.hidden = true; } void ViewportWindow::showModel(uint32_t model_id) { - std::lock_guard lock(upload_mutex_); - hidden_models_.erase(model_id); + auto it = models_gpu_.find(model_id); + if (it != models_gpu_.end()) it->second.hidden = false; } void ViewportWindow::removeModel(uint32_t model_id) { - std::lock_guard lock(upload_mutex_); - removed_models_.insert(model_id); + if (!gl_initialized_) return; + context_->makeCurrent(this); + + // Cancel any pending upload for this model. + pending_uploads_.erase( + std::remove_if(pending_uploads_.begin(), pending_uploads_.end(), + [model_id](const PendingUpload& pu) { return pu.model_id == model_id; }), + pending_uploads_.end()); + + auto it = models_gpu_.find(model_id); + if (it != models_gpu_.end()) { + gl_->glDeleteVertexArrays(1, &it->second.vao); + gl_->glDeleteBuffers(1, &it->second.vbo); + gl_->glDeleteBuffers(1, &it->second.ebo); + models_gpu_.erase(it); + } + model_bvhs_.erase(model_id); +} + +std::vector ViewportWindow::readbackEbo(uint32_t model_id) const { + std::vector ebo_data; + auto it = models_gpu_.find(model_id); + if (!gl_ || it == models_gpu_.end() || it->second.ebo_used == 0) return ebo_data; + + const auto& m = it->second; + size_t num_indices = m.ebo_used / sizeof(uint32_t); + ebo_data.resize(num_indices); + gl_->glGetNamedBufferSubData(m.ebo, 0, m.ebo_used, ebo_data.data()); + return ebo_data; +} + +std::vector ViewportWindow::readbackVbo(uint32_t model_id) const { + std::vector vbo_data; + auto it = models_gpu_.find(model_id); + if (!gl_ || it == 
models_gpu_.end() || it->second.vbo_used == 0) return vbo_data; + + const auto& m = it->second; + size_t num_floats = m.vbo_used / sizeof(float); + vbo_data.resize(num_floats); + gl_->glGetNamedBufferSubData(m.vbo, 0, m.vbo_used, vbo_data.data()); + return vbo_data; +} + +void ViewportWindow::buildBvhAsync(uint32_t model_id, + const std::string& ifc_path, + uint64_t ifc_file_size, + std::vector sidecar_elements, + std::string sidecar_string_table) { + if (bvh_build_thread_.joinable()) + bvh_build_thread_.join(); + + auto it = models_gpu_.find(model_id); + if (it == models_gpu_.end()) return; + + // Snapshot draw info; read back EBO + VBO on GL thread. + std::vector draw_snapshot = it->second.draw_info; + std::vector ebo_snapshot = readbackEbo(model_id); + std::vector vbo_snapshot; + if (!ifc_path.empty() && !sidecar_elements.empty()) { + vbo_snapshot = readbackVbo(model_id); + } + + if (draw_snapshot.empty() || ebo_snapshot.empty()) return; + + bvh_build_thread_ = std::thread([this, + model_id, + draw_info = std::move(draw_snapshot), + ebo_data = std::move(ebo_snapshot), + vbo_data = std::move(vbo_snapshot), + elements = std::move(sidecar_elements), + string_table = std::move(sidecar_string_table), + ifc_path, ifc_file_size]() { + auto bvh_set = buildBvhSet(draw_info); + + EboReorderResult ebo_result = reorderEbo(*bvh_set, draw_info, ebo_data); + + // Write full sidecar if requested. 
+ if (!ifc_path.empty() && !elements.empty() && !vbo_data.empty()) { + SidecarData sd; + sd.vertices = vbo_data; + sd.indices = ebo_result.reordered_ebo; + sd.draw_info = ebo_result.reordered_draw_info; + sd.elements = std::move(elements); + sd.string_table = std::move(string_table); + sd.bvh_set = bvh_set; + writeSidecar(ifc_path, sd, ifc_file_size); + } + + { + std::lock_guard lock(bvh_result_mutex_); + pending_bvh_ = std::make_unique(); + pending_bvh_->model_id = model_id; + pending_bvh_->bvh_set = std::move(bvh_set); + pending_bvh_->ebo_reorder = std::move(ebo_result); + } + }); +} + +void ViewportWindow::applyBvhResult() { + std::unique_ptr result; + { + std::lock_guard lock(bvh_result_mutex_); + result = std::move(pending_bvh_); + } + if (!result) return; + + auto it = models_gpu_.find(result->model_id); + if (it == models_gpu_.end()) return; + + auto& mgpu = it->second; + + // Re-upload the reordered EBO into this model's buffer. + if (!result->ebo_reorder.reordered_ebo.empty()) { + size_t ebo_bytes = result->ebo_reorder.reordered_ebo.size() * sizeof(uint32_t); + if (ebo_bytes <= mgpu.ebo_capacity) { + gl_->glNamedBufferSubData(mgpu.ebo, 0, ebo_bytes, + result->ebo_reorder.reordered_ebo.data()); + } + } + + // Swap draw info. 
+ if (result->ebo_reorder.reordered_draw_info.size() == mgpu.draw_info.size()) { + mgpu.draw_info = std::move(result->ebo_reorder.reordered_draw_info); + } + + model_bvhs_[result->model_id] = std::move(result->bvh_set); + + qDebug("BVH activated for model %u", result->model_id); } void ViewportWindow::setSelectedObjectId(uint32_t id) { @@ -499,7 +749,6 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { int w = width() * devicePixelRatio(); int h = height() * devicePixelRatio(); - // Create/resize pick FBO if needed if (pick_width_ != w || pick_height_ != h) { if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); if (pick_color_tex_) gl_->glDeleteTextures(1, &pick_color_tex_); @@ -533,7 +782,6 @@ void ViewportWindow::updateCamera() { float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); - // IFC / Blender convention: X right, Y forward, Z up. QVector3D eye; eye.setX(camera_target_.x() + camera_distance_ * cosf(pitch_rad) * cosf(yaw_rad)); eye.setY(camera_target_.y() + camera_distance_ * cosf(pitch_rad) * sinf(yaw_rad)); @@ -547,17 +795,61 @@ void ViewportWindow::updateCamera() { proj_matrix_.perspective(45.0f, aspect, 0.1f, camera_distance_ * 10.0f); } +bool ViewportWindow::aabbInFrustum(const float aabb_min[3], const float aabb_max[3], + const float planes[6][4]) { + for (int p = 0; p < 6; ++p) { + float px = planes[p][0] >= 0.0f ? aabb_max[0] : aabb_min[0]; + float py = planes[p][1] >= 0.0f ? aabb_max[1] : aabb_min[1]; + float pz = planes[p][2] >= 0.0f ? aabb_max[2] : aabb_min[2]; + float dist = planes[p][0] * px + planes[p][1] * py + planes[p][2] * pz + planes[p][3]; + if (dist < 0.0f) return false; + } + return true; +} + +void ViewportWindow::traverseBvh(const ModelBvh& mbvh, const ModelGpuData& mgpu, + const float planes[6][4]) { + if (mbvh.nodes.empty()) return; + + uint32_t stack[64]; + int sp = 0; + stack[sp++] = 0; // root + + // Get the current model's draw command being built. 
+ auto& cmd = frame_draw_cmds_.back(); + + while (sp > 0) { + uint32_t ni = stack[--sp]; + const BvhNode& node = mbvh.nodes[ni]; + + if (!aabbInFrustum(node.aabb_min, node.aabb_max, planes)) + continue; + + if (node.count > 0) { + for (uint32_t i = 0; i < node.count; ++i) { + uint32_t oi = mbvh.object_indices[node.right_or_first + i]; + const auto& obj = mgpu.draw_info[oi]; + if (aabbInFrustum(obj.aabb_min, obj.aabb_max, planes)) { + cmd.counts.push_back(static_cast(obj.index_count)); + cmd.offsets.push_back(reinterpret_cast( + static_cast(obj.index_offset))); + visible_triangles_ += obj.index_count / 3; + } + } + } else { + if (sp < 63) { + stack[sp++] = node.right_or_first; + stack[sp++] = ni + 1; + } + } + } +} + void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { - visible_counts_.clear(); - visible_offsets_.clear(); + frame_draw_cmds_.clear(); visible_triangles_ = 0; - std::lock_guard lock(upload_mutex_); - if (object_draw_info_.empty()) return; - // Extract 6 frustum planes from the view-projection matrix. - // Each plane is (a, b, c, d) where ax + by + cz + d >= 0 is inside. - // QMatrix4x4 is stored column-major; operator(row, col) gives element. float planes[6][4]; for (int i = 0; i < 4; ++i) { planes[0][i] = vp(3, i) + vp(0, i); // left @@ -567,7 +859,6 @@ void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { planes[4][i] = vp(3, i) + vp(2, i); // near planes[5][i] = vp(3, i) - vp(2, i); // far } - // Normalize planes. for (int p = 0; p < 6; ++p) { float len = std::sqrt(planes[p][0] * planes[p][0] + planes[p][1] * planes[p][1] + @@ -581,31 +872,40 @@ void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { } } - visible_counts_.reserve(object_draw_info_.size()); - visible_offsets_.reserve(object_draw_info_.size()); + for (auto& [model_id, mgpu] : models_gpu_) { + if (mgpu.hidden || mgpu.active_draw_count == 0) continue; - for (const auto& obj : object_draw_info_) { - // Skip hidden or removed models. 
- if (hidden_models_.count(obj.model_id) || removed_models_.count(obj.model_id)) - continue; + frame_draw_cmds_.push_back({mgpu.vao, {}, {}}); + auto& cmd = frame_draw_cmds_.back(); + cmd.counts.reserve(mgpu.active_draw_count); + cmd.offsets.reserve(mgpu.active_draw_count); - bool visible = true; - for (int p = 0; p < 6; ++p) { - // p-vertex: the AABB corner most in the direction of the plane normal. - float px = planes[p][0] >= 0.0f ? obj.aabb_max[0] : obj.aabb_min[0]; - float py = planes[p][1] >= 0.0f ? obj.aabb_max[1] : obj.aabb_min[1]; - float pz = planes[p][2] >= 0.0f ? obj.aabb_max[2] : obj.aabb_min[2]; - float dist = planes[p][0] * px + planes[p][1] * py + planes[p][2] * pz + planes[p][3]; - if (dist < 0.0f) { - visible = false; - break; + bool fully_loaded = (mgpu.active_draw_count == mgpu.draw_info.size()); + auto bvh_it = model_bvhs_.find(model_id); + + // Only use BVH if model is fully uploaded; during progressive upload, + // fall back to linear scan of active objects. + if (fully_loaded && bvh_it != model_bvhs_.end() && bvh_it->second) { + const auto& bvh_set = *bvh_it->second; + auto mbvh_it = bvh_set.models.find(model_id); + if (mbvh_it != bvh_set.models.end()) { + traverseBvh(mbvh_it->second, mgpu, planes); + } + } else { + // Linear scan of active objects only. 
+ for (uint32_t i = 0; i < mgpu.active_draw_count; ++i) { + const auto& obj = mgpu.draw_info[i]; + if (aabbInFrustum(obj.aabb_min, obj.aabb_max, planes)) { + cmd.counts.push_back(static_cast(obj.index_count)); + cmd.offsets.push_back(reinterpret_cast( + static_cast(obj.index_offset))); + visible_triangles_ += obj.index_count / 3; + } } } - if (visible) { - visible_counts_.push_back(static_cast(obj.index_count)); - visible_offsets_.push_back(reinterpret_cast( - static_cast(obj.index_offset))); - visible_triangles_ += obj.index_count / 3; + + if (cmd.counts.empty()) { + frame_draw_cmds_.pop_back(); } } } @@ -614,6 +914,8 @@ void ViewportWindow::render() { if (!gl_initialized_ || !isExposed()) return; context_->makeCurrent(this); + applyBvhResult(); + processPendingUploads(); updateCamera(); int w = width() * devicePixelRatio(); @@ -628,21 +930,20 @@ void ViewportWindow::render() { gl_->glUniform3f(gl_->glGetUniformLocation(main_program_, "u_light_dir"), 0.3f, 0.5f, 0.8f); gl_->glUniform1ui(gl_->glGetUniformLocation(main_program_, "u_selected_id"), selected_object_id_); - gl_->glBindVertexArray(vao_); - buildVisibleList(vp); - if (!visible_counts_.empty()) { + for (const auto& cmd : frame_draw_cmds_) { + gl_->glBindVertexArray(cmd.vao); gl_->glMultiDrawElements(GL_TRIANGLES, - visible_counts_.data(), GL_UNSIGNED_INT, - visible_offsets_.data(), - static_cast(visible_counts_.size())); + cmd.counts.data(), GL_UNSIGNED_INT, + cmd.offsets.data(), + static_cast(cmd.counts.size())); } renderAxisGizmo(); context_->swapBuffers(this); - // Compute FPS (updated once per second to avoid flicker). + // Compute FPS. 
float dt = frame_clock_.restart() / 1000.0f; accumulated_time_ += dt; frame_count_++; @@ -651,12 +952,23 @@ void ViewportWindow::render() { frame_count_ = 0; accumulated_time_ = 0.0f; + uint32_t total_obj = 0, total_tri = 0, vis_obj = 0; + for (const auto& [mid, m] : models_gpu_) { + if (!m.hidden) { + total_obj += static_cast(m.draw_info.size()); + total_tri += m.total_triangles; + } + } + for (const auto& cmd : frame_draw_cmds_) { + vis_obj += static_cast(cmd.counts.size()); + } + FrameStats stats; stats.fps = last_fps_; stats.frame_time_ms = 1000.0f / last_fps_; - stats.total_objects = static_cast(object_draw_info_.size()); - stats.visible_objects = static_cast(visible_counts_.size()); - stats.total_triangles = total_triangles_; + stats.total_objects = total_obj; + stats.visible_objects = vis_obj; + stats.total_triangles = total_tri; stats.visible_triangles = visible_triangles_; emit frameStatsUpdated(stats); } @@ -672,8 +984,6 @@ void ViewportWindow::renderAxisGizmo() { gl_->glViewport(margin, margin, gizmo_size, gizmo_size); gl_->glDisable(GL_DEPTH_TEST); - // Build a view matrix from the same camera orientation but with a fixed - // close-up distance, so the gizmo rotates with the scene camera. Z-up. 
float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); @@ -693,7 +1003,7 @@ void ViewportWindow::renderAxisGizmo() { gl_->glUseProgram(axis_program_); gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(axis_program_, "u_mvp"), 1, GL_FALSE, mvp.constData()); - gl_->glLineWidth(2.5f); // ignored on some core-profile drivers, that's OK + gl_->glLineWidth(2.5f); gl_->glBindVertexArray(axis_vao_); gl_->glDrawArrays(GL_LINES, 0, 6); @@ -712,14 +1022,13 @@ void ViewportWindow::renderPickPass() { gl_->glUseProgram(pick_program_); gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(pick_program_, "u_view_projection"), 1, GL_FALSE, vp.constData()); - gl_->glBindVertexArray(vao_); - // Reuse the visible list from the most recent render() call. - if (!visible_counts_.empty()) { + for (const auto& cmd : frame_draw_cmds_) { + gl_->glBindVertexArray(cmd.vao); gl_->glMultiDrawElements(GL_TRIANGLES, - visible_counts_.data(), GL_UNSIGNED_INT, - visible_offsets_.data(), - static_cast(visible_counts_.size())); + cmd.counts.data(), GL_UNSIGNED_INT, + cmd.offsets.data(), + static_cast(cmd.counts.size())); } gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); @@ -774,7 +1083,6 @@ void ViewportWindow::handleMouseMove(QMouseEvent* e) { if (active_button_ == Qt::MiddleButton) { if (e->modifiers() & Qt::ShiftModifier) { - // Pan in screen space, derived from the Z-up camera basis. 
float pan_speed = camera_distance_ * 0.002f; float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); @@ -786,7 +1094,6 @@ void ViewportWindow::handleMouseMove(QMouseEvent* e) { camera_target_ -= right * delta.x() * pan_speed; camera_target_ += up * delta.y() * pan_speed; } else { - // Orbit camera_yaw_ -= delta.x() * 0.3f; camera_pitch_ += delta.y() * 0.3f; camera_pitch_ = qBound(-89.0f, camera_pitch_, 89.0f); diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index fda82a1db5e..62abc480022 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -28,23 +28,23 @@ #include #include +#include #include #include +#include #include #include +#include +#include +#include + +#include "BvhAccel.h" +#include "SidecarCache.h" struct MaterialInfo { float r = 0.75f, g = 0.75f, b = 0.78f, a = 1.0f; }; -struct ObjectDrawInfo { - uint32_t index_offset; // byte offset into EBO - uint32_t index_count; // number of indices - uint32_t model_id; // which model this object belongs to - float aabb_min[3]; // world-space AABB - float aabb_max[3]; -}; - struct UploadChunk { // Interleaved per-vertex layout (8 floats / 32 bytes per vertex): // pos(3 float) + normal(3 float) + object_id(1 float bitcast from uint) @@ -56,6 +56,32 @@ struct UploadChunk { uint32_t model_id = 0; }; +// Per-model GPU state: own VAO, VBO, EBO, draw info, BVH. +struct ModelGpuData { + GLuint vao = 0; + GLuint vbo = 0; + GLuint ebo = 0; + size_t vbo_capacity = 0; + size_t ebo_capacity = 0; + size_t vbo_used = 0; // bytes + size_t ebo_used = 0; // bytes + uint32_t vertex_count = 0; + uint32_t total_triangles = 0; + std::vector draw_info; + uint32_t active_draw_count = 0; // how many objects are drawable (progressive upload) + bool hidden = false; +}; + +// Pending progressive upload — VBO first, then EBO. 
+struct PendingUpload { + uint32_t model_id = 0; + std::vector vertices; + std::vector indices; + std::shared_ptr bvh_set; + size_t vbo_uploaded = 0; // bytes + size_t ebo_uploaded = 0; // bytes +}; + class ViewportWindow : public QWindow { Q_OBJECT public: @@ -65,10 +91,29 @@ class ViewportWindow : public QWindow { void uploadChunk(const UploadChunk& chunk); void resetScene(); + // Bulk upload pre-built geometry from a sidecar cache. + // Creates a perfectly-sized per-model buffer set. No copy. + void uploadBulk(uint32_t model_id, + std::vector vertices, + std::vector indices, + const std::vector& draw_info, + std::shared_ptr bvh_set); + void hideModel(uint32_t model_id); void showModel(uint32_t model_id); void removeModel(uint32_t model_id); + // Build BVH and optionally write a sidecar cache. + void buildBvhAsync(uint32_t model_id, + const std::string& ifc_path = "", + uint64_t ifc_file_size = 0, + std::vector sidecar_elements = {}, + std::string sidecar_string_table = {}); + + // Read snapshots of a model's GPU buffers into CPU vectors. 
+ std::vector readbackEbo(uint32_t model_id) const; + std::vector readbackVbo(uint32_t model_id) const; + void setSelectedObjectId(uint32_t id); uint32_t pickObjectAt(int x, int y); @@ -99,9 +144,16 @@ class ViewportWindow : public QWindow { void updateCamera(); void buildShaders(); void buildAxisGizmo(); - bool growVbo(size_t needed_total); - bool growEbo(size_t needed_total); + void setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo); + bool growModelVbo(ModelGpuData& m, size_t needed_total); + bool growModelEbo(ModelGpuData& m, size_t needed_total); void buildVisibleList(const QMatrix4x4& vp); + void traverseBvh(const ModelBvh& mbvh, const ModelGpuData& mgpu, + const float planes[6][4]); + static bool aabbInFrustum(const float aabb_min[3], const float aabb_max[3], + const float planes[6][4]); + void applyBvhResult(); + void processPendingUploads(); // Mouse interaction void handleMousePress(QMouseEvent* event); @@ -124,15 +176,9 @@ class ViewportWindow : public QWindow { GLuint axis_vao_ = 0; GLuint axis_vbo_ = 0; - // Geometry buffers - one big buffer pair - GLuint vao_ = 0; - GLuint vbo_ = 0; - GLuint ebo_ = 0; - size_t vbo_capacity_ = 0; - size_t ebo_capacity_ = 0; - size_t vbo_used_ = 0; // in bytes - size_t ebo_used_ = 0; // in bytes - uint32_t vertex_count_ = 0; + // Per-model GPU data + std::unordered_map models_gpu_; + std::mutex models_mutex_; // Pick framebuffer GLuint pick_fbo_ = 0; @@ -141,16 +187,20 @@ class ViewportWindow : public QWindow { int pick_width_ = 0; int pick_height_ = 0; - // Per-object draw metadata for frustum culling. - std::vector object_draw_info_; - std::unordered_set hidden_models_; - std::unordered_set removed_models_; - uint32_t total_index_count_ = 0; - std::mutex upload_mutex_; + // Per-model BVH + std::unordered_map> model_bvhs_; + + // Progressive upload queue + std::deque pending_uploads_; // Scratch buffers reused each frame to avoid allocation. 
- std::vector visible_counts_; - std::vector visible_offsets_; + struct ModelDrawCmd { + GLuint vao; + std::vector counts; + std::vector offsets; + }; + std::vector frame_draw_cmds_; + uint32_t visible_triangles_ = 0; // Camera QVector3D camera_target_{0, 0, 0}; @@ -169,9 +219,17 @@ class ViewportWindow : public QWindow { bool pick_requested_ = false; int pick_x_ = 0, pick_y_ = 0; + // BVH build (phase 2) + struct PendingBvh { + uint32_t model_id; + std::shared_ptr bvh_set; + EboReorderResult ebo_reorder; + }; + std::unique_ptr pending_bvh_; + std::mutex bvh_result_mutex_; + std::thread bvh_build_thread_; + // Stats - uint32_t total_triangles_ = 0; - uint32_t visible_triangles_ = 0; int frame_count_ = 0; float accumulated_time_ = 0.0f; float last_fps_ = 0.0f; From 2d9f3fba7a3208f994ffa3284a5907dde183405e Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 19:01:43 +1000 Subject: [PATCH 12/37] Add profiling for VRAM, FPS ratios, and instancing analysis Per-second frame log reports fps/ms, visible/total object & triangle ratios, VRAM breakdown (VBO+EBO), model count, and pending uploads. Upload-complete log includes per-model VBO/EBO MB and scene total VRAM. Streamer runs an instancing analysis keyed on geom.id(): total shapes, unique representations, dedup ratio, theoretical VBO/EBO/SSBO sizes if instanced, potential savings, and top-5 most-duplicated representations. Used to validate whether GPU instancing is worth the architectural rewrite for a given dataset. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/GeometryStreamer.cpp | 103 +++++++++++++++++++++++++++++ src/ifcviewer/ViewportWindow.cpp | 41 ++++++++++-- 2 files changed, 139 insertions(+), 5 deletions(-) diff --git a/src/ifcviewer/GeometryStreamer.cpp b/src/ifcviewer/GeometryStreamer.cpp index 7235bced9f8..54b37df70ca 100644 --- a/src/ifcviewer/GeometryStreamer.cpp +++ b/src/ifcviewer/GeometryStreamer.cpp @@ -27,6 +27,9 @@ #include #include +#include +#include + GeometryStreamer::GeometryStreamer(QObject* parent) : QObject(parent) { @@ -126,6 +129,20 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { int last_progress = 0; + // Instancing analysis: count shapes grouped by representation id. + struct GeomStat { + uint32_t count = 0; + size_t vertex_count = 0; + size_t index_count = 0; + std::string example_type; + }; + std::unordered_map geom_stats; + uint32_t total_shapes = 0; + size_t total_vertices = 0; + size_t total_indices = 0; + QElapsedTimer stream_timer; + stream_timer.start(); + do { if (cancel_requested_.load()) break; @@ -147,6 +164,24 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { info.type = tri_elem->type(); info.parent_id = tri_elem->parent_id(); + // Instancing stats: key by representation id, count unique vs repeated. 
+ const auto& geom = tri_elem->geometry(); + const std::string& geom_id = geom.id(); + size_t nv = geom.verts().size() / 3; + size_t ni = geom.faces().size(); + if (!geom_id.empty()) { + auto& gs = geom_stats[geom_id]; + gs.count++; + if (gs.count == 1) { + gs.vertex_count = nv; + gs.index_count = ni; + gs.example_type = info.type; + } + } + total_shapes++; + total_vertices += nv; + total_indices += ni; + { std::lock_guard lock(elements_mutex_); pending_elements_.push_back(std::move(info)); @@ -168,6 +203,74 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { progress_ = 100; emit progressChanged(100); + + // === Instancing report === + { + size_t unique_geoms = geom_stats.size(); + size_t unique_vertices = 0; + size_t unique_indices = 0; + size_t repeated_shapes = 0; // total shapes that share a repr with another + for (const auto& [gid, gs] : geom_stats) { + unique_vertices += gs.vertex_count; + unique_indices += gs.index_count; + if (gs.count > 1) repeated_shapes += gs.count; + } + + // Bytes assuming current layout (32 B/vertex, 4 B/index). + size_t baked_vbo_bytes = total_vertices * 32; + size_t baked_ebo_bytes = total_indices * 4; + size_t instanced_vbo_bytes = unique_vertices * 32; + size_t instanced_ebo_bytes = unique_indices * 4; + // Per-instance data: 64 B transform + 8 B (object_id + color). + size_t per_instance_bytes = 72; + size_t instance_ssbo_bytes = total_shapes * per_instance_bytes; + + double dedup_ratio = unique_geoms > 0 + ? static_cast(total_shapes) / static_cast(unique_geoms) + : 1.0; + + qDebug("=== Instancing analysis: %s ===", path.c_str()); + qDebug(" Stream time: %.2f s", stream_timer.elapsed() / 1000.0); + qDebug(" Total shapes: %u", total_shapes); + qDebug(" Unique geometries: %zu (dedup ratio %.2fx)", + unique_geoms, dedup_ratio); + qDebug(" Repeated shapes: %zu (%.1f%% of total)", + repeated_shapes, + total_shapes > 0 ? 
100.0 * repeated_shapes / total_shapes : 0.0); + qDebug(" Baked geometry: VBO %.1f MB + EBO %.1f MB = %.1f MB", + baked_vbo_bytes / (1024.0*1024.0), + baked_ebo_bytes / (1024.0*1024.0), + (baked_vbo_bytes + baked_ebo_bytes) / (1024.0*1024.0)); + qDebug(" If instanced: VBO %.1f MB + EBO %.1f MB + SSBO %.1f MB = %.1f MB", + instanced_vbo_bytes / (1024.0*1024.0), + instanced_ebo_bytes / (1024.0*1024.0), + instance_ssbo_bytes / (1024.0*1024.0), + (instanced_vbo_bytes + instanced_ebo_bytes + instance_ssbo_bytes) + / (1024.0*1024.0)); + size_t baked_total = baked_vbo_bytes + baked_ebo_bytes; + size_t inst_total = instanced_vbo_bytes + instanced_ebo_bytes + instance_ssbo_bytes; + if (inst_total > 0 && baked_total > inst_total) { + qDebug(" Potential savings: %.1f MB (%.1f%%)", + (baked_total - inst_total) / (1024.0*1024.0), + 100.0 * (baked_total - inst_total) / baked_total); + } else { + qDebug(" Potential savings: none (instance overhead exceeds dedup win)"); + } + + // Top-5 most duplicated representations. 
+ std::vector> sorted(geom_stats.begin(), geom_stats.end()); + std::partial_sort(sorted.begin(), + sorted.begin() + std::min(5, sorted.size()), + sorted.end(), + [](const auto& a, const auto& b) { return a.second.count > b.second.count; }); + qDebug(" Top duplicated representations:"); + for (size_t i = 0; i < std::min(5, sorted.size()); ++i) { + const auto& [gid, gs] = sorted[i]; + qDebug(" [%zu] count=%u verts=%zu type=%s repr_id=%s", + i + 1, gs.count, gs.vertex_count, + gs.example_type.c_str(), gid.c_str()); + } + } } static MaterialInfo materialFromStyle(const ifcopenshell::geometry::taxonomy::style::ptr& style) { diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index ae50f6dc44a..c872f799b67 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -592,7 +592,19 @@ void ViewportWindow::processPendingUploads() { model_bvhs_[pu.model_id] = std::move(pu.bvh_set); } - qDebug("Progressive upload complete: model %u", pu.model_id); + size_t total_vbo = 0, total_ebo = 0; + for (const auto& [mid, mg] : models_gpu_) { + total_vbo += mg.vbo_capacity; + total_ebo += mg.ebo_capacity; + } + qDebug("Progressive upload complete: model %u (this: vbo %.1f MB + ebo %.1f MB, " + "%u objects, %u triangles) scene total vram %.1f MB", + pu.model_id, + mgpu.vbo_capacity / (1024.0 * 1024.0), + mgpu.ebo_capacity / (1024.0 * 1024.0), + static_cast(mgpu.draw_info.size()), + mgpu.total_triangles, + (total_vbo + total_ebo) / (1024.0 * 1024.0)); pending_uploads_.pop_front(); } @@ -953,12 +965,17 @@ void ViewportWindow::render() { accumulated_time_ = 0.0f; uint32_t total_obj = 0, total_tri = 0, vis_obj = 0; + size_t total_vram = 0, total_vbo = 0, total_ebo = 0; + size_t num_models = 0, num_hidden = 0; for (const auto& [mid, m] : models_gpu_) { - if (!m.hidden) { - total_obj += static_cast(m.draw_info.size()); - total_tri += m.total_triangles; - } + num_models++; + if (m.hidden) { num_hidden++; continue; } + total_obj += 
static_cast(m.draw_info.size()); + total_tri += m.total_triangles; + total_vbo += m.vbo_capacity; + total_ebo += m.ebo_capacity; } + total_vram = total_vbo + total_ebo; for (const auto& cmd : frame_draw_cmds_) { vis_obj += static_cast(cmd.counts.size()); } @@ -971,6 +988,20 @@ void ViewportWindow::render() { stats.total_triangles = total_tri; stats.visible_triangles = visible_triangles_; emit frameStatsUpdated(stats); + + double vis_obj_pct = total_obj > 0 ? 100.0 * vis_obj / total_obj : 0.0; + double vis_tri_pct = total_tri > 0 ? 100.0 * visible_triangles_ / total_tri : 0.0; + qDebug("[frame] %.1f fps %.2f ms obj %u/%u (%.1f%%) tri %u/%u (%.1f%%) " + "vram %.1f MB (vbo %.1f + ebo %.1f) models %zu (%zu hidden) draws %zu pending_uploads %zu", + last_fps_, 1000.0f / last_fps_, + vis_obj, total_obj, vis_obj_pct, + visible_triangles_, total_tri, vis_tri_pct, + total_vram / (1024.0 * 1024.0), + total_vbo / (1024.0 * 1024.0), + total_ebo / (1024.0 * 1024.0), + num_models, num_hidden, + frame_draw_cmds_.size(), + pending_uploads_.size()); } } From d8362e243703f6dcb9ae457213c91a70c5452db2 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 19:17:44 +1000 Subject: [PATCH 13/37] Leaf-batched BVH draw commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a BVH leaf passes the frustum test, emit a single glMultiDrawElements record covering the leaf's entire index range instead of one per object. Leaves are contiguous in the EBO after reorderEbo, so the range is just [first_object.index_offset, sum(index_count)]. Cuts draw calls by ~8x (BVH_MAX_LEAF_SIZE) and shifts the bottleneck from CPU/driver per-draw overhead toward GPU vertex throughput. Per-object features (selection highlight, per-vertex color, object_id picking) are unchanged — they operate on vertex attributes, not draw state. Future per-object hide/override will use SSBO lookups sampled by object_id in the fragment shader. 
Slight overdraw from skipping per-object frustum tests within a leaf is negligible given median-split BVH tightness and spare tri throughput. Also adds visible_objects_ counter so stats still report true object counts (not leaf counts), plus leaf_draws/model_draws breakdown in the per-second frame log. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 40 +++++++++++++++++++++----------- src/ifcviewer/ViewportWindow.h | 1 + 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index c872f799b67..1c6ab786254 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -838,16 +838,25 @@ void ViewportWindow::traverseBvh(const ModelBvh& mbvh, const ModelGpuData& mgpu, continue; if (node.count > 0) { + // Leaf-batched draw: after reorderEbo, a leaf's objects occupy a + // contiguous EBO range. Emit one draw command covering all of them + // instead of N per-object tests/draws. The leaf AABB test above is + // already a conservative cull; any overdraw (up to BVH_MAX_LEAF_SIZE + // objects that may be fully outside the frustum but inside the leaf + // AABB) costs far less than the per-draw CPU/driver overhead we save. 
+ uint32_t first_oi = mbvh.object_indices[node.right_or_first]; + const auto& first_obj = mgpu.draw_info[first_oi]; + uint32_t leaf_offset = first_obj.index_offset; + uint32_t leaf_count = 0; for (uint32_t i = 0; i < node.count; ++i) { uint32_t oi = mbvh.object_indices[node.right_or_first + i]; - const auto& obj = mgpu.draw_info[oi]; - if (aabbInFrustum(obj.aabb_min, obj.aabb_max, planes)) { - cmd.counts.push_back(static_cast(obj.index_count)); - cmd.offsets.push_back(reinterpret_cast( - static_cast(obj.index_offset))); - visible_triangles_ += obj.index_count / 3; - } + leaf_count += mgpu.draw_info[oi].index_count; } + cmd.counts.push_back(static_cast(leaf_count)); + cmd.offsets.push_back(reinterpret_cast( + static_cast(leaf_offset))); + visible_triangles_ += leaf_count / 3; + visible_objects_ += node.count; } else { if (sp < 63) { stack[sp++] = node.right_or_first; @@ -860,6 +869,7 @@ void ViewportWindow::traverseBvh(const ModelBvh& mbvh, const ModelGpuData& mgpu, void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { frame_draw_cmds_.clear(); visible_triangles_ = 0; + visible_objects_ = 0; // Extract 6 frustum planes from the view-projection matrix. 
float planes[6][4]; @@ -912,6 +922,7 @@ void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { cmd.offsets.push_back(reinterpret_cast( static_cast(obj.index_offset))); visible_triangles_ += obj.index_count / 3; + visible_objects_++; } } } @@ -964,9 +975,10 @@ void ViewportWindow::render() { frame_count_ = 0; accumulated_time_ = 0.0f; - uint32_t total_obj = 0, total_tri = 0, vis_obj = 0; + uint32_t total_obj = 0, total_tri = 0; size_t total_vram = 0, total_vbo = 0, total_ebo = 0; size_t num_models = 0, num_hidden = 0; + size_t total_leaf_draws = 0; for (const auto& [mid, m] : models_gpu_) { num_models++; if (m.hidden) { num_hidden++; continue; } @@ -977,29 +989,31 @@ void ViewportWindow::render() { } total_vram = total_vbo + total_ebo; for (const auto& cmd : frame_draw_cmds_) { - vis_obj += static_cast(cmd.counts.size()); + total_leaf_draws += cmd.counts.size(); } FrameStats stats; stats.fps = last_fps_; stats.frame_time_ms = 1000.0f / last_fps_; stats.total_objects = total_obj; - stats.visible_objects = vis_obj; + stats.visible_objects = visible_objects_; stats.total_triangles = total_tri; stats.visible_triangles = visible_triangles_; emit frameStatsUpdated(stats); - double vis_obj_pct = total_obj > 0 ? 100.0 * vis_obj / total_obj : 0.0; + double vis_obj_pct = total_obj > 0 ? 100.0 * visible_objects_ / total_obj : 0.0; double vis_tri_pct = total_tri > 0 ? 
100.0 * visible_triangles_ / total_tri : 0.0; qDebug("[frame] %.1f fps %.2f ms obj %u/%u (%.1f%%) tri %u/%u (%.1f%%) " - "vram %.1f MB (vbo %.1f + ebo %.1f) models %zu (%zu hidden) draws %zu pending_uploads %zu", + "vram %.1f MB (vbo %.1f + ebo %.1f) models %zu (%zu hidden) " + "leaf_draws %zu model_draws %zu pending_uploads %zu", last_fps_, 1000.0f / last_fps_, - vis_obj, total_obj, vis_obj_pct, + visible_objects_, total_obj, vis_obj_pct, visible_triangles_, total_tri, vis_tri_pct, total_vram / (1024.0 * 1024.0), total_vbo / (1024.0 * 1024.0), total_ebo / (1024.0 * 1024.0), num_models, num_hidden, + total_leaf_draws, frame_draw_cmds_.size(), pending_uploads_.size()); } diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 62abc480022..97925e6e2e3 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -201,6 +201,7 @@ class ViewportWindow : public QWindow { }; std::vector frame_draw_cmds_; uint32_t visible_triangles_ = 0; + uint32_t visible_objects_ = 0; // Camera QVector3D camera_target_{0, 0, 0}; From 1097fa3bced668aec3196fb620f7cb3db1e67dd9 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 19:53:06 +1000 Subject: [PATCH 14/37] GPU instancing: streamer, viewport, shaders rewritten Commit A of the instancing migration (Phase 3a). The streamer now runs the iterator with use-world-coords=false and dedupes by the geometry's representation id, emitting a MeshChunk once per unique geometry and an InstanceChunk per placement. The viewport keeps geometry in local coordinates (28 B/vertex, down from 32) and applies the per-instance transform in the vertex shader via an std430 SSBO indexed by gl_InstanceID + a per-draw uniform offset. After streaming finishes finalizeModel() stable-sorts instances by mesh_id, assigns each mesh a contiguous range, and uploads the SSBO; render then issues one glDrawElementsInstancedBaseVertex per mesh. 
BvhAccel is reshaped to operate on a generic BvhItem (world AABB + model_id) so it can drive instance-level culling, but the path is not wired in yet -- every instance is drawn every frame in this commit. Progressive-during-streaming rendering is likewise disabled: a model appears when its SSBO is uploaded, not incrementally. Sidecar cache is stubbed (reads miss, writes are no-ops); the v4 on-disk format with MeshInfo + InstanceGpu sections lands in Commit B. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/BvhAccel.cpp | 151 +---- src/ifcviewer/BvhAccel.h | 43 +- src/ifcviewer/GeometryStreamer.cpp | 443 ++++++------ src/ifcviewer/GeometryStreamer.h | 11 +- src/ifcviewer/InstancedGeometry.h | 103 +++ src/ifcviewer/MainWindow.cpp | 100 +-- src/ifcviewer/MainWindow.h | 3 +- src/ifcviewer/SidecarCache.cpp | 182 +---- src/ifcviewer/SidecarCache.h | 39 +- src/ifcviewer/ViewportWindow.cpp | 1013 ++++++++++------------------ src/ifcviewer/ViewportWindow.h | 130 ++-- 11 files changed, 811 insertions(+), 1407 deletions(-) create mode 100644 src/ifcviewer/InstancedGeometry.h diff --git a/src/ifcviewer/BvhAccel.cpp b/src/ifcviewer/BvhAccel.cpp index e0b232a283c..c285f1fbfe0 100644 --- a/src/ifcviewer/BvhAccel.cpp +++ b/src/ifcviewer/BvhAccel.cpp @@ -23,7 +23,6 @@ #include #include #include -#include namespace { @@ -31,38 +30,36 @@ struct Centroid { float x, y, z; }; -Centroid computeCentroid(const ObjectDrawInfo& obj) { +Centroid computeCentroid(const BvhItem& it) { return { - (obj.aabb_min[0] + obj.aabb_max[0]) * 0.5f, - (obj.aabb_min[1] + obj.aabb_max[1]) * 0.5f, - (obj.aabb_min[2] + obj.aabb_max[2]) * 0.5f + (it.aabb_min[0] + it.aabb_max[0]) * 0.5f, + (it.aabb_min[1] + it.aabb_max[1]) * 0.5f, + (it.aabb_min[2] + it.aabb_max[2]) * 0.5f }; } -void computeAABB(const std::vector& draw_info, +void computeAABB(const std::vector& items, const uint32_t* indices, uint32_t count, float out_min[3], float out_max[3]) { out_min[0] = out_min[1] = out_min[2] = 
std::numeric_limits::max(); out_max[0] = out_max[1] = out_max[2] = -std::numeric_limits::max(); for (uint32_t i = 0; i < count; ++i) { - const auto& obj = draw_info[indices[i]]; + const auto& it = items[indices[i]]; for (int a = 0; a < 3; ++a) { - if (obj.aabb_min[a] < out_min[a]) out_min[a] = obj.aabb_min[a]; - if (obj.aabb_max[a] > out_max[a]) out_max[a] = obj.aabb_max[a]; + if (it.aabb_min[a] < out_min[a]) out_min[a] = it.aabb_min[a]; + if (it.aabb_max[a] > out_max[a]) out_max[a] = it.aabb_max[a]; } } } -// Recursive BVH builder. Writes nodes in pre-order DFS into mbvh.nodes. -// object_indices[start..start+count) are the indices to partition. void buildRecursive(ModelBvh& mbvh, - const std::vector& draw_info, + const std::vector& items, uint32_t start, uint32_t count) { uint32_t node_idx = static_cast(mbvh.nodes.size()); mbvh.nodes.emplace_back(); BvhNode& node = mbvh.nodes[node_idx]; - computeAABB(draw_info, &mbvh.object_indices[start], count, + computeAABB(items, &mbvh.item_indices[start], count, node.aabb_min, node.aabb_max); if (count <= BVH_MAX_LEAF_SIZE) { @@ -72,7 +69,6 @@ void buildRecursive(ModelBvh& mbvh, return; } - // Find longest axis of node AABB. float extent[3] = { node.aabb_max[0] - node.aabb_min[0], node.aabb_max[1] - node.aabb_min[1], @@ -82,145 +78,62 @@ void buildRecursive(ModelBvh& mbvh, if (extent[1] > extent[axis]) axis = 1; if (extent[2] > extent[axis]) axis = 2; - // Partition at median centroid on the chosen axis. 
uint32_t mid = count / 2; std::nth_element( - mbvh.object_indices.begin() + start, - mbvh.object_indices.begin() + start + mid, - mbvh.object_indices.begin() + start + count, + mbvh.item_indices.begin() + start, + mbvh.item_indices.begin() + start + mid, + mbvh.item_indices.begin() + start + count, [&](uint32_t a, uint32_t b) { - Centroid ca = computeCentroid(draw_info[a]); - Centroid cb = computeCentroid(draw_info[b]); + Centroid ca = computeCentroid(items[a]); + Centroid cb = computeCentroid(items[b]); return (&ca.x)[axis] < (&cb.x)[axis]; }); - node.count = 0; // interior + node.count = 0; node.axis = static_cast(axis); - // Left child is always node_idx + 1 (implicit in pre-order DFS). - // Build left subtree first. Note: &node is invalidated after this call - // because the vector may reallocate. - buildRecursive(mbvh, draw_info, start, mid); + buildRecursive(mbvh, items, start, mid); - // Right child is the next node written after the entire left subtree. uint32_t right_child_idx = static_cast(mbvh.nodes.size()); - buildRecursive(mbvh, draw_info, start + mid, count - mid); + buildRecursive(mbvh, items, start + mid, count - mid); - // Patch the right child index (left is implicit = node_idx + 1). mbvh.nodes[node_idx].right_or_first = right_child_idx; } -} // anonymous namespace - -ModelBvh buildModelBvh(const std::vector& draw_info, - const std::vector& model_object_indices, +ModelBvh buildModelBvh(const std::vector& items, + const std::vector& model_item_indices, uint32_t model_id) { ModelBvh mbvh; mbvh.model_id = model_id; - mbvh.object_indices = model_object_indices; + mbvh.item_indices = model_item_indices; - uint32_t count = static_cast(model_object_indices.size()); + uint32_t count = static_cast(model_item_indices.size()); if (count == 0) return mbvh; - // Reserve a rough estimate: ~2*n nodes for a balanced binary tree. 
mbvh.nodes.reserve(count * 2); + buildRecursive(mbvh, items, 0, count); - buildRecursive(mbvh, draw_info, 0, count); - - // Verify: every object appears exactly once in the leaves. assert(!mbvh.nodes.empty()); - return mbvh; } -std::shared_ptr buildBvhSet(const std::vector& draw_info) { +} // anonymous namespace + +std::shared_ptr buildBvhSet(const std::vector& items) { auto bvh_set = std::make_shared(); - // Group object indices by model_id. - std::unordered_map> model_objects; - for (uint32_t i = 0; i < static_cast(draw_info.size()); ++i) { - model_objects[draw_info[i].model_id].push_back(i); + std::unordered_map> model_items; + for (uint32_t i = 0; i < static_cast(items.size()); ++i) { + model_items[items[i].model_id].push_back(i); } - // Build per-model BVHs. - for (auto& [model_id, obj_indices] : model_objects) { - if (obj_indices.size() < BVH_MIN_OBJECTS) continue; + for (auto& [model_id, idxs] : model_items) { + if (idxs.size() < BVH_MIN_OBJECTS) continue; - ModelBvh mbvh = buildModelBvh(draw_info, obj_indices, model_id); + ModelBvh mbvh = buildModelBvh(items, idxs, model_id); bvh_set->bvh_model_ids.insert(model_id); bvh_set->models[model_id] = std::move(mbvh); } return bvh_set; } - -EboReorderResult reorderEbo(const BvhSet& bvh_set, - const std::vector& draw_info, - const std::vector& original_ebo) { - EboReorderResult result; - result.reordered_draw_info = draw_info; // copy; we'll update offsets - result.reordered_ebo.reserve(original_ebo.size()); - - // Track which draw_info entries have been placed. - std::vector placed(draw_info.size(), false); - - for (const auto& [model_id, mbvh] : bvh_set.models) { - // DFS traversal of BVH to visit leaves in order. - uint32_t stack[64]; - int sp = 0; - stack[sp++] = 0; - - while (sp > 0) { - uint32_t ni = stack[--sp]; - const BvhNode& node = mbvh.nodes[ni]; - - if (node.count > 0) { - // Leaf: emit objects in order. 
- for (uint32_t i = 0; i < node.count; ++i) { - uint32_t oi = mbvh.object_indices[node.right_or_first + i]; - if (placed[oi]) continue; - placed[oi] = true; - - const auto& old_info = draw_info[oi]; - uint32_t new_offset = static_cast( - result.reordered_ebo.size() * sizeof(uint32_t)); - - // Copy indices from original EBO. - uint32_t idx_start = old_info.index_offset / sizeof(uint32_t); - uint32_t idx_count = old_info.index_count; - for (uint32_t j = 0; j < idx_count; ++j) { - result.reordered_ebo.push_back(original_ebo[idx_start + j]); - } - - result.reordered_draw_info[oi].index_offset = new_offset; - } - } else { - // Interior: push left (=ni+1) last so it's processed first. - stack[sp++] = node.right_or_first; // right child - stack[sp++] = ni + 1; // left child - } - } - } - - // Append non-BVH objects (models too small for BVH). - for (uint32_t oi = 0; oi < static_cast(draw_info.size()); ++oi) { - if (placed[oi]) continue; - placed[oi] = true; - - const auto& old_info = draw_info[oi]; - uint32_t new_offset = static_cast( - result.reordered_ebo.size() * sizeof(uint32_t)); - - uint32_t idx_start = old_info.index_offset / sizeof(uint32_t); - uint32_t idx_count = old_info.index_count; - for (uint32_t j = 0; j < idx_count; ++j) { - result.reordered_ebo.push_back(original_ebo[idx_start + j]); - } - - result.reordered_draw_info[oi].index_offset = new_offset; - } - - assert(result.reordered_ebo.size() == original_ebo.size()); - - return result; -} diff --git a/src/ifcviewer/BvhAccel.h b/src/ifcviewer/BvhAccel.h index 21c57c2712a..a2cb6a13163 100644 --- a/src/ifcviewer/BvhAccel.h +++ b/src/ifcviewer/BvhAccel.h @@ -26,22 +26,22 @@ #include #include -struct ObjectDrawInfo { - uint32_t index_offset; // byte offset into EBO - uint32_t index_count; // number of indices - uint32_t model_id; // which model this object belongs to - float aabb_min[3]; // world-space AABB - float aabb_max[3]; +// Generic BVH item — anything with a world AABB and a model_id. 
+// For the instanced renderer each item represents one InstanceCpu. +struct BvhItem { + float aabb_min[3]; + float aabb_max[3]; + uint32_t model_id; }; static constexpr uint32_t BVH_MAX_LEAF_SIZE = 8; -static constexpr uint32_t BVH_MIN_OBJECTS = 32; +static constexpr uint32_t BVH_MIN_OBJECTS = 32; struct BvhNode { - float aabb_min[3]; - float aabb_max[3]; - uint32_t right_or_first; // interior: right child index (left is always this_index+1); leaf: first object index - uint16_t count; // 0 = interior; >0 = leaf with this many objects + float aabb_min[3]; + float aabb_max[3]; + uint32_t right_or_first; // interior: right child index (left is always this_index+1); leaf: first item index + uint16_t count; // 0 = interior; >0 = leaf with this many items uint16_t axis; // split axis (0/1/2) for interior; unused for leaf }; static_assert(sizeof(BvhNode) == 32, "BvhNode must be 32 bytes for cache alignment and sidecar format"); @@ -49,7 +49,7 @@ static_assert(sizeof(BvhNode) == 32, "BvhNode must be 32 bytes for cache alignme struct ModelBvh { uint32_t model_id = 0; std::vector nodes; - std::vector object_indices; // indices into object_draw_info_ + std::vector item_indices; // indices into the model's InstanceCpu array }; struct BvhSet { @@ -57,19 +57,10 @@ struct BvhSet { std::unordered_set bvh_model_ids; }; -struct EboReorderResult { - std::vector reordered_ebo; - std::vector reordered_draw_info; -}; - -// Build BVH trees for all models in the given draw info snapshot. -// Only builds the tree structure; does not touch EBO data. -std::shared_ptr buildBvhSet(const std::vector& draw_info); - -// Reorder the EBO so objects within each BVH leaf are contiguous. -// Must be called with the CURRENT run's EBO and draw_info (not cached). -EboReorderResult reorderEbo(const BvhSet& bvh_set, - const std::vector& draw_info, - const std::vector& original_ebo); +// Build BVH trees for all models in the given item snapshot. 
+// Items are expected to already be grouped/filtered by caller if needed. +// item_indices in the result reference positions within the full `items` +// vector — callers providing a single model's items will see 0..N-1. +std::shared_ptr buildBvhSet(const std::vector& items); #endif // BVHACCEL_H diff --git a/src/ifcviewer/GeometryStreamer.cpp b/src/ifcviewer/GeometryStreamer.cpp index 54b37df70ca..226fb0808ca 100644 --- a/src/ifcviewer/GeometryStreamer.cpp +++ b/src/ifcviewer/GeometryStreamer.cpp @@ -20,16 +20,52 @@ #include "GeometryStreamer.h" #include "AppSettings.h" #include "../ifcgeom/hybrid_kernel.h" +#include "../ifcgeom/taxonomy.h" + +#include #include #include #include #include #include +#include #include #include +struct MaterialInfo { + float r = 0.75f, g = 0.75f, b = 0.78f, a = 1.0f; +}; + +static MaterialInfo materialFromStyle(const ifcopenshell::geometry::taxonomy::style::ptr& style) { + MaterialInfo m; + if (!style) return m; + const auto& color = style->get_color(); + if (color) { + m.r = static_cast(color.r()); + m.g = static_cast(color.g()); + m.b = static_cast(color.b()); + } + if (!std::isnan(style->transparency)) { + m.a = 1.0f - static_cast(style->transparency); + } + return m; +} + +static inline uint32_t packRGBA8(const MaterialInfo& m) { + auto to_byte = [](float v) -> uint32_t { + float c = std::clamp(v, 0.0f, 1.0f); + return static_cast(c * 255.0f + 0.5f); + }; + uint32_t r = to_byte(m.r); + uint32_t g = to_byte(m.g); + uint32_t b = to_byte(m.b); + uint32_t a = to_byte(m.a); + // Little-endian byte layout [r,g,b,a] for GL_UNSIGNED_BYTE * 4 normalized. + return r | (g << 8) | (b << 16) | (a << 24); +} + GeometryStreamer::GeometryStreamer(QObject* parent) : QObject(parent) { @@ -96,6 +132,130 @@ std::vector GeometryStreamer::drainElements() { return result; } +// Build a mesh chunk (local coords, 28-byte interleaved vertices) from a +// TriangulationElement. 
Per-vertex color is baked from material_ids so that +// triangulations with per-face materials still render correctly. +static MeshChunk buildMeshChunk(uint32_t model_id, + uint32_t local_mesh_id, + const IfcGeom::TriangulationElement* elem) { + MeshChunk chunk; + chunk.model_id = model_id; + chunk.local_mesh_id = local_mesh_id; + + const auto& geom = elem->geometry(); + const auto& verts = geom.verts(); + const auto& faces = geom.faces(); + const auto& normals = geom.normals(); + const auto& materials = geom.materials(); + const auto& material_ids = geom.material_ids(); + + if (verts.empty() || faces.empty()) return chunk; + + const size_t num_verts_src = verts.size() / 3; + const size_t num_tris = faces.size() / 3; + const bool have_per_tri_material = (material_ids.size() == num_tris); + + // Dedupe (original vertex index, material id) so vertices shared across + // triangles of the same material stay shared; vertices spanning multiple + // materials are split (per-face color demands it). + auto make_key = [](uint32_t orig_idx, int mat_id) -> uint64_t { + return (static_cast(orig_idx) << 32) | static_cast(mat_id); + }; + + std::unordered_map remap; + remap.reserve(num_verts_src); + + chunk.vertices.reserve(num_verts_src * INSTANCED_VERTEX_STRIDE_FLOATS); + chunk.indices.reserve(faces.size()); + + // Track local AABB as we emit vertices. 
+ float amin[3] = { std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max() }; + float amax[3] = { -std::numeric_limits::max(), + -std::numeric_limits::max(), + -std::numeric_limits::max() }; + + auto emit_vertex = [&](uint32_t orig_idx, int mat_id) -> uint32_t { + const uint64_t key = make_key(orig_idx, mat_id); + auto it = remap.find(key); + if (it != remap.end()) return it->second; + + const uint32_t new_idx = static_cast( + chunk.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS); + + float px = static_cast(verts[orig_idx * 3 + 0]); + float py = static_cast(verts[orig_idx * 3 + 1]); + float pz = static_cast(verts[orig_idx * 3 + 2]); + chunk.vertices.push_back(px); + chunk.vertices.push_back(py); + chunk.vertices.push_back(pz); + if (px < amin[0]) amin[0] = px; if (px > amax[0]) amax[0] = px; + if (py < amin[1]) amin[1] = py; if (py > amax[1]) amax[1] = py; + if (pz < amin[2]) amin[2] = pz; if (pz > amax[2]) amax[2] = pz; + + if (orig_idx * 3 + 2 < normals.size()) { + chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 0])); + chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 1])); + chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 2])); + } else { + chunk.vertices.push_back(0.0f); + chunk.vertices.push_back(1.0f); + chunk.vertices.push_back(0.0f); + } + + MaterialInfo m; + if (mat_id >= 0 && mat_id < static_cast(materials.size())) { + m = materialFromStyle(materials[mat_id]); + } + uint32_t packed = packRGBA8(m); + float packed_as_float; + std::memcpy(&packed_as_float, &packed, sizeof(float)); + chunk.vertices.push_back(packed_as_float); + + remap.emplace(key, new_idx); + return new_idx; + }; + + for (size_t t = 0; t < num_tris; ++t) { + const int mat_id = have_per_tri_material ? 
material_ids[t] : -1; + chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 0]), mat_id)); + chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 1]), mat_id)); + chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 2]), mat_id)); + } + + if (chunk.vertices.empty()) { + for (int a = 0; a < 3; ++a) amin[a] = amax[a] = 0.0f; + } + for (int a = 0; a < 3; ++a) { + chunk.local_aabb_min[a] = amin[a]; + chunk.local_aabb_max[a] = amax[a]; + } + return chunk; +} + +// Compute the world-space AABB by transforming the 8 corners of the local +// AABB through the column-major 4x4 transform. +static void worldAabbFromLocal(const float local_min[3], + const float local_max[3], + const float M[16], + float out_min[3], float out_max[3]) { + out_min[0] = out_min[1] = out_min[2] = std::numeric_limits::max(); + out_max[0] = out_max[1] = out_max[2] = -std::numeric_limits::max(); + for (int c = 0; c < 8; ++c) { + float x = (c & 1) ? local_max[0] : local_min[0]; + float y = (c & 2) ? local_max[1] : local_min[1]; + float z = (c & 4) ? local_max[2] : local_min[2]; + // Column-major: world = M * [x,y,z,1]. + float wx = M[0]*x + M[4]*y + M[8]*z + M[12]; + float wy = M[1]*x + M[5]*y + M[9]*z + M[13]; + float wz = M[2]*x + M[6]*y + M[10]*z + M[14]; + if (wx < out_min[0]) out_min[0] = wx; if (wx > out_max[0]) out_max[0] = wx; + if (wy < out_min[1]) out_min[1] = wy; if (wy > out_max[1]) out_max[1] = wy; + if (wz < out_min[2]) out_min[2] = wz; if (wz > out_max[2]) out_max[2] = wz; + } +} + void GeometryStreamer::run(const std::string& path, int num_threads) { try { ifc_file_ = std::make_unique(path); @@ -105,7 +265,9 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { } ifcopenshell::geometry::Settings settings; - settings.set("use-world-coords", true); + // Instancing path: geometry stays in local coords; the transform is + // applied on the GPU per instance. 
+ settings.set("use-world-coords", false); settings.set("weld-vertices", false); settings.set("apply-default-materials", true); @@ -129,17 +291,14 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { int last_progress = 0; - // Instancing analysis: count shapes grouped by representation id. - struct GeomStat { - uint32_t count = 0; - size_t vertex_count = 0; - size_t index_count = 0; - std::string example_type; - }; - std::unordered_map geom_stats; + // geom.id() → local_mesh_id within this model. + std::unordered_map geom_to_local_mesh_id; + // local_mesh_id → (local AABB) so we can derive world AABBs for later instances. + struct MeshAabb { float lmin[3], lmax[3]; }; + std::vector mesh_aabbs; + uint32_t total_shapes = 0; - size_t total_vertices = 0; - size_t total_indices = 0; + uint32_t total_meshes = 0; QElapsedTimer stream_timer; stream_timer.start(); @@ -152,9 +311,12 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { const auto* tri_elem = dynamic_cast(elem); if (!tri_elem) continue; + const auto& geom = tri_elem->geometry(); + if (geom.verts().empty() || geom.faces().empty()) continue; + uint32_t object_id = next_object_id_++; - // Record element metadata + // Element metadata. ElementInfo info; info.object_id = object_id; info.model_id = model_id_; @@ -163,36 +325,62 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { info.name = tri_elem->name(); info.type = tri_elem->type(); info.parent_id = tri_elem->parent_id(); + { + std::lock_guard lock(elements_mutex_); + pending_elements_.push_back(std::move(info)); + } - // Instancing stats: key by representation id, count unique vs repeated. - const auto& geom = tri_elem->geometry(); + // Representation dedup. 
const std::string& geom_id = geom.id(); - size_t nv = geom.verts().size() / 3; - size_t ni = geom.faces().size(); - if (!geom_id.empty()) { - auto& gs = geom_stats[geom_id]; - gs.count++; - if (gs.count == 1) { - gs.vertex_count = nv; - gs.index_count = ni; - gs.example_type = info.type; + uint32_t local_mesh_id; + bool first_sight = false; + if (geom_id.empty()) { + // No representation key — treat as unique. + local_mesh_id = total_meshes++; + first_sight = true; + } else { + auto it = geom_to_local_mesh_id.find(geom_id); + if (it == geom_to_local_mesh_id.end()) { + local_mesh_id = total_meshes++; + geom_to_local_mesh_id.emplace(geom_id, local_mesh_id); + first_sight = true; + } else { + local_mesh_id = it->second; } } - total_shapes++; - total_vertices += nv; - total_indices += ni; - { - std::lock_guard lock(elements_mutex_); - pending_elements_.push_back(std::move(info)); + if (first_sight) { + MeshChunk mesh_chunk = buildMeshChunk(model_id_, local_mesh_id, tri_elem); + MeshAabb ma; + for (int a = 0; a < 3; ++a) { + ma.lmin[a] = mesh_chunk.local_aabb_min[a]; + ma.lmax[a] = mesh_chunk.local_aabb_max[a]; + } + if (mesh_aabbs.size() <= local_mesh_id) mesh_aabbs.resize(local_mesh_id + 1); + mesh_aabbs[local_mesh_id] = ma; + if (!mesh_chunk.indices.empty()) { + emit meshReady(std::move(mesh_chunk)); + } } - // Convert geometry to upload chunk - UploadChunk chunk = convertElement(tri_elem, object_id); - if (!chunk.indices.empty()) { - emit elementReady(std::move(chunk)); + // Transform (column-major 4x4, cast to float). 
+ const Eigen::Matrix4d& mat_d = tri_elem->transformation().data()->ccomponents(); + InstanceChunk inst; + inst.model_id = model_id_; + inst.local_mesh_id = local_mesh_id; + inst.object_id = object_id; + inst.color_override_rgba8 = 0; // 0 = use baked vertex color + for (int i = 0; i < 16; ++i) { + inst.transform[i] = static_cast(mat_d.data()[i]); } + const MeshAabb& ma = mesh_aabbs[local_mesh_id]; + worldAabbFromLocal(ma.lmin, ma.lmax, inst.transform, + inst.world_aabb_min, inst.world_aabb_max); + + emit instanceReady(std::move(inst)); + total_shapes++; + int p = iterator->progress(); if (p != last_progress) { last_progress = p; @@ -204,188 +392,9 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { progress_ = 100; emit progressChanged(100); - // === Instancing report === - { - size_t unique_geoms = geom_stats.size(); - size_t unique_vertices = 0; - size_t unique_indices = 0; - size_t repeated_shapes = 0; // total shapes that share a repr with another - for (const auto& [gid, gs] : geom_stats) { - unique_vertices += gs.vertex_count; - unique_indices += gs.index_count; - if (gs.count > 1) repeated_shapes += gs.count; - } - - // Bytes assuming current layout (32 B/vertex, 4 B/index). - size_t baked_vbo_bytes = total_vertices * 32; - size_t baked_ebo_bytes = total_indices * 4; - size_t instanced_vbo_bytes = unique_vertices * 32; - size_t instanced_ebo_bytes = unique_indices * 4; - // Per-instance data: 64 B transform + 8 B (object_id + color). - size_t per_instance_bytes = 72; - size_t instance_ssbo_bytes = total_shapes * per_instance_bytes; - - double dedup_ratio = unique_geoms > 0 - ? 
static_cast(total_shapes) / static_cast(unique_geoms) - : 1.0; - - qDebug("=== Instancing analysis: %s ===", path.c_str()); - qDebug(" Stream time: %.2f s", stream_timer.elapsed() / 1000.0); - qDebug(" Total shapes: %u", total_shapes); - qDebug(" Unique geometries: %zu (dedup ratio %.2fx)", - unique_geoms, dedup_ratio); - qDebug(" Repeated shapes: %zu (%.1f%% of total)", - repeated_shapes, - total_shapes > 0 ? 100.0 * repeated_shapes / total_shapes : 0.0); - qDebug(" Baked geometry: VBO %.1f MB + EBO %.1f MB = %.1f MB", - baked_vbo_bytes / (1024.0*1024.0), - baked_ebo_bytes / (1024.0*1024.0), - (baked_vbo_bytes + baked_ebo_bytes) / (1024.0*1024.0)); - qDebug(" If instanced: VBO %.1f MB + EBO %.1f MB + SSBO %.1f MB = %.1f MB", - instanced_vbo_bytes / (1024.0*1024.0), - instanced_ebo_bytes / (1024.0*1024.0), - instance_ssbo_bytes / (1024.0*1024.0), - (instanced_vbo_bytes + instanced_ebo_bytes + instance_ssbo_bytes) - / (1024.0*1024.0)); - size_t baked_total = baked_vbo_bytes + baked_ebo_bytes; - size_t inst_total = instanced_vbo_bytes + instanced_ebo_bytes + instance_ssbo_bytes; - if (inst_total > 0 && baked_total > inst_total) { - qDebug(" Potential savings: %.1f MB (%.1f%%)", - (baked_total - inst_total) / (1024.0*1024.0), - 100.0 * (baked_total - inst_total) / baked_total); - } else { - qDebug(" Potential savings: none (instance overhead exceeds dedup win)"); - } - - // Top-5 most duplicated representations. 
- std::vector> sorted(geom_stats.begin(), geom_stats.end()); - std::partial_sort(sorted.begin(), - sorted.begin() + std::min(5, sorted.size()), - sorted.end(), - [](const auto& a, const auto& b) { return a.second.count > b.second.count; }); - qDebug(" Top duplicated representations:"); - for (size_t i = 0; i < std::min(5, sorted.size()); ++i) { - const auto& [gid, gs] = sorted[i]; - qDebug(" [%zu] count=%u verts=%zu type=%s repr_id=%s", - i + 1, gs.count, gs.vertex_count, - gs.example_type.c_str(), gid.c_str()); - } - } -} - -static MaterialInfo materialFromStyle(const ifcopenshell::geometry::taxonomy::style::ptr& style) { - MaterialInfo m; - if (!style) return m; - - const auto& color = style->get_color(); - if (color) { - m.r = static_cast(color.r()); - m.g = static_cast(color.g()); - m.b = static_cast(color.b()); - } - if (!std::isnan(style->transparency)) { - m.a = 1.0f - static_cast(style->transparency); - } - return m; -} - -static inline uint32_t packRGBA8(const MaterialInfo& m) { - auto to_byte = [](float v) -> uint32_t { - float c = std::clamp(v, 0.0f, 1.0f); - return static_cast(c * 255.0f + 0.5f); - }; - uint32_t r = to_byte(m.r); - uint32_t g = to_byte(m.g); - uint32_t b = to_byte(m.b); - uint32_t a = to_byte(m.a); - // Layout in memory (little-endian) reads as bytes [r, g, b, a] which is - // what the GL_UNSIGNED_BYTE * 4 normalized vertex attribute expects. 
- return r | (g << 8) | (b << 16) | (a << 24); -} - -UploadChunk GeometryStreamer::convertElement(const IfcGeom::TriangulationElement* elem, uint32_t object_id) { - UploadChunk chunk; - chunk.object_id = object_id; - chunk.model_id = model_id_; - - const auto& geom = elem->geometry(); - const auto& verts = geom.verts(); - const auto& faces = geom.faces(); - const auto& normals = geom.normals(); - const auto& materials = geom.materials(); - const auto& material_ids = geom.material_ids(); - - if (verts.empty() || faces.empty()) return chunk; - - // Encode object_id as float bits for the vertex attribute - float id_as_float; - static_assert(sizeof(float) == sizeof(uint32_t)); - std::memcpy(&id_as_float, &object_id, sizeof(float)); - - const size_t num_verts = verts.size() / 3; - const size_t num_tris = faces.size() / 3; - const bool have_per_tri_material = (material_ids.size() == num_tris); - - // Per-vertex color requires that any vertex shared between triangles with - // *different* materials be split. We dedupe (orig_vert_idx, mat_id) pairs - // so vertices that are only ever used by one material stay shared. 
- auto make_key = [](uint32_t orig_idx, int mat_id) -> uint64_t { - return (static_cast(orig_idx) << 32) | - static_cast(mat_id); - }; - - std::unordered_map remap; - remap.reserve(num_verts); - - chunk.vertices.reserve(num_verts * 8); - chunk.indices.reserve(faces.size()); - - auto emit_vertex = [&](uint32_t orig_idx, int mat_id) -> uint32_t { - const uint64_t key = make_key(orig_idx, mat_id); - auto it = remap.find(key); - if (it != remap.end()) return it->second; - - const uint32_t new_idx = static_cast(chunk.vertices.size() / 8); - - // pos - chunk.vertices.push_back(static_cast(verts[orig_idx * 3 + 0])); - chunk.vertices.push_back(static_cast(verts[orig_idx * 3 + 1])); - chunk.vertices.push_back(static_cast(verts[orig_idx * 3 + 2])); - - // normal - if (orig_idx * 3 + 2 < normals.size()) { - chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 0])); - chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 1])); - chunk.vertices.push_back(static_cast(normals[orig_idx * 3 + 2])); - } else { - chunk.vertices.push_back(0.0f); - chunk.vertices.push_back(1.0f); - chunk.vertices.push_back(0.0f); - } - - // object_id (float bits) - chunk.vertices.push_back(id_as_float); - - // color (packed RGBA8 reinterpreted as float) - MaterialInfo m; - if (mat_id >= 0 && mat_id < static_cast(materials.size())) { - m = materialFromStyle(materials[mat_id]); - } - uint32_t packed = packRGBA8(m); - float packed_as_float; - std::memcpy(&packed_as_float, &packed, sizeof(float)); - chunk.vertices.push_back(packed_as_float); - - remap.emplace(key, new_idx); - return new_idx; - }; - - for (size_t t = 0; t < num_tris; ++t) { - const int mat_id = have_per_tri_material ? 
material_ids[t] : -1; - chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 0]), mat_id)); - chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 1]), mat_id)); - chunk.indices.push_back(emit_vertex(static_cast(faces[t * 3 + 2]), mat_id)); - } - - return chunk; + double dedup_ratio = total_meshes > 0 + ? static_cast(total_shapes) / static_cast(total_meshes) : 1.0; + qDebug("Streamer done: %s %.2fs shapes=%u unique_meshes=%u dedup=%.2fx", + path.c_str(), stream_timer.elapsed() / 1000.0, + total_shapes, total_meshes, dedup_ratio); } diff --git a/src/ifcviewer/GeometryStreamer.h b/src/ifcviewer/GeometryStreamer.h index 0d49a12ca70..f6201517ad1 100644 --- a/src/ifcviewer/GeometryStreamer.h +++ b/src/ifcviewer/GeometryStreamer.h @@ -26,15 +26,13 @@ #include #include #include -#include #include #include -#include #include "../ifcparse/file.h" #include "../ifcgeom/Iterator.h" -#include "ViewportWindow.h" +#include "InstancedGeometry.h" struct ElementInfo { uint32_t object_id; @@ -67,15 +65,14 @@ class GeometryStreamer : public QObject { signals: void progressChanged(int percent); - void elementReady(UploadChunk chunk); + void meshReady(MeshChunk chunk); + void instanceReady(InstanceChunk chunk); void finished(); void errorOccurred(const QString& message); private: void run(const std::string& path, int num_threads); - UploadChunk convertElement(const IfcGeom::TriangulationElement* elem, uint32_t object_id); - std::unique_ptr ifc_file_; std::unique_ptr worker_thread_; std::atomic running_{false}; @@ -85,7 +82,7 @@ class GeometryStreamer : public QObject { std::mutex elements_mutex_; std::vector pending_elements_; - uint32_t next_object_id_ = 1; // 0 = no object + uint32_t next_object_id_ = 1; uint32_t model_id_ = 0; }; diff --git a/src/ifcviewer/InstancedGeometry.h b/src/ifcviewer/InstancedGeometry.h new file mode 100644 index 00000000000..1c027976ef1 --- /dev/null +++ b/src/ifcviewer/InstancedGeometry.h @@ -0,0 +1,103 @@ 
+/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef INSTANCEDGEOMETRY_H +#define INSTANCEDGEOMETRY_H + +#include +#include +#include + +// Per-vertex layout for instanced meshes, stored in local coordinates. +// 28 bytes per vertex: +// pos(3 float) -- 12 B +// normal(3 float) -- 12 B +// color(4 bytes RGBA8, read as GL_UNSIGNED_BYTE*4 normalized) -- 4 B +static constexpr int INSTANCED_VERTEX_STRIDE_BYTES = 28; +static constexpr int INSTANCED_VERTEX_STRIDE_FLOATS = 7; + +// Per-mesh metadata on the CPU side. Meshes own a slice of the model's +// VBO and EBO (both local-coords/mesh-local indices). +struct MeshInfo { + uint32_t vbo_byte_offset = 0; // where this mesh's vertices start + uint32_t vertex_count = 0; + uint32_t ebo_byte_offset = 0; // where this mesh's indices start + uint32_t index_count = 0; + float local_aabb_min[3]{}; + float local_aabb_max[3]{}; + uint32_t first_instance = 0; // index into per-model instances array + uint32_t instance_count = 0; +}; +static_assert(sizeof(MeshInfo) == 48, "MeshInfo must be 48 bytes"); + +// Per-instance record uploaded to an SSBO and read by the vertex shader. 
+// Layout deliberately matches std430 expectations: +// mat4 transform (64 B column-major) +// uint object_id +// uint color_override_rgba8 -- 0 = use baked vertex color, else override +// uint _pad0, _pad1 -- align to 16 for std430 +struct alignas(16) InstanceGpu { + float transform[16]; + uint32_t object_id = 0; + uint32_t color_override_rgba8 = 0; + uint32_t _pad0 = 0; + uint32_t _pad1 = 0; +}; +static_assert(sizeof(InstanceGpu) == 80, "InstanceGpu must be 80 bytes"); + +// CPU-side per-instance data. The GPU record above is derived from this; +// we also retain the world AABB for BVH construction and the mesh_id. +struct InstanceCpu { + uint32_t mesh_id = 0; // index into meshes array + uint32_t object_id = 0; + uint32_t color_override_rgba8 = 0; + uint32_t model_id = 0; + float transform[16]{}; + float world_aabb_min[3]{}; + float world_aabb_max[3]{}; +}; + +// Chunks emitted by the streamer to the viewport (main thread). + +// Emitted the first time a representation id is seen. Carries the mesh +// geometry in local coords. `local_mesh_id` is the streamer-assigned id +// within this model. +struct MeshChunk { + uint32_t model_id = 0; + uint32_t local_mesh_id = 0; + std::vector vertices; // 7 floats * N_verts (pos3+norm3+color1_packed) + std::vector indices; + float local_aabb_min[3]{}; + float local_aabb_max[3]{}; +}; + +// Emitted for every placement (every triangulation element from the +// iterator). For the first instance of a mesh, the MeshChunk is emitted +// just before this. 
+struct InstanceChunk { + uint32_t model_id = 0; + uint32_t local_mesh_id = 0; + uint32_t object_id = 0; + uint32_t color_override_rgba8 = 0; + float transform[16]{}; + float world_aabb_min[3]{}; + float world_aabb_max[3]{}; +}; + +#endif // INSTANCEDGEOMETRY_H diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index b5ee3581c44..86a787a0e26 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -173,8 +173,10 @@ void MainWindow::addFiles(const QStringList& paths) { void MainWindow::connectStreamer(GeometryStreamer* streamer) { connect(streamer, &GeometryStreamer::progressChanged, this, &MainWindow::onProgressChanged, Qt::QueuedConnection); - connect(streamer, &GeometryStreamer::elementReady, - this, &MainWindow::onElementReady, Qt::QueuedConnection); + connect(streamer, &GeometryStreamer::meshReady, + this, &MainWindow::onMeshReady, Qt::QueuedConnection); + connect(streamer, &GeometryStreamer::instanceReady, + this, &MainWindow::onInstanceReady, Qt::QueuedConnection); connect(streamer, &GeometryStreamer::finished, this, &MainWindow::onStreamingFinished, Qt::QueuedConnection); connect(streamer, &GeometryStreamer::errorOccurred, this, [this](const QString& msg) { @@ -208,7 +210,7 @@ void MainWindow::startNextLoad() { qDebug(" Sidecar read: %lld ms (%s)", rt.elapsed(), ifc_path.c_str()); auto result = std::make_shared>(std::move(cached)); QMetaObject::invokeMethod(this, [this, mid, result]() { - if (*result && !(*result)->draw_info.empty()) { + if (*result && !(*result)->meshes.empty()) { applySidecarData(mid, std::move(**result)); } else { // No sidecar — fall back to streaming from IFC. 
@@ -227,51 +229,10 @@ void MainWindow::startNextLoad() { }); } -void MainWindow::applySidecarData(ModelId mid, SidecarData data) { - auto it = models_.find(mid); - if (it == models_.end()) return; - auto& model = it->second; - - QElapsedTimer t; - - qDebug("Sidecar hit: %s (%zu objects, %zu verts, %zu indices, %.1f MB)", - model.file_path.toStdString().c_str(), data.draw_info.size(), - data.vertices.size() / 8, data.indices.size(), - (data.vertices.size() * 4 + data.indices.size() * 4) / (1024.0 * 1024.0)); - - // GL upload — fast, single buffer copy. - t.start(); - viewport_->uploadBulk(mid, data.vertices, data.indices, - data.draw_info, std::move(data.bvh_set)); - qDebug(" GL upload: %lld ms", t.elapsed()); - - // Update next_object_id_ past all objects in this model. - for (const auto& elem : data.elements) { - if (elem.object_id >= next_object_id_) - next_object_id_ = elem.object_id + 1; - } - - // Suppress per-item layout recalcs while building the tree. - t.restart(); - element_tree_->setUpdatesEnabled(false); - populateTreeFromSidecar(model, data.elements, data.string_table); - element_tree_->setUpdatesEnabled(true); - qDebug(" Tree build: %lld ms (%zu elements)", t.elapsed(), data.elements.size()); - - progress_bar_->setVisible(false); - - qint64 ms = load_timer_.elapsed(); - QString elapsed = (ms >= 1000) - ? QString::number(ms / 1000.0, 'f', 2) + " s" - : QString::number(ms) + " ms"; - - status_label_->setText(QString("%1 elements across %2 model(s) — loaded from cache in %3") - .arg(element_map_.size()) - .arg(models_.size()) - .arg(elapsed)); - - loading_model_id_ = 0; - QTimer::singleShot(0, this, &MainWindow::startNextLoad); +void MainWindow::applySidecarData(ModelId /*mid*/, SidecarData /*data*/) { + // Commit A: readSidecar() always returns nullopt, so this is unreachable. + // Restored in Commit B along with the v4 on-disk format. 
+ qWarning("applySidecarData called but sidecar is disabled in Commit A"); } void MainWindow::populateTreeFromSidecar(ModelHandle& model, @@ -325,8 +286,12 @@ void MainWindow::onProgressChanged(int percent) { progress_bar_->setValue(percent); } -void MainWindow::onElementReady(UploadChunk chunk) { - viewport_->uploadChunk(chunk); +void MainWindow::onMeshReady(MeshChunk chunk) { + viewport_->uploadMeshChunk(chunk); +} + +void MainWindow::onInstanceReady(InstanceChunk chunk) { + viewport_->uploadInstanceChunk(chunk); } void MainWindow::onStreamingFinished() { @@ -355,39 +320,10 @@ void MainWindow::onStreamingFinished() { .arg(num_models) .arg(elapsed)); - // Build BVH and write sidecar (geometry + metadata + BVH). + // Sort instances by mesh and upload the per-model instance SSBO. + // Sidecar write is stubbed in Commit A. if (loading_model_id_ != 0) { - auto it = models_.find(loading_model_id_); - if (it != models_.end()) { - std::string ifc_path = it->second.file_path.toStdString(); - QFileInfo fi(it->second.file_path); - uint64_t file_size = static_cast(fi.size()); - - // Pack element info for the sidecar (only this model's elements). 
- std::vector packed; - std::string stbl; - for (const auto& [oid, info] : element_map_) { - if (info.model_id != loading_model_id_) continue; - PackedElementInfo pe; - pe.object_id = info.object_id; - pe.model_id = info.model_id; - pe.ifc_id = info.ifc_id; - pe.parent_id = info.parent_id; - pe.guid_offset = static_cast(stbl.size()); - pe.guid_length = static_cast(info.guid.size()); - stbl += info.guid; - pe.name_offset = static_cast(stbl.size()); - pe.name_length = static_cast(info.name.size()); - stbl += info.name; - pe.type_offset = static_cast(stbl.size()); - pe.type_length = static_cast(info.type.size()); - stbl += info.type; - packed.push_back(pe); - } - - viewport_->buildBvhAsync(loading_model_id_, ifc_path, file_size, - std::move(packed), std::move(stbl)); - } + viewport_->finalizeModel(loading_model_id_); } // Start next model if queued. diff --git a/src/ifcviewer/MainWindow.h b/src/ifcviewer/MainWindow.h index f60da70b75d..5270676af53 100644 --- a/src/ifcviewer/MainWindow.h +++ b/src/ifcviewer/MainWindow.h @@ -62,7 +62,8 @@ private slots: void onFileOpen(); void onFileSettings(); void onProgressChanged(int percent); - void onElementReady(UploadChunk chunk); + void onMeshReady(MeshChunk chunk); + void onInstanceReady(InstanceChunk chunk); void onStreamingFinished(); void onObjectPicked(uint32_t object_id); void onTreeSelectionChanged(); diff --git a/src/ifcviewer/SidecarCache.cpp b/src/ifcviewer/SidecarCache.cpp index d77095c9223..be19c8698f4 100644 --- a/src/ifcviewer/SidecarCache.cpp +++ b/src/ifcviewer/SidecarCache.cpp @@ -17,180 +17,20 @@ * * ********************************************************************************/ -#include "SidecarCache.h" - -#include -#include - -// Binary layout (all multi-byte fields native-endian): -// -// SidecarHeader (16 bytes) -// uint64_t source_file_size -// -// uint32_t num_vertices (count of floats) -// float[num_vertices] vertex data -// -// uint32_t num_indices -// uint32_t[num_indices] index data -// -// 
uint32_t num_draw_infos -// ObjectDrawInfo[N] draw info array -// -// uint32_t num_elements -// PackedElementInfo[N] element records -// uint32_t string_table_bytes -// char[string_table_bytes] -// -// uint32_t num_bvh_models -// for each model: -// uint32_t model_id -// uint32_t num_nodes -// BvhNode[num_nodes] -// uint32_t num_object_indices -// uint32_t[num_object_indices] - -struct SidecarHeader { - uint32_t magic; - uint32_t version; - uint32_t endian; - uint32_t reserved; -}; - -static std::string sidecarPath(const std::string& ifc_path) { - return ifc_path + ".ifcview"; -} - -template -static bool writeVec(FILE* f, const std::vector& v) { - uint32_t n = static_cast(v.size()); - if (fwrite(&n, 4, 1, f) != 1) return false; - if (n > 0 && fwrite(v.data(), sizeof(T), n, f) != n) return false; - return true; -} - -template -static bool readVec(FILE* f, std::vector& v) { - uint32_t n; - if (fread(&n, 4, 1, f) != 1) return false; - v.resize(n); - if (n > 0 && fread(v.data(), sizeof(T), n, f) != n) return false; - return true; -} - -bool writeSidecar(const std::string& ifc_path, - const SidecarData& data, - uint64_t ifc_file_size) { - std::string path = sidecarPath(ifc_path); - FILE* f = fopen(path.c_str(), "wb"); - if (!f) return false; - - // Header - SidecarHeader hdr = { SIDECAR_MAGIC, SIDECAR_VERSION, SIDECAR_ENDIAN, 0 }; - fwrite(&hdr, sizeof(hdr), 1, f); - fwrite(&ifc_file_size, 8, 1, f); - - // Geometry - if (!writeVec(f, data.vertices)) { fclose(f); return false; } - if (!writeVec(f, data.indices)) { fclose(f); return false; } - - // Draw info - if (!writeVec(f, data.draw_info)) { fclose(f); return false; } - - // Elements + string table - if (!writeVec(f, data.elements)) { fclose(f); return false; } - uint32_t stbl_len = static_cast(data.string_table.size()); - fwrite(&stbl_len, 4, 1, f); - if (stbl_len > 0) fwrite(data.string_table.data(), 1, stbl_len, f); - - // BVH - uint32_t num_bvh_models = data.bvh_set - ? 
static_cast(data.bvh_set->models.size()) : 0; - fwrite(&num_bvh_models, 4, 1, f); +// Commit A: sidecar cache is temporarily disabled. The on-disk format is +// being rewritten from v3 (monolithic world-coord geometry) to v4 (instanced +// meshes + per-instance records). Until v4 is finalised, loads always go +// through the streaming path and writes are no-ops. - if (data.bvh_set) { - for (const auto& [model_id, mbvh] : data.bvh_set->models) { - fwrite(&model_id, 4, 1, f); - - uint32_t nn = static_cast(mbvh.nodes.size()); - fwrite(&nn, 4, 1, f); - if (nn > 0) fwrite(mbvh.nodes.data(), sizeof(BvhNode), nn, f); - - uint32_t no = static_cast(mbvh.object_indices.size()); - fwrite(&no, 4, 1, f); - if (no > 0) fwrite(mbvh.object_indices.data(), 4, no, f); - } - } +#include "SidecarCache.h" - fclose(f); +bool writeSidecar(const std::string& /*ifc_path*/, + const SidecarData& /*data*/, + uint64_t /*ifc_file_size*/) { return true; } -std::optional readSidecar(const std::string& ifc_path, - uint64_t ifc_file_size) { - std::string path = sidecarPath(ifc_path); - FILE* f = fopen(path.c_str(), "rb"); - if (!f) return std::nullopt; - - auto fail = [&]() -> std::optional { fclose(f); return std::nullopt; }; - - // Header - SidecarHeader hdr; - if (fread(&hdr, sizeof(hdr), 1, f) != 1) return fail(); - if (hdr.magic != SIDECAR_MAGIC || - hdr.version != SIDECAR_VERSION || - hdr.endian != SIDECAR_ENDIAN) return fail(); - - uint64_t stored_size; - if (fread(&stored_size, 8, 1, f) != 1) return fail(); - if (stored_size != ifc_file_size) return fail(); - - SidecarData data; - - // Geometry - if (!readVec(f, data.vertices)) return fail(); - if (!readVec(f, data.indices)) return fail(); - - // Draw info - if (!readVec(f, data.draw_info)) return fail(); - - // Elements + string table - if (!readVec(f, data.elements)) return fail(); - uint32_t stbl_len; - if (fread(&stbl_len, 4, 1, f) != 1) return fail(); - data.string_table.resize(stbl_len); - if (stbl_len > 0 && 
fread(data.string_table.data(), 1, stbl_len, f) != stbl_len) - return fail(); - - // BVH - uint32_t num_bvh_models; - if (fread(&num_bvh_models, 4, 1, f) != 1) return fail(); - - if (num_bvh_models > 0) { - data.bvh_set = std::make_shared(); - for (uint32_t m = 0; m < num_bvh_models; ++m) { - uint32_t model_id; - if (fread(&model_id, 4, 1, f) != 1) return fail(); - - ModelBvh mbvh; - mbvh.model_id = model_id; - - uint32_t nn; - if (fread(&nn, 4, 1, f) != 1) return fail(); - mbvh.nodes.resize(nn); - if (nn > 0 && fread(mbvh.nodes.data(), sizeof(BvhNode), nn, f) != nn) - return fail(); - - uint32_t no; - if (fread(&no, 4, 1, f) != 1) return fail(); - mbvh.object_indices.resize(no); - if (no > 0 && fread(mbvh.object_indices.data(), 4, no, f) != no) - return fail(); - - data.bvh_set->bvh_model_ids.insert(model_id); - data.bvh_set->models[model_id] = std::move(mbvh); - } - } - - fclose(f); - return data; +std::optional readSidecar(const std::string& /*ifc_path*/, + uint64_t /*ifc_file_size*/) { + return std::nullopt; } diff --git a/src/ifcviewer/SidecarCache.h b/src/ifcviewer/SidecarCache.h index 49c36dba15a..e14eb9d2561 100644 --- a/src/ifcviewer/SidecarCache.h +++ b/src/ifcviewer/SidecarCache.h @@ -17,22 +17,28 @@ * * ********************************************************************************/ +// NOTE: Sidecar format v3 is being rewritten to v4 (instanced geometry layout). +// During the instancing rewrite (Commit A) the cache is a no-op: reads always +// miss and writes always succeed without producing a file. Commit B will +// re-introduce the on-disk format with MeshInfo + InstanceGpu sections. 
+ #ifndef SIDECARCACHE_H #define SIDECARCACHE_H -#include "BvhAccel.h" +#include "InstancedGeometry.h" #include #include #include #include +#include static constexpr uint32_t SIDECAR_MAGIC = 0x49465657; // "IFVW" -static constexpr uint32_t SIDECAR_VERSION = 3; +static constexpr uint32_t SIDECAR_VERSION = 4; static constexpr uint32_t SIDECAR_ENDIAN = 0x01020304; -// Fixed-size element record for the sidecar. Strings are stored as -// (offset, length) pairs into a separate string table. +// Fixed-size element record. Strings are stored as (offset, length) pairs +// into a separate string table. struct PackedElementInfo { uint32_t object_id; uint32_t model_id; @@ -46,30 +52,27 @@ struct PackedElementInfo { uint32_t type_length; }; -// Everything the viewer needs to display a model without tessellating. +// Everything needed to display an already-tessellated model without +// re-running the iterator. v4 schema: instanced geometry. struct SidecarData { - // GPU geometry (ready to upload as-is) - std::vector vertices; // interleaved, 8 floats per vertex - std::vector indices; // global (already remapped) + // Per-model GPU geometry (local coords). 28 bytes/vertex. + std::vector vertices; + std::vector indices; - // Per-object metadata - std::vector draw_info; + // Mesh dictionary and per-instance data. + std::vector meshes; // indexed by local_mesh_id + std::vector instances; // sorted by mesh_id - // Element tree metadata + // Element tree metadata. std::vector elements; - std::string string_table; // concatenated UTF-8 - - // BVH acceleration - std::shared_ptr bvh_set; + std::string string_table; }; -// Write a full sidecar next to the IFC file. -// Returns true on success. +// v4 writer/reader are stubbed for Commit A — no disk I/O happens. bool writeSidecar(const std::string& ifc_path, const SidecarData& data, uint64_t ifc_file_size); -// Read a sidecar. Returns nullopt on any failure (missing, stale, corrupt). 
std::optional readSidecar(const std::string& ifc_path, uint64_t ifc_file_size); diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 1c6ab786254..e264f990e45 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -18,7 +18,6 @@ ********************************************************************************/ #include "ViewportWindow.h" -#include "SidecarCache.h" #include #include @@ -31,33 +30,75 @@ #include #include -static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB -static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB -static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB -static const int VERTEX_STRIDE = 8; // pos(3) + normal(3) + object_id(1) + color(1 packed) +static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB +static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB +static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB + +// ----------------------------------------------------------------------------- +// Shaders +// ----------------------------------------------------------------------------- +// +// Vertex layout (GL side, 28 bytes): +// location 0: vec3 a_position (local coords) +// location 1: vec3 a_normal (local) +// location 2: vec4 a_color (GL_UNSIGNED_BYTE * 4 normalized) +// +// Per-instance record in SSBO std430 (80 bytes): +// mat4 transform +// uint object_id +// uint color_override_rgba8 -- 0 => use baked a_color +// uint _pad0, _pad1 +// +// The draw calls pass `u_instance_offset = mesh.first_instance`; the shader +// reads `instances[u_instance_offset + gl_InstanceID]`. 
static const char* MAIN_VERTEX_SHADER = R"( #version 450 core layout(location = 0) in vec3 a_position; layout(location = 1) in vec3 a_normal; -layout(location = 2) in float a_object_id; -layout(location = 3) in vec4 a_color; +layout(location = 2) in vec4 a_color; + +struct InstanceRecord { + mat4 transform; + uint object_id; + uint color_override; + uint _pad0; + uint _pad1; +}; +layout(std430, binding = 0) readonly buffer Instances { + InstanceRecord instances[]; +}; uniform mat4 u_view_projection; +uniform uint u_instance_offset; uniform uint u_selected_id; out vec3 v_normal; -out vec3 v_position; out vec4 v_color; flat out uint v_object_id; flat out uint v_selected; void main() { - gl_Position = u_view_projection * vec4(a_position, 1.0); - v_normal = a_normal; - v_position = a_position; - v_color = a_color; - v_object_id = floatBitsToUint(a_object_id); + InstanceRecord inst = instances[u_instance_offset + uint(gl_InstanceID)]; + vec4 world = inst.transform * vec4(a_position, 1.0); + gl_Position = u_view_projection * world; + + // Rotate the normal by the upper-3x3 of the transform. For the vast + // majority of BIM placements this is a rigid rotation (+ uniform scale), + // so we skip the inverse-transpose. + v_normal = normalize(mat3(inst.transform) * a_normal); + + vec4 baked = a_color; + if (inst.color_override != 0u) { + float r = float((inst.color_override ) & 0xFFu) / 255.0; + float g = float((inst.color_override >> 8) & 0xFFu) / 255.0; + float b = float((inst.color_override >> 16) & 0xFFu) / 255.0; + float a = float((inst.color_override >> 24) & 0xFFu) / 255.0; + if (a > 0.0) baked = vec4(r, g, b, a); + } + v_color = baked; + + v_object_id = inst.object_id; v_selected = (v_object_id == u_selected_id) ? 
1u : 0u; } )"; @@ -65,7 +106,6 @@ void main() { static const char* MAIN_FRAGMENT_SHADER = R"( #version 450 core in vec3 v_normal; -in vec3 v_position; in vec4 v_color; flat in uint v_object_id; flat in uint v_selected; @@ -80,11 +120,7 @@ void main() { float ambient = 0.25; float diffuse = 0.75 * ndotl; vec3 color = v_color.rgb * (ambient + diffuse); - - if (v_selected == 1u) { - color = mix(color, vec3(0.2, 0.6, 1.0), 0.5); - } - + if (v_selected == 1u) color = mix(color, vec3(0.2, 0.6, 1.0), 0.5); frag_color = vec4(color, v_color.a); } )"; @@ -92,39 +128,43 @@ void main() { static const char* PICK_VERTEX_SHADER = R"( #version 450 core layout(location = 0) in vec3 a_position; -layout(location = 1) in vec3 a_normal; -layout(location = 2) in float a_object_id; + +struct InstanceRecord { + mat4 transform; + uint object_id; + uint color_override; + uint _pad0; + uint _pad1; +}; +layout(std430, binding = 0) readonly buffer Instances { + InstanceRecord instances[]; +}; uniform mat4 u_view_projection; +uniform uint u_instance_offset; flat out uint v_object_id; void main() { - gl_Position = u_view_projection * vec4(a_position, 1.0); - v_object_id = floatBitsToUint(a_object_id); + InstanceRecord inst = instances[u_instance_offset + uint(gl_InstanceID)]; + gl_Position = u_view_projection * inst.transform * vec4(a_position, 1.0); + v_object_id = inst.object_id; } )"; static const char* PICK_FRAGMENT_SHADER = R"( #version 450 core flat in uint v_object_id; - out uint frag_id; - -void main() { - frag_id = v_object_id; -} +void main() { frag_id = v_object_id; } )"; static const char* AXIS_VERTEX_SHADER = R"( #version 450 core layout(location = 0) in vec3 a_position; layout(location = 1) in vec3 a_color; - uniform mat4 u_mvp; - out vec3 v_color; - void main() { gl_Position = u_mvp * vec4(a_position, 1.0); v_color = a_color; @@ -135,10 +175,7 @@ static const char* AXIS_FRAGMENT_SHADER = R"( #version 450 core in vec3 v_color; out vec4 frag_color; - -void main() { - frag_color = 
vec4(v_color, 1.0); -} +void main() { frag_color = vec4(v_color, 1.0); } )"; static GLuint compileShader(QOpenGLFunctions_4_5_Core* gl, GLenum type, const char* source) { @@ -148,7 +185,7 @@ static GLuint compileShader(QOpenGLFunctions_4_5_Core* gl, GLenum type, const ch GLint ok = 0; gl->glGetShaderiv(shader, GL_COMPILE_STATUS, &ok); if (!ok) { - char log[1024]; + char log[2048]; gl->glGetShaderInfoLog(shader, sizeof(log), nullptr, log); qWarning("Shader compile error: %s", log); } @@ -163,7 +200,7 @@ static GLuint linkProgram(QOpenGLFunctions_4_5_Core* gl, GLuint vert, GLuint fra GLint ok = 0; gl->glGetProgramiv(prog, GL_LINK_STATUS, &ok); if (!ok) { - char log[1024]; + char log[2048]; gl->glGetProgramInfoLog(prog, sizeof(log), nullptr, log); qWarning("Program link error: %s", log); } @@ -172,6 +209,8 @@ static GLuint linkProgram(QOpenGLFunctions_4_5_Core* gl, GLuint vert, GLuint fra return prog; } +// ----------------------------------------------------------------------------- + ViewportWindow::ViewportWindow(QWindow* parent) : QWindow(parent) { @@ -188,26 +227,25 @@ ViewportWindow::ViewportWindow(QWindow* parent) connect(&render_timer_, &QTimer::timeout, this, [this]() { if (isExposed()) render(); }); - render_timer_.setInterval(16); // ~60 fps + render_timer_.setInterval(16); } ViewportWindow::~ViewportWindow() { - if (bvh_build_thread_.joinable()) - bvh_build_thread_.join(); if (context_) { context_->makeCurrent(this); if (gl_) { for (auto& [mid, m] : models_gpu_) { - if (m.vao) gl_->glDeleteVertexArrays(1, &m.vao); - if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); - if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); + if (m.vao) gl_->glDeleteVertexArrays(1, &m.vao); + if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); + if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); + if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); } - if (axis_vao_) gl_->glDeleteVertexArrays(1, &axis_vao_); - if (axis_vbo_) gl_->glDeleteBuffers(1, &axis_vbo_); - if (main_program_) 
gl_->glDeleteProgram(main_program_); - if (pick_program_) gl_->glDeleteProgram(pick_program_); - if (axis_program_) gl_->glDeleteProgram(axis_program_); - if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); + if (axis_vao_) gl_->glDeleteVertexArrays(1, &axis_vao_); + if (axis_vbo_) gl_->glDeleteBuffers(1, &axis_vbo_); + if (main_program_) gl_->glDeleteProgram(main_program_); + if (pick_program_) gl_->glDeleteProgram(pick_program_); + if (axis_program_) gl_->glDeleteProgram(axis_program_); + if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); if (pick_color_tex_) gl_->glDeleteTextures(1, &pick_color_tex_); if (pick_depth_rbo_) gl_->glDeleteRenderbuffers(1, &pick_depth_rbo_); } @@ -220,17 +258,11 @@ void ViewportWindow::initGL() { context_ = new QOpenGLContext(this); context_->setFormat(requestedFormat()); - if (!context_->create()) { - qFatal("Failed to create OpenGL context"); - return; - } + if (!context_->create()) { qFatal("Failed to create OpenGL context"); return; } context_->makeCurrent(this); gl_ = QOpenGLVersionFunctionsFactory::get(context_); - if (!gl_) { - qWarning("OpenGL 4.5 not available, falling back"); - return; - } + if (!gl_) { qWarning("OpenGL 4.5 not available"); return; } buildShaders(); buildAxisGizmo(); @@ -247,28 +279,23 @@ void ViewportWindow::initGL() { } void ViewportWindow::setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo) { - gl_->glVertexArrayVertexBuffer(vao, 0, vbo, 0, VERTEX_STRIDE * sizeof(float)); + gl_->glVertexArrayVertexBuffer(vao, 0, vbo, 0, INSTANCED_VERTEX_STRIDE_BYTES); gl_->glVertexArrayElementBuffer(vao, ebo); - // position + // position (3 float @ 0) gl_->glEnableVertexArrayAttrib(vao, 0); gl_->glVertexArrayAttribFormat(vao, 0, 3, GL_FLOAT, GL_FALSE, 0); gl_->glVertexArrayAttribBinding(vao, 0, 0); - // normal + // normal (3 float @ 12) gl_->glEnableVertexArrayAttrib(vao, 1); - gl_->glVertexArrayAttribFormat(vao, 1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float)); + gl_->glVertexArrayAttribFormat(vao, 1, 3, GL_FLOAT, 
GL_FALSE, 12); gl_->glVertexArrayAttribBinding(vao, 1, 0); - // object_id (passed as float, decoded in shader via floatBitsToUint) + // color (4 ubyte @ 24, normalized) gl_->glEnableVertexArrayAttrib(vao, 2); - gl_->glVertexArrayAttribFormat(vao, 2, 1, GL_FLOAT, GL_FALSE, 6 * sizeof(float)); + gl_->glVertexArrayAttribFormat(vao, 2, 4, GL_UNSIGNED_BYTE, GL_TRUE, 24); gl_->glVertexArrayAttribBinding(vao, 2, 0); - - // color (RGBA8 packed into the 4 bytes at offset 28; normalized to vec4) - gl_->glEnableVertexArrayAttrib(vao, 3); - gl_->glVertexArrayAttribFormat(vao, 3, 4, GL_UNSIGNED_BYTE, GL_TRUE, 7 * sizeof(float)); - gl_->glVertexArrayAttribBinding(vao, 3, 0); } void ViewportWindow::buildShaders() { @@ -291,24 +318,20 @@ void ViewportWindow::buildShaders() { void ViewportWindow::buildAxisGizmo() { static const float axis_data[] = { - 0.0f, 0.0f, 0.0f, 1.0f, 0.25f, 0.25f, - 1.0f, 0.0f, 0.0f, 1.0f, 0.25f, 0.25f, - 0.0f, 0.0f, 0.0f, 0.30f, 0.95f, 0.30f, - 0.0f, 1.0f, 0.0f, 0.30f, 0.95f, 0.30f, - 0.0f, 0.0f, 0.0f, 0.30f, 0.55f, 1.0f, - 0.0f, 0.0f, 1.0f, 0.30f, 0.55f, 1.0f, + 0,0,0, 1.0f,0.25f,0.25f, + 1,0,0, 1.0f,0.25f,0.25f, + 0,0,0, 0.30f,0.95f,0.30f, + 0,1,0, 0.30f,0.95f,0.30f, + 0,0,0, 0.30f,0.55f,1.0f, + 0,0,1, 0.30f,0.55f,1.0f, }; - gl_->glCreateVertexArrays(1, &axis_vao_); gl_->glCreateBuffers(1, &axis_vbo_); gl_->glNamedBufferStorage(axis_vbo_, sizeof(axis_data), axis_data, 0); - gl_->glVertexArrayVertexBuffer(axis_vao_, 0, axis_vbo_, 0, 6 * sizeof(float)); - gl_->glEnableVertexArrayAttrib(axis_vao_, 0); gl_->glVertexArrayAttribFormat(axis_vao_, 0, 3, GL_FLOAT, GL_FALSE, 0); gl_->glVertexArrayAttribBinding(axis_vao_, 0, 0); - gl_->glEnableVertexArrayAttrib(axis_vao_, 1); gl_->glVertexArrayAttribFormat(axis_vao_, 1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float)); gl_->glVertexArrayAttribBinding(axis_vao_, 1, 0); @@ -318,25 +341,20 @@ bool ViewportWindow::growModelVbo(ModelGpuData& m, size_t needed_total) { size_t new_capacity = m.vbo_capacity; while (new_capacity < 
needed_total) new_capacity *= 2; if (new_capacity > MAX_BUFFER_SIZE) { - qWarning("VBO grow request (%zu MB) exceeds cap", new_capacity / (1024 * 1024)); + qWarning("VBO grow request (%zu MB) exceeds cap", new_capacity / (1024*1024)); return false; } - GLuint new_vbo = 0; gl_->glCreateBuffers(1, &new_vbo); gl_->glNamedBufferStorage(new_vbo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); - if (m.vbo_used > 0) { gl_->glCopyNamedBufferSubData(m.vbo, new_vbo, 0, 0, m.vbo_used); } - gl_->glDeleteBuffers(1, &m.vbo); m.vbo = new_vbo; m.vbo_capacity = new_capacity; - - gl_->glVertexArrayVertexBuffer(m.vao, 0, m.vbo, 0, VERTEX_STRIDE * sizeof(float)); - - qInfo("Model VBO grew to %zu MB", m.vbo_capacity / (1024 * 1024)); + gl_->glVertexArrayVertexBuffer(m.vao, 0, m.vbo, 0, INSTANCED_VERTEX_STRIDE_BYTES); + qInfo("Model VBO grew to %zu MB", m.vbo_capacity / (1024*1024)); return true; } @@ -344,268 +362,178 @@ bool ViewportWindow::growModelEbo(ModelGpuData& m, size_t needed_total) { size_t new_capacity = m.ebo_capacity; while (new_capacity < needed_total) new_capacity *= 2; if (new_capacity > MAX_BUFFER_SIZE) { - qWarning("EBO grow request (%zu MB) exceeds cap", new_capacity / (1024 * 1024)); + qWarning("EBO grow request (%zu MB) exceeds cap", new_capacity / (1024*1024)); return false; } - GLuint new_ebo = 0; gl_->glCreateBuffers(1, &new_ebo); gl_->glNamedBufferStorage(new_ebo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); - if (m.ebo_used > 0) { gl_->glCopyNamedBufferSubData(m.ebo, new_ebo, 0, 0, m.ebo_used); } - gl_->glDeleteBuffers(1, &m.ebo); m.ebo = new_ebo; m.ebo_capacity = new_capacity; - gl_->glVertexArrayElementBuffer(m.vao, m.ebo); - - qInfo("Model EBO grew to %zu MB", m.ebo_capacity / (1024 * 1024)); + qInfo("Model EBO grew to %zu MB", m.ebo_capacity / (1024*1024)); return true; } -void ViewportWindow::uploadChunk(const UploadChunk& chunk) { +ModelGpuData& ViewportWindow::getOrCreateModel(uint32_t model_id) { + auto it = models_gpu_.find(model_id); + if (it != 
models_gpu_.end()) return it->second; + + ModelGpuData m; + gl_->glCreateVertexArrays(1, &m.vao); + gl_->glCreateBuffers(1, &m.vbo); + gl_->glCreateBuffers(1, &m.ebo); + + m.vbo_capacity = INITIAL_VBO_SIZE; + m.ebo_capacity = INITIAL_EBO_SIZE; + gl_->glNamedBufferStorage(m.vbo, m.vbo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + gl_->glNamedBufferStorage(m.ebo, m.ebo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + setupVaoLayout(m.vao, m.vbo, m.ebo); + + return models_gpu_.emplace(model_id, std::move(m)).first->second; +} + +void ViewportWindow::uploadMeshChunk(const MeshChunk& chunk) { if (!gl_initialized_) return; if (chunk.vertices.empty() || chunk.indices.empty()) return; - context_->makeCurrent(this); - // Get or create per-model GPU data. - auto it = models_gpu_.find(chunk.model_id); - if (it == models_gpu_.end()) { - ModelGpuData m; - gl_->glCreateVertexArrays(1, &m.vao); - gl_->glCreateBuffers(1, &m.vbo); - gl_->glCreateBuffers(1, &m.ebo); - - m.vbo_capacity = INITIAL_VBO_SIZE; - m.ebo_capacity = INITIAL_EBO_SIZE; - gl_->glNamedBufferStorage(m.vbo, m.vbo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); - gl_->glNamedBufferStorage(m.ebo, m.ebo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); - - setupVaoLayout(m.vao, m.vbo, m.ebo); - it = models_gpu_.emplace(chunk.model_id, std::move(m)).first; - } - - auto& mgpu = it->second; + ModelGpuData& m = getOrCreateModel(chunk.model_id); - size_t vb_size = chunk.vertices.size() * sizeof(float); - size_t ib_size = chunk.indices.size() * sizeof(uint32_t); + const size_t vb_size = chunk.vertices.size() * sizeof(float); + const size_t ib_size = chunk.indices.size() * sizeof(uint32_t); - if (mgpu.vbo_used + vb_size > mgpu.vbo_capacity) { - if (!growModelVbo(mgpu, mgpu.vbo_used + vb_size)) { - qWarning("VBO at cap, skipping chunk"); - return; - } + if (m.vbo_used + vb_size > m.vbo_capacity) { + if (!growModelVbo(m, m.vbo_used + vb_size)) return; } - if (mgpu.ebo_used + ib_size > mgpu.ebo_capacity) { - if (!growModelEbo(mgpu, 
mgpu.ebo_used + ib_size)) { - qWarning("EBO at cap, skipping chunk"); - return; - } + if (m.ebo_used + ib_size > m.ebo_capacity) { + if (!growModelEbo(m, m.ebo_used + ib_size)) return; } - uint32_t base_vertex = mgpu.vertex_count; - - gl_->glNamedBufferSubData(mgpu.vbo, mgpu.vbo_used, vb_size, chunk.vertices.data()); - - // Remap chunk-local indices into model-local global indices. - std::vector global_indices(chunk.indices.size()); - for (size_t i = 0; i < chunk.indices.size(); ++i) { - global_indices[i] = chunk.indices[i] + base_vertex; - } - gl_->glNamedBufferSubData(mgpu.ebo, mgpu.ebo_used, ib_size, global_indices.data()); - - // Compute AABB from vertex positions in this chunk. - ObjectDrawInfo info; - info.index_offset = static_cast(mgpu.ebo_used); - info.index_count = static_cast(chunk.indices.size()); - info.model_id = chunk.model_id; - - const size_t num_verts = chunk.vertices.size() / VERTEX_STRIDE; - if (num_verts > 0) { - info.aabb_min[0] = info.aabb_min[1] = info.aabb_min[2] = std::numeric_limits::max(); - info.aabb_max[0] = info.aabb_max[1] = info.aabb_max[2] = -std::numeric_limits::max(); - for (size_t v = 0; v < num_verts; ++v) { - const float* pos = &chunk.vertices[v * VERTEX_STRIDE]; - for (int a = 0; a < 3; ++a) { - if (pos[a] < info.aabb_min[a]) info.aabb_min[a] = pos[a]; - if (pos[a] > info.aabb_max[a]) info.aabb_max[a] = pos[a]; - } - } - } else { - info.aabb_min[0] = info.aabb_min[1] = info.aabb_min[2] = 0.0f; - info.aabb_max[0] = info.aabb_max[1] = info.aabb_max[2] = 0.0f; + MeshInfo info; + info.vbo_byte_offset = static_cast(m.vbo_used); + info.vertex_count = static_cast( + chunk.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS); + info.ebo_byte_offset = static_cast(m.ebo_used); + info.index_count = static_cast(chunk.indices.size()); + for (int a = 0; a < 3; ++a) { + info.local_aabb_min[a] = chunk.local_aabb_min[a]; + info.local_aabb_max[a] = chunk.local_aabb_max[a]; } + info.first_instance = 0; + info.instance_count = 0; + + 
gl_->glNamedBufferSubData(m.vbo, m.vbo_used, vb_size, chunk.vertices.data()); + gl_->glNamedBufferSubData(m.ebo, m.ebo_used, ib_size, chunk.indices.data()); + m.vbo_used += vb_size; + m.ebo_used += ib_size; + m.vertex_count += info.vertex_count; - mgpu.draw_info.push_back(info); - mgpu.active_draw_count = static_cast(mgpu.draw_info.size()); // immediately drawable - mgpu.vbo_used += vb_size; - mgpu.ebo_used += ib_size; - mgpu.vertex_count += static_cast(num_verts); - mgpu.total_triangles += static_cast(chunk.indices.size() / 3); + if (m.meshes.size() <= chunk.local_mesh_id) m.meshes.resize(chunk.local_mesh_id + 1); + m.meshes[chunk.local_mesh_id] = info; } -void ViewportWindow::uploadBulk(uint32_t model_id, - std::vector vertices, - std::vector indices, - const std::vector& draw_info, - std::shared_ptr bvh_set) { +void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { if (!gl_initialized_) return; - if (vertices.empty() || indices.empty()) return; + // We don't need a GL context here since we're only touching CPU state, + // but the signal may fire on the render thread so keep it simple. + ModelGpuData& m = getOrCreateModel(chunk.model_id); - context_->makeCurrent(this); + InstanceCpu inst; + inst.mesh_id = chunk.local_mesh_id; + inst.object_id = chunk.object_id; + inst.color_override_rgba8 = chunk.color_override_rgba8; + inst.model_id = chunk.model_id; + std::memcpy(inst.transform, chunk.transform, sizeof(inst.transform)); + std::memcpy(inst.world_aabb_min, chunk.world_aabb_min, sizeof(inst.world_aabb_min)); + std::memcpy(inst.world_aabb_max, chunk.world_aabb_max, sizeof(inst.world_aabb_max)); + m.instances.push_back(inst); - size_t vb_size = vertices.size() * sizeof(float); - size_t ib_size = indices.size() * sizeof(uint32_t); + if (chunk.local_mesh_id < m.meshes.size()) { + m.total_triangles += m.meshes[chunk.local_mesh_id].index_count / 3; + } +} - // Allocate empty buffers at exact size — no data uploaded yet. 
- ModelGpuData m; - gl_->glCreateVertexArrays(1, &m.vao); - gl_->glCreateBuffers(1, &m.vbo); - gl_->glCreateBuffers(1, &m.ebo); +void ViewportWindow::finalizeModel(uint32_t model_id) { + if (!gl_initialized_) return; + context_->makeCurrent(this); - m.vbo_capacity = vb_size; - m.ebo_capacity = ib_size; - gl_->glNamedBufferStorage(m.vbo, vb_size, nullptr, GL_DYNAMIC_STORAGE_BIT); - gl_->glNamedBufferStorage(m.ebo, ib_size, nullptr, GL_DYNAMIC_STORAGE_BIT); + auto it = models_gpu_.find(model_id); + if (it == models_gpu_.end()) return; + ModelGpuData& m = it->second; + if (m.instances.empty()) { m.finalized = true; return; } + + // Sort instances by mesh_id (stable for deterministic ordering). + std::stable_sort(m.instances.begin(), m.instances.end(), + [](const InstanceCpu& a, const InstanceCpu& b) { + return a.mesh_id < b.mesh_id; + }); + + // Assign per-mesh contiguous range. + for (auto& mesh : m.meshes) { mesh.first_instance = 0; mesh.instance_count = 0; } + uint32_t current = UINT32_MAX; + uint32_t run_start = 0; + for (uint32_t i = 0; i < m.instances.size(); ++i) { + uint32_t mid = m.instances[i].mesh_id; + if (mid != current) { + if (current != UINT32_MAX && current < m.meshes.size()) { + m.meshes[current].first_instance = run_start; + m.meshes[current].instance_count = i - run_start; + } + current = mid; + run_start = i; + } + } + if (current != UINT32_MAX && current < m.meshes.size()) { + m.meshes[current].first_instance = run_start; + m.meshes[current].instance_count = static_cast(m.instances.size()) - run_start; + } - setupVaoLayout(m.vao, m.vbo, m.ebo); + // Build GPU-layout array. 
+ std::vector gpu(m.instances.size()); + for (size_t i = 0; i < m.instances.size(); ++i) { + const InstanceCpu& src = m.instances[i]; + InstanceGpu& dst = gpu[i]; + std::memcpy(dst.transform, src.transform, sizeof(dst.transform)); + dst.object_id = src.object_id; + dst.color_override_rgba8 = src.color_override_rgba8; + dst._pad0 = 0; + dst._pad1 = 0; + } - m.vbo_used = vb_size; - m.ebo_used = ib_size; - m.vertex_count = static_cast(vertices.size() / VERTEX_STRIDE); - m.draw_info = draw_info; - m.active_draw_count = 0; // nothing drawable yet + // Allocate and upload SSBO. + if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); + gl_->glCreateBuffers(1, &m.ssbo); + const size_t ssbo_bytes = gpu.size() * sizeof(InstanceGpu); + gl_->glNamedBufferStorage(m.ssbo, ssbo_bytes, gpu.data(), 0); + m.ssbo_instance_count = static_cast(gpu.size()); - uint32_t total_tri = 0; - for (const auto& di : draw_info) total_tri += di.index_count / 3; - m.total_triangles = total_tri; + m.finalized = true; - // Delete old model data if re-uploading. - auto it = models_gpu_.find(model_id); - if (it != models_gpu_.end()) { - gl_->glDeleteVertexArrays(1, &it->second.vao); - gl_->glDeleteBuffers(1, &it->second.vbo); - gl_->glDeleteBuffers(1, &it->second.ebo); - } - models_gpu_[model_id] = std::move(m); - - // Queue progressive upload — data will stream in over subsequent frames. 
- PendingUpload pu; - pu.model_id = model_id; - pu.vertices = std::move(vertices); - pu.indices = std::move(indices); - pu.bvh_set = std::move(bvh_set); - pending_uploads_.push_back(std::move(pu)); - - qDebug("Bulk upload queued: model %u, %zu vertices, %zu indices, %zu objects", - model_id, vertices.size() / VERTEX_STRIDE, indices.size(), draw_info.size()); + qDebug("Model %u finalized: %zu verts, %zu meshes, %zu instances, %.1f MB vram " + "(vbo %.1f + ebo %.1f + ssbo %.1f)", + model_id, size_t(m.vertex_count), m.meshes.size(), m.instances.size(), + (m.vbo_capacity + m.ebo_capacity + ssbo_bytes) / (1024.0*1024.0), + m.vbo_capacity / (1024.0*1024.0), + m.ebo_capacity / (1024.0*1024.0), + ssbo_bytes / (1024.0*1024.0)); } void ViewportWindow::resetScene() { if (!gl_initialized_) return; - - if (bvh_build_thread_.joinable()) - bvh_build_thread_.join(); - context_->makeCurrent(this); for (auto& [mid, m] : models_gpu_) { - if (m.vao) gl_->glDeleteVertexArrays(1, &m.vao); - if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); - if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); + if (m.vao) gl_->glDeleteVertexArrays(1, &m.vao); + if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); + if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); + if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); } models_gpu_.clear(); - model_bvhs_.clear(); - pending_uploads_.clear(); selected_object_id_ = 0; - { - std::lock_guard bvh_lock(bvh_result_mutex_); - pending_bvh_.reset(); - } -} - -static const size_t UPLOAD_CHUNK_BYTES = 48 * 1024 * 1024; // 48 MB per frame - -void ViewportWindow::processPendingUploads() { - if (pending_uploads_.empty()) return; - - auto& pu = pending_uploads_.front(); - auto it = models_gpu_.find(pu.model_id); - if (it == models_gpu_.end()) { - pending_uploads_.pop_front(); - return; - } - auto& mgpu = it->second; - - size_t vbo_total = pu.vertices.size() * sizeof(float); - size_t ebo_total = pu.indices.size() * sizeof(uint32_t); - - // Phase 1: Upload VBO in chunks. 
- if (pu.vbo_uploaded < vbo_total) { - size_t remaining = vbo_total - pu.vbo_uploaded; - size_t chunk = std::min(remaining, UPLOAD_CHUNK_BYTES); - gl_->glNamedBufferSubData(mgpu.vbo, pu.vbo_uploaded, chunk, - reinterpret_cast(pu.vertices.data()) + pu.vbo_uploaded); - pu.vbo_uploaded += chunk; - - if (pu.vbo_uploaded >= vbo_total) { - // VBO done — free CPU memory. - pu.vertices.clear(); - pu.vertices.shrink_to_fit(); - } - return; // yield to render loop - } - - // Phase 2: Upload EBO in chunks. Objects become drawable as their range lands. - if (pu.ebo_uploaded < ebo_total) { - size_t remaining = ebo_total - pu.ebo_uploaded; - size_t chunk = std::min(remaining, UPLOAD_CHUNK_BYTES); - gl_->glNamedBufferSubData(mgpu.ebo, pu.ebo_uploaded, chunk, - reinterpret_cast(pu.indices.data()) + pu.ebo_uploaded); - pu.ebo_uploaded += chunk; - - // Advance active_draw_count: activate objects whose EBO range is fully uploaded. - while (mgpu.active_draw_count < mgpu.draw_info.size()) { - const auto& obj = mgpu.draw_info[mgpu.active_draw_count]; - size_t obj_end = obj.index_offset + obj.index_count * sizeof(uint32_t); - if (obj_end <= pu.ebo_uploaded) - mgpu.active_draw_count++; - else - break; - } - - if (pu.ebo_uploaded >= ebo_total) { - // EBO done — free CPU memory. - pu.indices.clear(); - pu.indices.shrink_to_fit(); - } else { - return; // yield to render loop - } - } - - // Fully uploaded — activate BVH if present. 
- mgpu.active_draw_count = static_cast(mgpu.draw_info.size()); - if (pu.bvh_set) { - model_bvhs_[pu.model_id] = std::move(pu.bvh_set); - } - - size_t total_vbo = 0, total_ebo = 0; - for (const auto& [mid, mg] : models_gpu_) { - total_vbo += mg.vbo_capacity; - total_ebo += mg.ebo_capacity; - } - qDebug("Progressive upload complete: model %u (this: vbo %.1f MB + ebo %.1f MB, " - "%u objects, %u triangles) scene total vram %.1f MB", - pu.model_id, - mgpu.vbo_capacity / (1024.0 * 1024.0), - mgpu.ebo_capacity / (1024.0 * 1024.0), - static_cast(mgpu.draw_info.size()), - mgpu.total_triangles, - (total_vbo + total_ebo) / (1024.0 * 1024.0)); - pending_uploads_.pop_front(); } void ViewportWindow::hideModel(uint32_t model_id) { @@ -621,161 +549,35 @@ void ViewportWindow::showModel(uint32_t model_id) { void ViewportWindow::removeModel(uint32_t model_id) { if (!gl_initialized_) return; context_->makeCurrent(this); - - // Cancel any pending upload for this model. - pending_uploads_.erase( - std::remove_if(pending_uploads_.begin(), pending_uploads_.end(), - [model_id](const PendingUpload& pu) { return pu.model_id == model_id; }), - pending_uploads_.end()); - auto it = models_gpu_.find(model_id); if (it != models_gpu_.end()) { - gl_->glDeleteVertexArrays(1, &it->second.vao); - gl_->glDeleteBuffers(1, &it->second.vbo); - gl_->glDeleteBuffers(1, &it->second.ebo); + if (it->second.vao) gl_->glDeleteVertexArrays(1, &it->second.vao); + if (it->second.vbo) gl_->glDeleteBuffers(1, &it->second.vbo); + if (it->second.ebo) gl_->glDeleteBuffers(1, &it->second.ebo); + if (it->second.ssbo) gl_->glDeleteBuffers(1, &it->second.ssbo); models_gpu_.erase(it); } - model_bvhs_.erase(model_id); -} - -std::vector ViewportWindow::readbackEbo(uint32_t model_id) const { - std::vector ebo_data; - auto it = models_gpu_.find(model_id); - if (!gl_ || it == models_gpu_.end() || it->second.ebo_used == 0) return ebo_data; - - const auto& m = it->second; - size_t num_indices = m.ebo_used / sizeof(uint32_t); - 
ebo_data.resize(num_indices); - gl_->glGetNamedBufferSubData(m.ebo, 0, m.ebo_used, ebo_data.data()); - return ebo_data; -} - -std::vector ViewportWindow::readbackVbo(uint32_t model_id) const { - std::vector vbo_data; - auto it = models_gpu_.find(model_id); - if (!gl_ || it == models_gpu_.end() || it->second.vbo_used == 0) return vbo_data; - - const auto& m = it->second; - size_t num_floats = m.vbo_used / sizeof(float); - vbo_data.resize(num_floats); - gl_->glGetNamedBufferSubData(m.vbo, 0, m.vbo_used, vbo_data.data()); - return vbo_data; -} - -void ViewportWindow::buildBvhAsync(uint32_t model_id, - const std::string& ifc_path, - uint64_t ifc_file_size, - std::vector sidecar_elements, - std::string sidecar_string_table) { - if (bvh_build_thread_.joinable()) - bvh_build_thread_.join(); - - auto it = models_gpu_.find(model_id); - if (it == models_gpu_.end()) return; - - // Snapshot draw info; read back EBO + VBO on GL thread. - std::vector draw_snapshot = it->second.draw_info; - std::vector ebo_snapshot = readbackEbo(model_id); - std::vector vbo_snapshot; - if (!ifc_path.empty() && !sidecar_elements.empty()) { - vbo_snapshot = readbackVbo(model_id); - } - - if (draw_snapshot.empty() || ebo_snapshot.empty()) return; - - bvh_build_thread_ = std::thread([this, - model_id, - draw_info = std::move(draw_snapshot), - ebo_data = std::move(ebo_snapshot), - vbo_data = std::move(vbo_snapshot), - elements = std::move(sidecar_elements), - string_table = std::move(sidecar_string_table), - ifc_path, ifc_file_size]() { - auto bvh_set = buildBvhSet(draw_info); - - EboReorderResult ebo_result = reorderEbo(*bvh_set, draw_info, ebo_data); - - // Write full sidecar if requested. 
- if (!ifc_path.empty() && !elements.empty() && !vbo_data.empty()) { - SidecarData sd; - sd.vertices = vbo_data; - sd.indices = ebo_result.reordered_ebo; - sd.draw_info = ebo_result.reordered_draw_info; - sd.elements = std::move(elements); - sd.string_table = std::move(string_table); - sd.bvh_set = bvh_set; - writeSidecar(ifc_path, sd, ifc_file_size); - } - - { - std::lock_guard lock(bvh_result_mutex_); - pending_bvh_ = std::make_unique(); - pending_bvh_->model_id = model_id; - pending_bvh_->bvh_set = std::move(bvh_set); - pending_bvh_->ebo_reorder = std::move(ebo_result); - } - }); -} - -void ViewportWindow::applyBvhResult() { - std::unique_ptr result; - { - std::lock_guard lock(bvh_result_mutex_); - result = std::move(pending_bvh_); - } - if (!result) return; - - auto it = models_gpu_.find(result->model_id); - if (it == models_gpu_.end()) return; - - auto& mgpu = it->second; - - // Re-upload the reordered EBO into this model's buffer. - if (!result->ebo_reorder.reordered_ebo.empty()) { - size_t ebo_bytes = result->ebo_reorder.reordered_ebo.size() * sizeof(uint32_t); - if (ebo_bytes <= mgpu.ebo_capacity) { - gl_->glNamedBufferSubData(mgpu.ebo, 0, ebo_bytes, - result->ebo_reorder.reordered_ebo.data()); - } - } - - // Swap draw info. 
- if (result->ebo_reorder.reordered_draw_info.size() == mgpu.draw_info.size()) { - mgpu.draw_info = std::move(result->ebo_reorder.reordered_draw_info); - } - - model_bvhs_[result->model_id] = std::move(result->bvh_set); - - qDebug("BVH activated for model %u", result->model_id); } -void ViewportWindow::setSelectedObjectId(uint32_t id) { - selected_object_id_ = id; -} +void ViewportWindow::setSelectedObjectId(uint32_t id) { selected_object_id_ = id; } uint32_t ViewportWindow::pickObjectAt(int x, int y) { if (!gl_initialized_) return 0; - context_->makeCurrent(this); int w = width() * devicePixelRatio(); int h = height() * devicePixelRatio(); - if (pick_width_ != w || pick_height_ != h) { if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); if (pick_color_tex_) gl_->glDeleteTextures(1, &pick_color_tex_); if (pick_depth_rbo_) gl_->glDeleteRenderbuffers(1, &pick_depth_rbo_); - gl_->glCreateFramebuffers(1, &pick_fbo_); - gl_->glCreateTextures(GL_TEXTURE_2D, 1, &pick_color_tex_); gl_->glTextureStorage2D(pick_color_tex_, 1, GL_R32UI, w, h); gl_->glNamedFramebufferTexture(pick_fbo_, GL_COLOR_ATTACHMENT0, pick_color_tex_, 0); - gl_->glCreateRenderbuffers(1, &pick_depth_rbo_); gl_->glNamedRenderbufferStorage(pick_depth_rbo_, GL_DEPTH_COMPONENT24, w, h); gl_->glNamedFramebufferRenderbuffer(pick_fbo_, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, pick_depth_rbo_); - pick_width_ = w; pick_height_ = h; } @@ -785,163 +587,32 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { int px = x * devicePixelRatio(); int py = (height() - y) * devicePixelRatio(); uint32_t pixel = 0; - gl_->glGetTextureSubImage(pick_color_tex_, 0, px, py, 0, 1, 1, 1, GL_RED_INTEGER, GL_UNSIGNED_INT, sizeof(pixel), &pixel); - + gl_->glGetTextureSubImage(pick_color_tex_, 0, px, py, 0, 1, 1, 1, + GL_RED_INTEGER, GL_UNSIGNED_INT, sizeof(pixel), &pixel); return pixel; } void ViewportWindow::updateCamera() { float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); - 
QVector3D eye; eye.setX(camera_target_.x() + camera_distance_ * cosf(pitch_rad) * cosf(yaw_rad)); eye.setY(camera_target_.y() + camera_distance_ * cosf(pitch_rad) * sinf(yaw_rad)); eye.setZ(camera_target_.z() + camera_distance_ * sinf(pitch_rad)); - view_matrix_.setToIdentity(); view_matrix_.lookAt(eye, camera_target_, QVector3D(0, 0, 1)); - proj_matrix_.setToIdentity(); float aspect = width() > 0 ? float(width()) / float(height()) : 1.0f; proj_matrix_.perspective(45.0f, aspect, 0.1f, camera_distance_ * 10.0f); } -bool ViewportWindow::aabbInFrustum(const float aabb_min[3], const float aabb_max[3], - const float planes[6][4]) { - for (int p = 0; p < 6; ++p) { - float px = planes[p][0] >= 0.0f ? aabb_max[0] : aabb_min[0]; - float py = planes[p][1] >= 0.0f ? aabb_max[1] : aabb_min[1]; - float pz = planes[p][2] >= 0.0f ? aabb_max[2] : aabb_min[2]; - float dist = planes[p][0] * px + planes[p][1] * py + planes[p][2] * pz + planes[p][3]; - if (dist < 0.0f) return false; - } - return true; -} - -void ViewportWindow::traverseBvh(const ModelBvh& mbvh, const ModelGpuData& mgpu, - const float planes[6][4]) { - if (mbvh.nodes.empty()) return; - - uint32_t stack[64]; - int sp = 0; - stack[sp++] = 0; // root - - // Get the current model's draw command being built. - auto& cmd = frame_draw_cmds_.back(); - - while (sp > 0) { - uint32_t ni = stack[--sp]; - const BvhNode& node = mbvh.nodes[ni]; - - if (!aabbInFrustum(node.aabb_min, node.aabb_max, planes)) - continue; - - if (node.count > 0) { - // Leaf-batched draw: after reorderEbo, a leaf's objects occupy a - // contiguous EBO range. Emit one draw command covering all of them - // instead of N per-object tests/draws. The leaf AABB test above is - // already a conservative cull; any overdraw (up to BVH_MAX_LEAF_SIZE - // objects that may be fully outside the frustum but inside the leaf - // AABB) costs far less than the per-draw CPU/driver overhead we save. 
- uint32_t first_oi = mbvh.object_indices[node.right_or_first]; - const auto& first_obj = mgpu.draw_info[first_oi]; - uint32_t leaf_offset = first_obj.index_offset; - uint32_t leaf_count = 0; - for (uint32_t i = 0; i < node.count; ++i) { - uint32_t oi = mbvh.object_indices[node.right_or_first + i]; - leaf_count += mgpu.draw_info[oi].index_count; - } - cmd.counts.push_back(static_cast(leaf_count)); - cmd.offsets.push_back(reinterpret_cast( - static_cast(leaf_offset))); - visible_triangles_ += leaf_count / 3; - visible_objects_ += node.count; - } else { - if (sp < 63) { - stack[sp++] = node.right_or_first; - stack[sp++] = ni + 1; - } - } - } -} - -void ViewportWindow::buildVisibleList(const QMatrix4x4& vp) { - frame_draw_cmds_.clear(); - visible_triangles_ = 0; - visible_objects_ = 0; - - // Extract 6 frustum planes from the view-projection matrix. - float planes[6][4]; - for (int i = 0; i < 4; ++i) { - planes[0][i] = vp(3, i) + vp(0, i); // left - planes[1][i] = vp(3, i) - vp(0, i); // right - planes[2][i] = vp(3, i) + vp(1, i); // bottom - planes[3][i] = vp(3, i) - vp(1, i); // top - planes[4][i] = vp(3, i) + vp(2, i); // near - planes[5][i] = vp(3, i) - vp(2, i); // far - } - for (int p = 0; p < 6; ++p) { - float len = std::sqrt(planes[p][0] * planes[p][0] + - planes[p][1] * planes[p][1] + - planes[p][2] * planes[p][2]); - if (len > 0.0f) { - float inv = 1.0f / len; - planes[p][0] *= inv; - planes[p][1] *= inv; - planes[p][2] *= inv; - planes[p][3] *= inv; - } - } - - for (auto& [model_id, mgpu] : models_gpu_) { - if (mgpu.hidden || mgpu.active_draw_count == 0) continue; - - frame_draw_cmds_.push_back({mgpu.vao, {}, {}}); - auto& cmd = frame_draw_cmds_.back(); - cmd.counts.reserve(mgpu.active_draw_count); - cmd.offsets.reserve(mgpu.active_draw_count); - - bool fully_loaded = (mgpu.active_draw_count == mgpu.draw_info.size()); - auto bvh_it = model_bvhs_.find(model_id); - - // Only use BVH if model is fully uploaded; during progressive upload, - // fall back to 
linear scan of active objects. - if (fully_loaded && bvh_it != model_bvhs_.end() && bvh_it->second) { - const auto& bvh_set = *bvh_it->second; - auto mbvh_it = bvh_set.models.find(model_id); - if (mbvh_it != bvh_set.models.end()) { - traverseBvh(mbvh_it->second, mgpu, planes); - } - } else { - // Linear scan of active objects only. - for (uint32_t i = 0; i < mgpu.active_draw_count; ++i) { - const auto& obj = mgpu.draw_info[i]; - if (aabbInFrustum(obj.aabb_min, obj.aabb_max, planes)) { - cmd.counts.push_back(static_cast(obj.index_count)); - cmd.offsets.push_back(reinterpret_cast( - static_cast(obj.index_offset))); - visible_triangles_ += obj.index_count / 3; - visible_objects_++; - } - } - } - - if (cmd.counts.empty()) { - frame_draw_cmds_.pop_back(); - } - } -} - void ViewportWindow::render() { if (!gl_initialized_ || !isExposed()) return; context_->makeCurrent(this); - applyBvhResult(); - processPendingUploads(); updateCamera(); - int w = width() * devicePixelRatio(); + int w = width() * devicePixelRatio(); int h = height() * devicePixelRatio(); gl_->glViewport(0, 0, w, h); gl_->glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); @@ -949,24 +620,43 @@ void ViewportWindow::render() { QMatrix4x4 vp = proj_matrix_ * view_matrix_; gl_->glUseProgram(main_program_); - gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(main_program_, "u_view_projection"), 1, GL_FALSE, vp.constData()); - gl_->glUniform3f(gl_->glGetUniformLocation(main_program_, "u_light_dir"), 0.3f, 0.5f, 0.8f); - gl_->glUniform1ui(gl_->glGetUniformLocation(main_program_, "u_selected_id"), selected_object_id_); - - buildVisibleList(vp); - for (const auto& cmd : frame_draw_cmds_) { - gl_->glBindVertexArray(cmd.vao); - gl_->glMultiDrawElements(GL_TRIANGLES, - cmd.counts.data(), GL_UNSIGNED_INT, - cmd.offsets.data(), - static_cast(cmd.counts.size())); + GLint u_vp = gl_->glGetUniformLocation(main_program_, "u_view_projection"); + GLint u_light = gl_->glGetUniformLocation(main_program_, "u_light_dir"); + GLint 
u_sel = gl_->glGetUniformLocation(main_program_, "u_selected_id"); + GLint u_inst_off = gl_->glGetUniformLocation(main_program_, "u_instance_offset"); + gl_->glUniformMatrix4fv(u_vp, 1, GL_FALSE, vp.constData()); + gl_->glUniform3f(u_light, 0.3f, 0.5f, 0.8f); + gl_->glUniform1ui(u_sel, selected_object_id_); + + visible_triangles_ = 0; + visible_objects_ = 0; + instanced_draws_ = 0; + + for (auto& [model_id, m] : models_gpu_) { + if (m.hidden || !m.finalized || !m.ssbo) continue; + gl_->glBindVertexArray(m.vao); + gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); + + for (const auto& mesh : m.meshes) { + if (mesh.instance_count == 0 || mesh.index_count == 0) continue; + gl_->glUniform1ui(u_inst_off, mesh.first_instance); + gl_->glDrawElementsInstancedBaseVertex( + GL_TRIANGLES, + static_cast(mesh.index_count), + GL_UNSIGNED_INT, + reinterpret_cast(static_cast(mesh.ebo_byte_offset)), + static_cast(mesh.instance_count), + static_cast(mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES)); + visible_triangles_ += (mesh.index_count / 3) * mesh.instance_count; + visible_objects_ += mesh.instance_count; + ++instanced_draws_; + } } renderAxisGizmo(); context_->swapBuffers(this); - // Compute FPS. 
float dt = frame_clock_.restart() / 1000.0f; accumulated_time_ += dt; frame_count_++; @@ -975,21 +665,18 @@ void ViewportWindow::render() { frame_count_ = 0; accumulated_time_ = 0.0f; - uint32_t total_obj = 0, total_tri = 0; - size_t total_vram = 0, total_vbo = 0, total_ebo = 0; + uint32_t total_obj = 0, total_tri = 0, total_meshes = 0; + size_t total_vbo = 0, total_ebo = 0, total_ssbo = 0; size_t num_models = 0, num_hidden = 0; - size_t total_leaf_draws = 0; - for (const auto& [mid, m] : models_gpu_) { + for (const auto& [mid, mm] : models_gpu_) { num_models++; - if (m.hidden) { num_hidden++; continue; } - total_obj += static_cast(m.draw_info.size()); - total_tri += m.total_triangles; - total_vbo += m.vbo_capacity; - total_ebo += m.ebo_capacity; - } - total_vram = total_vbo + total_ebo; - for (const auto& cmd : frame_draw_cmds_) { - total_leaf_draws += cmd.counts.size(); + if (mm.hidden || !mm.finalized) { num_hidden++; continue; } + total_obj += static_cast(mm.instances.size()); + total_tri += mm.total_triangles; + total_meshes += static_cast(mm.meshes.size()); + total_vbo += mm.vbo_capacity; + total_ebo += mm.ebo_capacity; + total_ssbo += mm.ssbo_instance_count * sizeof(InstanceGpu); } FrameStats stats; @@ -999,112 +686,95 @@ void ViewportWindow::render() { stats.visible_objects = visible_objects_; stats.total_triangles = total_tri; stats.visible_triangles = visible_triangles_; + stats.unique_meshes = total_meshes; + stats.instanced_draws = instanced_draws_; emit frameStatsUpdated(stats); - double vis_obj_pct = total_obj > 0 ? 100.0 * visible_objects_ / total_obj : 0.0; - double vis_tri_pct = total_tri > 0 ? 
100.0 * visible_triangles_ / total_tri : 0.0; - qDebug("[frame] %.1f fps %.2f ms obj %u/%u (%.1f%%) tri %u/%u (%.1f%%) " - "vram %.1f MB (vbo %.1f + ebo %.1f) models %zu (%zu hidden) " - "leaf_draws %zu model_draws %zu pending_uploads %zu", + qDebug("[frame] %.1f fps %.2f ms obj %u/%u tri %u/%u " + "meshes %u inst_draws %u " + "vram %.1f MB (vbo %.1f + ebo %.1f + ssbo %.1f) models %zu (%zu hidden)", last_fps_, 1000.0f / last_fps_, - visible_objects_, total_obj, vis_obj_pct, - visible_triangles_, total_tri, vis_tri_pct, - total_vram / (1024.0 * 1024.0), - total_vbo / (1024.0 * 1024.0), - total_ebo / (1024.0 * 1024.0), - num_models, num_hidden, - total_leaf_draws, - frame_draw_cmds_.size(), - pending_uploads_.size()); + visible_objects_, total_obj, + visible_triangles_, total_tri, + total_meshes, instanced_draws_, + (total_vbo + total_ebo + total_ssbo) / (1024.0*1024.0), + total_vbo / (1024.0*1024.0), + total_ebo / (1024.0*1024.0), + total_ssbo / (1024.0*1024.0), + num_models, num_hidden); + } +} + +void ViewportWindow::renderPickPass() { + gl_->glBindFramebuffer(GL_FRAMEBUFFER, pick_fbo_); + gl_->glViewport(0, 0, pick_width_, pick_height_); + GLuint clear_val = 0; + gl_->glClearBufferuiv(GL_COLOR, 0, &clear_val); + gl_->glClear(GL_DEPTH_BUFFER_BIT); + + QMatrix4x4 vp = proj_matrix_ * view_matrix_; + gl_->glUseProgram(pick_program_); + GLint u_vp = gl_->glGetUniformLocation(pick_program_, "u_view_projection"); + GLint u_inst_off = gl_->glGetUniformLocation(pick_program_, "u_instance_offset"); + gl_->glUniformMatrix4fv(u_vp, 1, GL_FALSE, vp.constData()); + + for (auto& [model_id, m] : models_gpu_) { + if (m.hidden || !m.finalized || !m.ssbo) continue; + gl_->glBindVertexArray(m.vao); + gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); + for (const auto& mesh : m.meshes) { + if (mesh.instance_count == 0 || mesh.index_count == 0) continue; + gl_->glUniform1ui(u_inst_off, mesh.first_instance); + gl_->glDrawElementsInstancedBaseVertex( + GL_TRIANGLES, + 
static_cast(mesh.index_count), + GL_UNSIGNED_INT, + reinterpret_cast(static_cast(mesh.ebo_byte_offset)), + static_cast(mesh.instance_count), + static_cast(mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES)); + } } + gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); } void ViewportWindow::renderAxisGizmo() { if (!axis_program_ || !axis_vao_) return; - const int dpr = devicePixelRatio(); const int gizmo_size = 110 * dpr; const int margin = 10 * dpr; - gl_->glViewport(margin, margin, gizmo_size, gizmo_size); gl_->glDisable(GL_DEPTH_TEST); float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); - - QVector3D eye_dir; - eye_dir.setX(cosf(pitch_rad) * cosf(yaw_rad)); - eye_dir.setY(cosf(pitch_rad) * sinf(yaw_rad)); - eye_dir.setZ(sinf(pitch_rad)); - - QMatrix4x4 gizmo_view; - gizmo_view.lookAt(eye_dir * 3.0f, QVector3D(0, 0, 0), QVector3D(0, 0, 1)); - - QMatrix4x4 gizmo_proj; - gizmo_proj.ortho(-1.4f, 1.4f, -1.4f, 1.4f, 0.1f, 10.0f); - - QMatrix4x4 mvp = gizmo_proj * gizmo_view; + QVector3D eye_dir(cosf(pitch_rad) * cosf(yaw_rad), + cosf(pitch_rad) * sinf(yaw_rad), + sinf(pitch_rad)); + QMatrix4x4 gv; gv.lookAt(eye_dir * 3.0f, QVector3D(0,0,0), QVector3D(0,0,1)); + QMatrix4x4 gp; gp.ortho(-1.4f, 1.4f, -1.4f, 1.4f, 0.1f, 10.0f); + QMatrix4x4 mvp = gp * gv; gl_->glUseProgram(axis_program_); gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(axis_program_, "u_mvp"), 1, GL_FALSE, mvp.constData()); - gl_->glLineWidth(2.5f); gl_->glBindVertexArray(axis_vao_); gl_->glDrawArrays(GL_LINES, 0, 6); - gl_->glEnable(GL_DEPTH_TEST); } -void ViewportWindow::renderPickPass() { - gl_->glBindFramebuffer(GL_FRAMEBUFFER, pick_fbo_); - gl_->glViewport(0, 0, pick_width_, pick_height_); - - GLuint clear_val = 0; - gl_->glClearBufferuiv(GL_COLOR, 0, &clear_val); - gl_->glClear(GL_DEPTH_BUFFER_BIT); - - QMatrix4x4 vp = proj_matrix_ * view_matrix_; - gl_->glUseProgram(pick_program_); - gl_->glUniformMatrix4fv(gl_->glGetUniformLocation(pick_program_, 
"u_view_projection"), 1, GL_FALSE, vp.constData()); - - // Reuse the visible list from the most recent render() call. - for (const auto& cmd : frame_draw_cmds_) { - gl_->glBindVertexArray(cmd.vao); - gl_->glMultiDrawElements(GL_TRIANGLES, - cmd.counts.data(), GL_UNSIGNED_INT, - cmd.offsets.data(), - static_cast(cmd.counts.size())); - } - - gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); -} - void ViewportWindow::exposeEvent(QExposeEvent*) { - if (isExposed() && !gl_initialized_) { - initGL(); - } + if (isExposed() && !gl_initialized_) initGL(); } - void ViewportWindow::resizeEvent(QResizeEvent*) { if (gl_initialized_) render(); } - bool ViewportWindow::event(QEvent* e) { switch (e->type()) { - case QEvent::MouseButtonPress: - handleMousePress(static_cast(e)); - return true; - case QEvent::MouseButtonRelease: - handleMouseRelease(static_cast(e)); - return true; - case QEvent::MouseMove: - handleMouseMove(static_cast(e)); - return true; - case QEvent::Wheel: - handleWheel(static_cast(e)); - return true; - default: - return QWindow::event(e); + case QEvent::MouseButtonPress: handleMousePress(static_cast(e)); return true; + case QEvent::MouseButtonRelease: handleMouseRelease(static_cast(e)); return true; + case QEvent::MouseMove: handleMouseMove(static_cast(e)); return true; + case QEvent::Wheel: handleWheel(static_cast(e)); return true; + default: return QWindow::event(e); } } @@ -1112,7 +782,6 @@ void ViewportWindow::handleMousePress(QMouseEvent* e) { active_button_ = e->button(); last_mouse_pos_ = e->pos(); } - void ViewportWindow::handleMouseRelease(QMouseEvent* e) { if (active_button_ == Qt::LeftButton && (e->pos() - last_mouse_pos_).manhattanLength() < 5) { uint32_t id = pickObjectAt(e->pos().x(), e->pos().y()); @@ -1121,21 +790,18 @@ void ViewportWindow::handleMouseRelease(QMouseEvent* e) { } active_button_ = Qt::NoButton; } - void ViewportWindow::handleMouseMove(QMouseEvent* e) { QPoint delta = e->pos() - last_mouse_pos_; last_mouse_pos_ = e->pos(); - if 
(active_button_ == Qt::MiddleButton) { if (e->modifiers() & Qt::ShiftModifier) { float pan_speed = camera_distance_ * 0.002f; float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); QVector3D right(-sinf(yaw_rad), cosf(yaw_rad), 0.0f); - QVector3D up( - -sinf(pitch_rad) * cosf(yaw_rad), - -sinf(pitch_rad) * sinf(yaw_rad), - cosf(pitch_rad)); + QVector3D up(-sinf(pitch_rad) * cosf(yaw_rad), + -sinf(pitch_rad) * sinf(yaw_rad), + cosf(pitch_rad)); camera_target_ -= right * delta.x() * pan_speed; camera_target_ += up * delta.y() * pan_speed; } else { @@ -1145,7 +811,6 @@ void ViewportWindow::handleMouseMove(QMouseEvent* e) { } } } - void ViewportWindow::handleWheel(QWheelEvent* e) { float factor = e->angleDelta().y() > 0 ? 0.9f : 1.1f; camera_distance_ *= factor; diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 97925e6e2e3..9fbdcf054b0 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -28,58 +28,43 @@ #include #include -#include #include -#include #include #include #include -#include #include -#include -#include "BvhAccel.h" +#include "InstancedGeometry.h" #include "SidecarCache.h" -struct MaterialInfo { - float r = 0.75f, g = 0.75f, b = 0.78f, a = 1.0f; -}; - -struct UploadChunk { - // Interleaved per-vertex layout (8 floats / 32 bytes per vertex): - // pos(3 float) + normal(3 float) + object_id(1 float bitcast from uint) - // + color(1 float holding RGBA8 packed bytes, read on the GPU as - // GL_UNSIGNED_BYTE * 4 normalized). - std::vector vertices; - std::vector indices; // local to this chunk's vertices - uint32_t object_id = 0; - uint32_t model_id = 0; -}; - -// Per-model GPU state: own VAO, VBO, EBO, draw info, BVH. +// Per-model GPU state for the instanced render path. +// +// VBO: local-coord interleaved verts (pos3 + normal3 + color1_packed) — 28 B. +// EBO: mesh-local indices (uint32). 
+// meshes[]: per-unique-representation metadata; indexed by local_mesh_id. +// instances[]: CPU-side per-instance records; sorted by mesh_id at finalize. +// ssbo: InstanceGpu[]; populated at finalize. +// +// A model is drawable once `finalized == true`. struct ModelGpuData { GLuint vao = 0; GLuint vbo = 0; GLuint ebo = 0; + GLuint ssbo = 0; + size_t vbo_capacity = 0; size_t ebo_capacity = 0; - size_t vbo_used = 0; // bytes - size_t ebo_used = 0; // bytes - uint32_t vertex_count = 0; + size_t vbo_used = 0; + size_t ebo_used = 0; + uint32_t vertex_count = 0; // total (across all meshes) uint32_t total_triangles = 0; - std::vector draw_info; - uint32_t active_draw_count = 0; // how many objects are drawable (progressive upload) - bool hidden = false; -}; -// Pending progressive upload — VBO first, then EBO. -struct PendingUpload { - uint32_t model_id = 0; - std::vector vertices; - std::vector indices; - std::shared_ptr bvh_set; - size_t vbo_uploaded = 0; // bytes - size_t ebo_uploaded = 0; // bytes + std::vector meshes; + std::vector instances; // unsorted until finalize + uint32_t ssbo_instance_count = 0; + + bool finalized = false; + bool hidden = false; }; class ViewportWindow : public QWindow { @@ -88,32 +73,21 @@ class ViewportWindow : public QWindow { explicit ViewportWindow(QWindow* parent = nullptr); ~ViewportWindow(); - void uploadChunk(const UploadChunk& chunk); - void resetScene(); + // Streaming ingress. + void uploadMeshChunk(const MeshChunk& chunk); + void uploadInstanceChunk(const InstanceChunk& chunk); - // Bulk upload pre-built geometry from a sidecar cache. - // Creates a perfectly-sized per-model buffer set. No copy. - void uploadBulk(uint32_t model_id, - std::vector vertices, - std::vector indices, - const std::vector& draw_info, - std::shared_ptr bvh_set); + // Called once all chunks for a model have arrived: sorts instances by + // mesh_id, assigns each mesh its contiguous range, and uploads the + // instance SSBO. The model becomes drawable. 
+ void finalizeModel(uint32_t model_id); + + void resetScene(); void hideModel(uint32_t model_id); void showModel(uint32_t model_id); void removeModel(uint32_t model_id); - // Build BVH and optionally write a sidecar cache. - void buildBvhAsync(uint32_t model_id, - const std::string& ifc_path = "", - uint64_t ifc_file_size = 0, - std::vector sidecar_elements = {}, - std::string sidecar_string_table = {}); - - // Read snapshots of a model's GPU buffers into CPU vectors. - std::vector readbackEbo(uint32_t model_id) const; - std::vector readbackVbo(uint32_t model_id) const; - void setSelectedObjectId(uint32_t id); uint32_t pickObjectAt(int x, int y); @@ -124,6 +98,8 @@ class ViewportWindow : public QWindow { uint32_t visible_objects; uint32_t total_triangles; uint32_t visible_triangles; + uint32_t unique_meshes; + uint32_t instanced_draws; }; signals: @@ -147,13 +123,7 @@ class ViewportWindow : public QWindow { void setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo); bool growModelVbo(ModelGpuData& m, size_t needed_total); bool growModelEbo(ModelGpuData& m, size_t needed_total); - void buildVisibleList(const QMatrix4x4& vp); - void traverseBvh(const ModelBvh& mbvh, const ModelGpuData& mgpu, - const float planes[6][4]); - static bool aabbInFrustum(const float aabb_min[3], const float aabb_max[3], - const float planes[6][4]); - void applyBvhResult(); - void processPendingUploads(); + ModelGpuData& getOrCreateModel(uint32_t model_id); // Mouse interaction void handleMousePress(QMouseEvent* event); @@ -172,13 +142,12 @@ class ViewportWindow : public QWindow { GLuint pick_program_ = 0; GLuint axis_program_ = 0; - // Axis gizmo (separate VAO/VBO since vertex layout differs from scene) + // Axis gizmo GLuint axis_vao_ = 0; GLuint axis_vbo_ = 0; // Per-model GPU data std::unordered_map models_gpu_; - std::mutex models_mutex_; // Pick framebuffer GLuint pick_fbo_ = 0; @@ -187,21 +156,10 @@ class ViewportWindow : public QWindow { int pick_width_ = 0; int pick_height_ = 0; - // 
Per-model BVH - std::unordered_map> model_bvhs_; - - // Progressive upload queue - std::deque pending_uploads_; - - // Scratch buffers reused each frame to avoid allocation. - struct ModelDrawCmd { - GLuint vao; - std::vector counts; - std::vector offsets; - }; - std::vector frame_draw_cmds_; + // Per-frame stats uint32_t visible_triangles_ = 0; uint32_t visible_objects_ = 0; + uint32_t instanced_draws_ = 0; // Camera QVector3D camera_target_{0, 0, 0}; @@ -211,26 +169,14 @@ class ViewportWindow : public QWindow { QMatrix4x4 view_matrix_; QMatrix4x4 proj_matrix_; - // Mouse state + // Mouse Qt::MouseButton active_button_ = Qt::NoButton; QPoint last_mouse_pos_; // Selection uint32_t selected_object_id_ = 0; - bool pick_requested_ = false; - int pick_x_ = 0, pick_y_ = 0; - - // BVH build (phase 2) - struct PendingBvh { - uint32_t model_id; - std::shared_ptr bvh_set; - EboReorderResult ebo_reorder; - }; - std::unique_ptr pending_bvh_; - std::mutex bvh_result_mutex_; - std::thread bvh_build_thread_; - // Stats + // FPS smoothing int frame_count_ = 0; float accumulated_time_ = 0.0f; float last_fps_ = 0.0f; From ea640e2f691bb16a14799fb25a902791440ac97c Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 20:10:20 +1000 Subject: [PATCH 15/37] Sidecar v4: persist instanced geometry + metadata Commit B of the instancing migration. The sidecar on-disk format is reintroduced at version 4 with MeshInfo + InstanceCpu sections in place of v3's flat per-object draw-info array. After streaming finishes, MainWindow asks the viewport for a post- finalise snapshot (VBO + EBO are read back from the GPU, meshes and instances come from the CPU-side arrays) and writes it alongside PackedElementInfo + the string table. On a subsequent load, readSidecar rehydrates the whole struct and ViewportWindow:: applyCachedModel uploads VBO/EBO/SSBO in a single step, bypassing the iterator entirely. Staleness check is still by source file size. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/MainWindow.cpp | 92 ++++++++++++++++++++++-- src/ifcviewer/SidecarCache.cpp | 118 ++++++++++++++++++++++++++++--- src/ifcviewer/ViewportWindow.cpp | 97 +++++++++++++++++++++++++ src/ifcviewer/ViewportWindow.h | 10 +++ 4 files changed, 300 insertions(+), 17 deletions(-) diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 86a787a0e26..ceeedc8cbd4 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -210,7 +210,7 @@ void MainWindow::startNextLoad() { qDebug(" Sidecar read: %lld ms (%s)", rt.elapsed(), ifc_path.c_str()); auto result = std::make_shared>(std::move(cached)); QMetaObject::invokeMethod(this, [this, mid, result]() { - if (*result && !(*result)->meshes.empty()) { + if (*result && !(*result)->instances.empty()) { applySidecarData(mid, std::move(**result)); } else { // No sidecar — fall back to streaming from IFC. @@ -229,10 +229,54 @@ void MainWindow::startNextLoad() { }); } -void MainWindow::applySidecarData(ModelId /*mid*/, SidecarData /*data*/) { - // Commit A: readSidecar() always returns nullopt, so this is unreachable. - // Restored in Commit B along with the v4 on-disk format. - qWarning("applySidecarData called but sidecar is disabled in Commit A"); +void MainWindow::applySidecarData(ModelId mid, SidecarData data) { + auto it = models_.find(mid); + if (it == models_.end()) return; + auto& model = it->second; + + qDebug("Sidecar hit: %s (%zu verts, %zu indices, %zu meshes, %zu instances, %zu elements)", + model.file_path.toStdString().c_str(), + data.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS, + data.indices.size(), + data.meshes.size(), + data.instances.size(), + data.elements.size()); + + QElapsedTimer t; + t.start(); + + // Update next_object_id_ past all objects in this model before the + // extracted `elements` is moved out of `data`. 
+ for (const auto& elem : data.elements) { + if (elem.object_id >= next_object_id_) + next_object_id_ = elem.object_id + 1; + } + + // Hand off geometry to GPU in a single call. + std::vector elements = std::move(data.elements); + std::string stbl = std::move(data.string_table); + viewport_->applyCachedModel(mid, std::move(data)); + qDebug(" GL upload: %lld ms", t.elapsed()); + + t.restart(); + element_tree_->setUpdatesEnabled(false); + populateTreeFromSidecar(model, elements, stbl); + element_tree_->setUpdatesEnabled(true); + qDebug(" Tree build: %lld ms (%zu elements)", t.elapsed(), elements.size()); + + progress_bar_->setVisible(false); + + qint64 ms = load_timer_.elapsed(); + QString elapsed = (ms >= 1000) + ? QString::number(ms / 1000.0, 'f', 2) + " s" + : QString::number(ms) + " ms"; + status_label_->setText(QString("%1 elements across %2 model(s) — loaded from cache in %3") + .arg(element_map_.size()) + .arg(models_.size()) + .arg(elapsed)); + + loading_model_id_ = 0; + QTimer::singleShot(0, this, &MainWindow::startNextLoad); } void MainWindow::populateTreeFromSidecar(ModelHandle& model, @@ -320,10 +364,44 @@ void MainWindow::onStreamingFinished() { .arg(num_models) .arg(elapsed)); - // Sort instances by mesh and upload the per-model instance SSBO. - // Sidecar write is stubbed in Commit A. + // Sort instances by mesh, upload the per-model instance SSBO, and + // persist a v4 sidecar for next load. if (loading_model_id_ != 0) { viewport_->finalizeModel(loading_model_id_); + + auto it = models_.find(loading_model_id_); + if (it != models_.end()) { + SidecarData sd; + if (viewport_->snapshotModel(loading_model_id_, sd)) { + // Pack this model's element metadata + string table. 
+ for (const auto& [oid, info] : element_map_) { + if (info.model_id != loading_model_id_) continue; + PackedElementInfo pe; + pe.object_id = info.object_id; + pe.model_id = info.model_id; + pe.ifc_id = info.ifc_id; + pe.parent_id = info.parent_id; + pe.guid_offset = static_cast(sd.string_table.size()); + pe.guid_length = static_cast(info.guid.size()); + sd.string_table += info.guid; + pe.name_offset = static_cast(sd.string_table.size()); + pe.name_length = static_cast(info.name.size()); + sd.string_table += info.name; + pe.type_offset = static_cast(sd.string_table.size()); + pe.type_length = static_cast(info.type.size()); + sd.string_table += info.type; + sd.elements.push_back(pe); + } + + std::string ifc_path = it->second.file_path.toStdString(); + uint64_t file_size = static_cast( + QFileInfo(it->second.file_path).size()); + QElapsedTimer t; t.start(); + bool ok = writeSidecar(ifc_path, sd, file_size); + qDebug(" Sidecar write: %lld ms (%s)", + t.elapsed(), ok ? "ok" : "FAILED"); + } + } } // Start next model if queued. diff --git a/src/ifcviewer/SidecarCache.cpp b/src/ifcviewer/SidecarCache.cpp index be19c8698f4..3c5ca9cd8d5 100644 --- a/src/ifcviewer/SidecarCache.cpp +++ b/src/ifcviewer/SidecarCache.cpp @@ -17,20 +17,118 @@ * * ********************************************************************************/ -// Commit A: sidecar cache is temporarily disabled. The on-disk format is -// being rewritten from v3 (monolithic world-coord geometry) to v4 (instanced -// meshes + per-instance records). Until v4 is finalised, loads always go -// through the streaming path and writes are no-ops. 
+// v4 layout (all multi-byte fields native-endian; endianness marker in header): +// +// SidecarHeader (16 bytes) +// uint64_t source_file_size +// +// uint32_t num_vertices_floats +// float[] vertex data (28 B/vertex: pos3 + normal3 + color1_packed) +// uint32_t num_indices +// uint32_t[] index data (mesh-local indices; base_vertex applied at draw time) +// +// uint32_t num_meshes +// MeshInfo[num_meshes] +// +// uint32_t num_instances +// InstanceCpu[num_instances] (already sorted by mesh_id) +// +// uint32_t num_elements +// PackedElementInfo[num_elements] +// uint32_t string_table_bytes +// char[string_table_bytes] #include "SidecarCache.h" -bool writeSidecar(const std::string& /*ifc_path*/, - const SidecarData& /*data*/, - uint64_t /*ifc_file_size*/) { +#include +#include + +struct SidecarHeader { + uint32_t magic; + uint32_t version; + uint32_t endian; + uint32_t reserved; +}; + +static std::string sidecarPath(const std::string& ifc_path) { + return ifc_path + ".ifcview"; +} + +template +static bool writeVec(FILE* f, const std::vector& v) { + uint32_t n = static_cast(v.size()); + if (fwrite(&n, 4, 1, f) != 1) return false; + if (n > 0 && fwrite(v.data(), sizeof(T), n, f) != n) return false; + return true; +} + +template +static bool readVec(FILE* f, std::vector& v) { + uint32_t n; + if (fread(&n, 4, 1, f) != 1) return false; + v.resize(n); + if (n > 0 && fread(v.data(), sizeof(T), n, f) != n) return false; + return true; +} + +bool writeSidecar(const std::string& ifc_path, + const SidecarData& data, + uint64_t ifc_file_size) { + std::string path = sidecarPath(ifc_path); + FILE* f = fopen(path.c_str(), "wb"); + if (!f) return false; + + SidecarHeader hdr = { SIDECAR_MAGIC, SIDECAR_VERSION, SIDECAR_ENDIAN, 0 }; + if (fwrite(&hdr, sizeof(hdr), 1, f) != 1) { fclose(f); return false; } + if (fwrite(&ifc_file_size, 8, 1, f) != 1) { fclose(f); return false; } + + if (!writeVec(f, data.vertices)) { fclose(f); return false; } + if (!writeVec(f, data.indices)) { 
fclose(f); return false; } + if (!writeVec(f, data.meshes)) { fclose(f); return false; } + if (!writeVec(f, data.instances)) { fclose(f); return false; } + if (!writeVec(f, data.elements)) { fclose(f); return false; } + + uint32_t stbl_len = static_cast(data.string_table.size()); + if (fwrite(&stbl_len, 4, 1, f) != 1) { fclose(f); return false; } + if (stbl_len > 0 && fwrite(data.string_table.data(), 1, stbl_len, f) != stbl_len) { + fclose(f); return false; + } + + fclose(f); return true; } -std::optional readSidecar(const std::string& /*ifc_path*/, - uint64_t /*ifc_file_size*/) { - return std::nullopt; +std::optional readSidecar(const std::string& ifc_path, + uint64_t ifc_file_size) { + std::string path = sidecarPath(ifc_path); + FILE* f = fopen(path.c_str(), "rb"); + if (!f) return std::nullopt; + + auto fail = [&]() -> std::optional { fclose(f); return std::nullopt; }; + + SidecarHeader hdr; + if (fread(&hdr, sizeof(hdr), 1, f) != 1) return fail(); + if (hdr.magic != SIDECAR_MAGIC || + hdr.version != SIDECAR_VERSION || + hdr.endian != SIDECAR_ENDIAN) return fail(); + + uint64_t stored_size; + if (fread(&stored_size, 8, 1, f) != 1) return fail(); + if (stored_size != ifc_file_size) return fail(); + + SidecarData data; + if (!readVec(f, data.vertices)) return fail(); + if (!readVec(f, data.indices)) return fail(); + if (!readVec(f, data.meshes)) return fail(); + if (!readVec(f, data.instances)) return fail(); + if (!readVec(f, data.elements)) return fail(); + + uint32_t stbl_len; + if (fread(&stbl_len, 4, 1, f) != 1) return fail(); + data.string_table.resize(stbl_len); + if (stbl_len > 0 && fread(data.string_table.data(), 1, stbl_len, f) != stbl_len) + return fail(); + + fclose(f); + return data; } diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index e264f990e45..48558fc64f4 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -523,6 +523,103 @@ void ViewportWindow::finalizeModel(uint32_t 
model_id) { ssbo_bytes / (1024.0*1024.0)); } +bool ViewportWindow::snapshotModel(uint32_t model_id, SidecarData& out) const { + auto it = models_gpu_.find(model_id); + if (!gl_ || it == models_gpu_.end()) return false; + const auto& m = it->second; + if (!m.finalized) return false; + + // GPU readback of the packed VBO/EBO ranges actually in use. + if (m.vbo_used > 0) { + out.vertices.resize(m.vbo_used / sizeof(float)); + gl_->glGetNamedBufferSubData(m.vbo, 0, m.vbo_used, out.vertices.data()); + } + if (m.ebo_used > 0) { + out.indices.resize(m.ebo_used / sizeof(uint32_t)); + gl_->glGetNamedBufferSubData(m.ebo, 0, m.ebo_used, out.indices.data()); + } + + out.meshes = m.meshes; + out.instances = m.instances; + return true; +} + +void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { + if (!gl_initialized_) return; + context_->makeCurrent(this); + + // Drop any existing state for this model_id. + auto existing = models_gpu_.find(model_id); + if (existing != models_gpu_.end()) { + if (existing->second.vao) gl_->glDeleteVertexArrays(1, &existing->second.vao); + if (existing->second.vbo) gl_->glDeleteBuffers(1, &existing->second.vbo); + if (existing->second.ebo) gl_->glDeleteBuffers(1, &existing->second.ebo); + if (existing->second.ssbo) gl_->glDeleteBuffers(1, &existing->second.ssbo); + models_gpu_.erase(existing); + } + + ModelGpuData m; + gl_->glCreateVertexArrays(1, &m.vao); + gl_->glCreateBuffers(1, &m.vbo); + gl_->glCreateBuffers(1, &m.ebo); + + const size_t vb_bytes = data.vertices.size() * sizeof(float); + const size_t ib_bytes = data.indices.size() * sizeof(uint32_t); + m.vbo_capacity = std::max(vb_bytes, 1); + m.ebo_capacity = std::max(ib_bytes, 1); + gl_->glNamedBufferStorage(m.vbo, m.vbo_capacity, + vb_bytes ? data.vertices.data() : nullptr, + GL_DYNAMIC_STORAGE_BIT); + gl_->glNamedBufferStorage(m.ebo, m.ebo_capacity, + ib_bytes ? 
data.indices.data() : nullptr, + GL_DYNAMIC_STORAGE_BIT); + setupVaoLayout(m.vao, m.vbo, m.ebo); + + m.vbo_used = vb_bytes; + m.ebo_used = ib_bytes; + m.vertex_count = static_cast( + data.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS); + m.meshes = std::move(data.meshes); + m.instances = std::move(data.instances); + + uint32_t total_tri = 0; + for (const auto& mesh : m.meshes) { + total_tri += (mesh.index_count / 3) * mesh.instance_count; + } + m.total_triangles = total_tri; + + // Build and upload the instance SSBO. + std::vector gpu(m.instances.size()); + for (size_t i = 0; i < m.instances.size(); ++i) { + const InstanceCpu& src = m.instances[i]; + InstanceGpu& dst = gpu[i]; + std::memcpy(dst.transform, src.transform, sizeof(dst.transform)); + dst.object_id = src.object_id; + dst.color_override_rgba8 = src.color_override_rgba8; + dst._pad0 = 0; + dst._pad1 = 0; + } + gl_->glCreateBuffers(1, &m.ssbo); + const size_t ssbo_bytes = gpu.size() * sizeof(InstanceGpu); + if (ssbo_bytes > 0) { + gl_->glNamedBufferStorage(m.ssbo, ssbo_bytes, gpu.data(), 0); + } + m.ssbo_instance_count = static_cast(gpu.size()); + + m.finalized = true; + models_gpu_.emplace(model_id, std::move(m)); + + qDebug("Sidecar apply: model %u %zu verts, %zu meshes, %zu instances " + "%.1f MB vram (vbo %.1f + ebo %.1f + ssbo %.1f)", + model_id, data.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS, + models_gpu_[model_id].meshes.size(), + models_gpu_[model_id].instances.size(), + (vb_bytes + ib_bytes + ssbo_bytes) / (1024.0*1024.0), + vb_bytes / (1024.0*1024.0), + ib_bytes / (1024.0*1024.0), + ssbo_bytes / (1024.0*1024.0)); +} + void ViewportWindow::resetScene() { if (!gl_initialized_) return; context_->makeCurrent(this); diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 9fbdcf054b0..65b15412e9a 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -84,6 +84,16 @@ class ViewportWindow : public QWindow { void resetScene(); + // Snapshot 
the finalised model into a SidecarData struct for caching. + // Vertices + indices are read back from the GPU; meshes/instances come + // from the CPU-side vectors. Leaves `elements` and `string_table` empty + // for the caller to fill in. + bool snapshotModel(uint32_t model_id, SidecarData& out) const; + + // Restore a finalised model from a cached SidecarData struct. Replaces + // any existing state for model_id and marks it drawable. + void applyCachedModel(uint32_t model_id, SidecarData data); + void hideModel(uint32_t model_id); void showModel(uint32_t model_id); void removeModel(uint32_t model_id); From f4b3e6c515da25ffed33ffcba69357bf235819e6 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 20:23:50 +1000 Subject: [PATCH 16/37] BVH frustum culling over instances Re-wires the BVH acceleration structure on top of the new instanced renderer. Per model, build a BVH over per-instance world AABBs at finalize (and on sidecar apply). Each frame, traverse the BVH against the camera frustum to produce a visible-instance index list, bucket by mesh_id, and upload to a per-model SSBO at binding=1. The main and pick vertex shaders do a double-indirection `instances[visible[u_offset + gl_InstanceID]]` so draws only touch instances that passed the frustum test. Models with fewer than BVH_MIN_OBJECTS instances skip the BVH build and fall back to a linear per-instance frustum test. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/BvhAccel.cpp | 6 + src/ifcviewer/BvhAccel.h | 4 + src/ifcviewer/ViewportWindow.cpp | 181 +++++++++++++++++++++++++++++-- src/ifcviewer/ViewportWindow.h | 25 +++++ 4 files changed, 204 insertions(+), 12 deletions(-) diff --git a/src/ifcviewer/BvhAccel.cpp b/src/ifcviewer/BvhAccel.cpp index c285f1fbfe0..4b115bfa4ce 100644 --- a/src/ifcviewer/BvhAccel.cpp +++ b/src/ifcviewer/BvhAccel.cpp @@ -119,6 +119,12 @@ ModelBvh buildModelBvh(const std::vector& items, } // anonymous namespace +ModelBvh buildModelBvhOne(const std::vector& items, uint32_t model_id) { + std::vector idxs(items.size()); + for (uint32_t i = 0; i < items.size(); ++i) idxs[i] = i; + return buildModelBvh(items, idxs, model_id); +} + std::shared_ptr buildBvhSet(const std::vector& items) { auto bvh_set = std::make_shared(); diff --git a/src/ifcviewer/BvhAccel.h b/src/ifcviewer/BvhAccel.h index a2cb6a13163..7281dff511d 100644 --- a/src/ifcviewer/BvhAccel.h +++ b/src/ifcviewer/BvhAccel.h @@ -63,4 +63,8 @@ struct BvhSet { // vector — callers providing a single model's items will see 0..N-1. std::shared_ptr buildBvhSet(const std::vector& items); +// Build a single-model BVH over `items`. model_id is stored on the result +// for identification; item_indices will be 0..items.size()-1. 
+ModelBvh buildModelBvhOne(const std::vector& items, uint32_t model_id); + #endif // BVHACCEL_H diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 48558fc64f4..7011ec9d38f 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -68,6 +68,9 @@ struct InstanceRecord { layout(std430, binding = 0) readonly buffer Instances { InstanceRecord instances[]; }; +layout(std430, binding = 1) readonly buffer VisibleIndices { + uint visible[]; +}; uniform mat4 u_view_projection; uniform uint u_instance_offset; @@ -79,7 +82,8 @@ flat out uint v_object_id; flat out uint v_selected; void main() { - InstanceRecord inst = instances[u_instance_offset + uint(gl_InstanceID)]; + uint iid = visible[u_instance_offset + uint(gl_InstanceID)]; + InstanceRecord inst = instances[iid]; vec4 world = inst.transform * vec4(a_position, 1.0); gl_Position = u_view_projection * world; @@ -139,6 +143,9 @@ struct InstanceRecord { layout(std430, binding = 0) readonly buffer Instances { InstanceRecord instances[]; }; +layout(std430, binding = 1) readonly buffer VisibleIndices { + uint visible[]; +}; uniform mat4 u_view_projection; uniform uint u_instance_offset; @@ -146,7 +153,8 @@ uniform uint u_instance_offset; flat out uint v_object_id; void main() { - InstanceRecord inst = instances[u_instance_offset + uint(gl_InstanceID)]; + uint iid = visible[u_instance_offset + uint(gl_InstanceID)]; + InstanceRecord inst = instances[iid]; gl_Position = u_view_projection * inst.transform * vec4(a_position, 1.0); v_object_id = inst.object_id; } @@ -211,6 +219,59 @@ static GLuint linkProgram(QOpenGLFunctions_4_5_Core* gl, GLuint vert, GLuint fra // ----------------------------------------------------------------------------- +static bool aabbInFrustum(const float aabb_min[3], const float aabb_max[3], + const float planes[6][4]) { + for (int p = 0; p < 6; ++p) { + float px = planes[p][0] >= 0.0f ? 
aabb_max[0] : aabb_min[0]; + float py = planes[p][1] >= 0.0f ? aabb_max[1] : aabb_min[1]; + float pz = planes[p][2] >= 0.0f ? aabb_max[2] : aabb_min[2]; + float dist = planes[p][0] * px + planes[p][1] * py + planes[p][2] * pz + planes[p][3]; + if (dist < 0.0f) return false; + } + return true; +} + +static void extractFrustumPlanes(const QMatrix4x4& vp, float planes[6][4]) { + for (int i = 0; i < 4; ++i) { + planes[0][i] = vp(3, i) + vp(0, i); + planes[1][i] = vp(3, i) - vp(0, i); + planes[2][i] = vp(3, i) + vp(1, i); + planes[3][i] = vp(3, i) - vp(1, i); + planes[4][i] = vp(3, i) + vp(2, i); + planes[5][i] = vp(3, i) - vp(2, i); + } + for (int p = 0; p < 6; ++p) { + float len = std::sqrt(planes[p][0]*planes[p][0] + + planes[p][1]*planes[p][1] + + planes[p][2]*planes[p][2]); + if (len > 0.0f) { + float inv = 1.0f / len; + planes[p][0] *= inv; planes[p][1] *= inv; + planes[p][2] *= inv; planes[p][3] *= inv; + } + } +} + +// Build bvh_items (one per instance, 1:1 ordering) and a per-model BVH. +// Items with instances.size() < BVH_MIN_OBJECTS leave bvh empty — the +// render path falls back to drawing every instance. 
+static void buildBvhForModel(ModelGpuData& m, uint32_t model_id) { + m.bvh_items.clear(); + m.bvh_items.reserve(m.instances.size()); + for (const auto& inst : m.instances) { + BvhItem it; + std::memcpy(it.aabb_min, inst.world_aabb_min, sizeof(it.aabb_min)); + std::memcpy(it.aabb_max, inst.world_aabb_max, sizeof(it.aabb_max)); + it.model_id = inst.model_id; + m.bvh_items.push_back(it); + } + if (m.bvh_items.size() >= BVH_MIN_OBJECTS) { + m.bvh = buildModelBvhOne(m.bvh_items, model_id); + } else { + m.bvh = ModelBvh{}; + } +} + ViewportWindow::ViewportWindow(QWindow* parent) : QWindow(parent) { @@ -239,6 +300,7 @@ ViewportWindow::~ViewportWindow() { if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); + if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); } if (axis_vao_) gl_->glDeleteVertexArrays(1, &axis_vao_); if (axis_vbo_) gl_->glDeleteBuffers(1, &axis_vbo_); @@ -512,6 +574,8 @@ void ViewportWindow::finalizeModel(uint32_t model_id) { gl_->glNamedBufferStorage(m.ssbo, ssbo_bytes, gpu.data(), 0); m.ssbo_instance_count = static_cast(gpu.size()); + buildBvhForModel(m, model_id); + m.finalized = true; qDebug("Model %u finalized: %zu verts, %zu meshes, %zu instances, %.1f MB vram " @@ -555,6 +619,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { if (existing->second.vbo) gl_->glDeleteBuffers(1, &existing->second.vbo); if (existing->second.ebo) gl_->glDeleteBuffers(1, &existing->second.ebo); if (existing->second.ssbo) gl_->glDeleteBuffers(1, &existing->second.ssbo); + if (existing->second.visible_ssbo) gl_->glDeleteBuffers(1, &existing->second.visible_ssbo); models_gpu_.erase(existing); } @@ -606,6 +671,8 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { } m.ssbo_instance_count = static_cast(gpu.size()); + buildBvhForModel(m, model_id); + m.finalized = true; models_gpu_.emplace(model_id, std::move(m)); @@ -628,6 +695,7 
@@ void ViewportWindow::resetScene() { if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); + if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); } models_gpu_.clear(); selected_object_id_ = 0; @@ -652,6 +720,7 @@ void ViewportWindow::removeModel(uint32_t model_id) { if (it->second.vbo) gl_->glDeleteBuffers(1, &it->second.vbo); if (it->second.ebo) gl_->glDeleteBuffers(1, &it->second.ebo); if (it->second.ssbo) gl_->glDeleteBuffers(1, &it->second.ssbo); + if (it->second.visible_ssbo) gl_->glDeleteBuffers(1, &it->second.visible_ssbo); models_gpu_.erase(it); } } @@ -689,6 +758,74 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { return pixel; } +void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6][4]) { + // Ensure per-mesh scratch sized. + if (visible_by_mesh_.size() < m.meshes.size()) visible_by_mesh_.resize(m.meshes.size()); + for (size_t i = 0; i < m.meshes.size(); ++i) visible_by_mesh_[i].clear(); + + auto test_and_push = [&](uint32_t inst_idx) { + const InstanceCpu& inst = m.instances[inst_idx]; + if (!aabbInFrustum(inst.world_aabb_min, inst.world_aabb_max, planes)) return; + if (inst.mesh_id < visible_by_mesh_.size()) + visible_by_mesh_[inst.mesh_id].push_back(inst_idx); + }; + + if (!m.bvh.nodes.empty()) { + uint32_t stack[64]; + int sp = 0; + stack[sp++] = 0; + while (sp > 0) { + uint32_t ni = stack[--sp]; + const BvhNode& n = m.bvh.nodes[ni]; + if (!aabbInFrustum(n.aabb_min, n.aabb_max, planes)) continue; + if (n.count > 0) { + for (uint32_t k = 0; k < n.count; ++k) { + uint32_t item_idx = m.bvh.item_indices[n.right_or_first + k]; + test_and_push(item_idx); + } + } else { + // Left child = ni + 1, right child = n.right_or_first. + // Push right first so left is popped next (DFS order). 
+ if (sp + 2 <= 64) { + stack[sp++] = n.right_or_first; + stack[sp++] = ni + 1; + } + } + } + } else { + for (uint32_t i = 0; i < m.instances.size(); ++i) test_and_push(i); + } + + // Flatten into visible_flat_ and record per-mesh ranges. + visible_flat_.clear(); + m.mesh_vis_first.assign(m.meshes.size(), 0); + m.mesh_vis_count.assign(m.meshes.size(), 0); + for (size_t mi = 0; mi < m.meshes.size(); ++mi) { + m.mesh_vis_first[mi] = static_cast(visible_flat_.size()); + m.mesh_vis_count[mi] = static_cast(visible_by_mesh_[mi].size()); + visible_flat_.insert(visible_flat_.end(), + visible_by_mesh_[mi].begin(), + visible_by_mesh_[mi].end()); + } + + // Grow/create visible SSBO as needed. Keep at least 4 bytes so the binding + // is always valid even when nothing is visible. + size_t bytes = std::max(visible_flat_.size() * sizeof(uint32_t), + sizeof(uint32_t)); + if (m.visible_ssbo == 0 || m.visible_ssbo_capacity < bytes) { + if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); + size_t new_cap = m.visible_ssbo_capacity ? 
m.visible_ssbo_capacity : 4096; + while (new_cap < bytes) new_cap *= 2; + gl_->glCreateBuffers(1, &m.visible_ssbo); + gl_->glNamedBufferStorage(m.visible_ssbo, new_cap, nullptr, GL_DYNAMIC_STORAGE_BIT); + m.visible_ssbo_capacity = new_cap; + } + if (!visible_flat_.empty()) { + gl_->glNamedBufferSubData(m.visible_ssbo, 0, + visible_flat_.size() * sizeof(uint32_t), visible_flat_.data()); + } +} + void ViewportWindow::updateCamera() { float yaw_rad = qDegreesToRadians(camera_yaw_); float pitch_rad = qDegreesToRadians(camera_pitch_); @@ -715,6 +852,8 @@ void ViewportWindow::render() { gl_->glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); QMatrix4x4 vp = proj_matrix_ * view_matrix_; + float planes[6][4]; + extractFrustumPlanes(vp, planes); gl_->glUseProgram(main_program_); GLint u_vp = gl_->glGetUniformLocation(main_program_, "u_view_projection"); @@ -731,21 +870,28 @@ void ViewportWindow::render() { for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.finalized || !m.ssbo) continue; + + cullAndUploadVisible(m, planes); + if (visible_flat_.empty()) continue; + gl_->glBindVertexArray(m.vao); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); + gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); - for (const auto& mesh : m.meshes) { - if (mesh.instance_count == 0 || mesh.index_count == 0) continue; - gl_->glUniform1ui(u_inst_off, mesh.first_instance); + for (size_t mi = 0; mi < m.meshes.size(); ++mi) { + const auto& mesh = m.meshes[mi]; + uint32_t vis_count = m.mesh_vis_count[mi]; + if (vis_count == 0 || mesh.index_count == 0) continue; + gl_->glUniform1ui(u_inst_off, m.mesh_vis_first[mi]); gl_->glDrawElementsInstancedBaseVertex( GL_TRIANGLES, static_cast(mesh.index_count), GL_UNSIGNED_INT, reinterpret_cast(static_cast(mesh.ebo_byte_offset)), - static_cast(mesh.instance_count), + static_cast(vis_count), static_cast(mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES)); - visible_triangles_ += (mesh.index_count / 3) * 
mesh.instance_count; - visible_objects_ += mesh.instance_count; + visible_triangles_ += (mesh.index_count / 3) * vis_count; + visible_objects_ += vis_count; ++instanced_draws_; } } @@ -810,6 +956,9 @@ void ViewportWindow::renderPickPass() { gl_->glClear(GL_DEPTH_BUFFER_BIT); QMatrix4x4 vp = proj_matrix_ * view_matrix_; + float planes[6][4]; + extractFrustumPlanes(vp, planes); + gl_->glUseProgram(pick_program_); GLint u_vp = gl_->glGetUniformLocation(pick_program_, "u_view_projection"); GLint u_inst_off = gl_->glGetUniformLocation(pick_program_, "u_instance_offset"); @@ -817,17 +966,25 @@ void ViewportWindow::renderPickPass() { for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.finalized || !m.ssbo) continue; + + cullAndUploadVisible(m, planes); + if (visible_flat_.empty()) continue; + gl_->glBindVertexArray(m.vao); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); - for (const auto& mesh : m.meshes) { - if (mesh.instance_count == 0 || mesh.index_count == 0) continue; - gl_->glUniform1ui(u_inst_off, mesh.first_instance); + gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); + + for (size_t mi = 0; mi < m.meshes.size(); ++mi) { + const auto& mesh = m.meshes[mi]; + uint32_t vis_count = m.mesh_vis_count[mi]; + if (vis_count == 0 || mesh.index_count == 0) continue; + gl_->glUniform1ui(u_inst_off, m.mesh_vis_first[mi]); gl_->glDrawElementsInstancedBaseVertex( GL_TRIANGLES, static_cast(mesh.index_count), GL_UNSIGNED_INT, reinterpret_cast(static_cast(mesh.ebo_byte_offset)), - static_cast(mesh.instance_count), + static_cast(vis_count), static_cast(mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES)); } } diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 65b15412e9a..5a086fd774f 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -34,6 +34,7 @@ #include #include +#include "BvhAccel.h" #include "InstancedGeometry.h" #include "SidecarCache.h" @@ -63,6 +64,20 @@ struct 
ModelGpuData { std::vector instances; // unsorted until finalize uint32_t ssbo_instance_count = 0; + // Per-instance world AABB + BVH (built at finalize). The BVH is the + // same ordering as `instances`; bvh_items[i] corresponds to instances[i]. + std::vector bvh_items; + ModelBvh bvh; + + // Dynamic visible-instance index buffer (std430, binding = 1). + // Re-uploaded each frame from frame_visible_scratch_. + GLuint visible_ssbo = 0; + size_t visible_ssbo_capacity = 0; // bytes + + // Per-mesh visible-list offset/count, rebuilt each frame. + std::vector mesh_vis_first; + std::vector mesh_vis_count; + bool finalized = false; bool hidden = false; }; @@ -135,6 +150,10 @@ class ViewportWindow : public QWindow { bool growModelEbo(ModelGpuData& m, size_t needed_total); ModelGpuData& getOrCreateModel(uint32_t model_id); + // Populate m.mesh_vis_first / mesh_vis_count and upload visible indices + // to m.visible_ssbo. Uses BVH when available, else linear scan. + void cullAndUploadVisible(ModelGpuData& m, const float planes[6][4]); + // Mouse interaction void handleMousePress(QMouseEvent* event); void handleMouseRelease(QMouseEvent* event); @@ -171,6 +190,12 @@ class ViewportWindow : public QWindow { uint32_t visible_objects_ = 0; uint32_t instanced_draws_ = 0; + // Reused scratch: visible-instance index lists per mesh, flattened into + // `visible_flat_` for upload. Both live in the parent object to avoid + // per-frame allocation. 
+ std::vector> visible_by_mesh_; + std::vector visible_flat_; + // Camera QVector3D camera_target_{0, 0, 0}; float camera_distance_ = 50.0f; From e2d51300c83d086e33eabed8a188de3ef581ca91 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 20:29:16 +1000 Subject: [PATCH 17/37] Progressive rendering during streaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-allocate the instance SSBO on model creation (4 MB, grow-on-demand) and append each arriving InstanceChunk directly to the GPU-side InstanceGpu array in uploadInstanceChunk. This makes a model drawable as soon as its first mesh + first instance chunk land, rather than waiting for finalizeModel. The visible-list architecture already decouples SSBO order from the draw path, so appending in insertion order is correct — no sorting required. finalizeModel collapses to: - compute per-mesh instance counts (for stats + sidecar round-trip) - build the per-model BVH over instance world AABBs Render / pick loops now gate on ssbo_instance_count > 0 rather than the finalized flag. Stats include in-progress models in totals (excluding only hidden). 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 118 +++++++++++++++++-------------- src/ifcviewer/ViewportWindow.h | 2 + 2 files changed, 66 insertions(+), 54 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 7011ec9d38f..b70e2bf8324 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -30,9 +30,10 @@ #include #include -static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB -static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB -static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB +static const size_t INITIAL_VBO_SIZE = 64 * 1024 * 1024; // 64 MB +static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB +static const size_t INITIAL_SSBO_SIZE = 4 * 1024 * 1024; // 4 MB (~52k instances) +static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB // ----------------------------------------------------------------------------- // Shaders @@ -420,6 +421,27 @@ bool ViewportWindow::growModelVbo(ModelGpuData& m, size_t needed_total) { return true; } +bool ViewportWindow::growModelSsbo(ModelGpuData& m, size_t needed_total) { + size_t new_capacity = m.ssbo_capacity ? 
m.ssbo_capacity : INITIAL_SSBO_SIZE; + while (new_capacity < needed_total) new_capacity *= 2; + if (new_capacity > MAX_BUFFER_SIZE) { + qWarning("Instance SSBO grow request (%zu MB) exceeds cap", new_capacity / (1024*1024)); + return false; + } + GLuint new_ssbo = 0; + gl_->glCreateBuffers(1, &new_ssbo); + gl_->glNamedBufferStorage(new_ssbo, new_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + const size_t used = m.ssbo_instance_count * sizeof(InstanceGpu); + if (m.ssbo && used > 0) { + gl_->glCopyNamedBufferSubData(m.ssbo, new_ssbo, 0, 0, used); + } + if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); + m.ssbo = new_ssbo; + m.ssbo_capacity = new_capacity; + qInfo("Model instance SSBO grew to %zu MB", m.ssbo_capacity / (1024*1024)); + return true; +} + bool ViewportWindow::growModelEbo(ModelGpuData& m, size_t needed_total) { size_t new_capacity = m.ebo_capacity; while (new_capacity < needed_total) new_capacity *= 2; @@ -456,6 +478,11 @@ ModelGpuData& ViewportWindow::getOrCreateModel(uint32_t model_id) { gl_->glNamedBufferStorage(m.ebo, m.ebo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); setupVaoLayout(m.vao, m.vbo, m.ebo); + // Pre-allocate instance SSBO so we can append during streaming. + gl_->glCreateBuffers(1, &m.ssbo); + m.ssbo_capacity = INITIAL_SSBO_SIZE; + gl_->glNamedBufferStorage(m.ssbo, m.ssbo_capacity, nullptr, GL_DYNAMIC_STORAGE_BIT); + return models_gpu_.emplace(model_id, std::move(m)).first->second; } @@ -501,8 +528,8 @@ void ViewportWindow::uploadMeshChunk(const MeshChunk& chunk) { void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { if (!gl_initialized_) return; - // We don't need a GL context here since we're only touching CPU state, - // but the signal may fire on the render thread so keep it simple. 
+ context_->makeCurrent(this); + ModelGpuData& m = getOrCreateModel(chunk.model_id); InstanceCpu inst; @@ -515,6 +542,23 @@ void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { std::memcpy(inst.world_aabb_max, chunk.world_aabb_max, sizeof(inst.world_aabb_max)); m.instances.push_back(inst); + // Append the GPU record to the instance SSBO so the model is drawable + // immediately, without waiting for finalizeModel. The visible-list + // architecture means SSBO order is irrelevant to correctness. + InstanceGpu gpu; + std::memcpy(gpu.transform, inst.transform, sizeof(gpu.transform)); + gpu.object_id = inst.object_id; + gpu.color_override_rgba8 = inst.color_override_rgba8; + gpu._pad0 = 0; + gpu._pad1 = 0; + + const size_t offset = m.ssbo_instance_count * sizeof(InstanceGpu); + if (offset + sizeof(InstanceGpu) > m.ssbo_capacity) { + if (!growModelSsbo(m, offset + sizeof(InstanceGpu))) return; + } + gl_->glNamedBufferSubData(m.ssbo, offset, sizeof(InstanceGpu), &gpu); + m.ssbo_instance_count++; + if (chunk.local_mesh_id < m.meshes.size()) { m.total_triangles += m.meshes[chunk.local_mesh_id].index_count / 3; } @@ -527,64 +571,30 @@ void ViewportWindow::finalizeModel(uint32_t model_id) { auto it = models_gpu_.find(model_id); if (it == models_gpu_.end()) return; ModelGpuData& m = it->second; - if (m.instances.empty()) { m.finalized = true; return; } - // Sort instances by mesh_id (stable for deterministic ordering). - std::stable_sort(m.instances.begin(), m.instances.end(), - [](const InstanceCpu& a, const InstanceCpu& b) { - return a.mesh_id < b.mesh_id; - }); - - // Assign per-mesh contiguous range. + // Instance SSBO has been populated incrementally during streaming, so + // we don't re-upload here. What finalize still does: + // (1) compute per-mesh instance counts — used by stats and the sidecar + // round-trip (first_instance is unused by the visible-list renderer), + // (2) build the per-model BVH over instance world AABBs. 
for (auto& mesh : m.meshes) { mesh.first_instance = 0; mesh.instance_count = 0; } - uint32_t current = UINT32_MAX; - uint32_t run_start = 0; - for (uint32_t i = 0; i < m.instances.size(); ++i) { - uint32_t mid = m.instances[i].mesh_id; - if (mid != current) { - if (current != UINT32_MAX && current < m.meshes.size()) { - m.meshes[current].first_instance = run_start; - m.meshes[current].instance_count = i - run_start; - } - current = mid; - run_start = i; - } - } - if (current != UINT32_MAX && current < m.meshes.size()) { - m.meshes[current].first_instance = run_start; - m.meshes[current].instance_count = static_cast(m.instances.size()) - run_start; - } - - // Build GPU-layout array. - std::vector gpu(m.instances.size()); - for (size_t i = 0; i < m.instances.size(); ++i) { - const InstanceCpu& src = m.instances[i]; - InstanceGpu& dst = gpu[i]; - std::memcpy(dst.transform, src.transform, sizeof(dst.transform)); - dst.object_id = src.object_id; - dst.color_override_rgba8 = src.color_override_rgba8; - dst._pad0 = 0; - dst._pad1 = 0; + for (const auto& inst : m.instances) { + if (inst.mesh_id < m.meshes.size()) ++m.meshes[inst.mesh_id].instance_count; } - // Allocate and upload SSBO. 
- if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); - gl_->glCreateBuffers(1, &m.ssbo); - const size_t ssbo_bytes = gpu.size() * sizeof(InstanceGpu); - gl_->glNamedBufferStorage(m.ssbo, ssbo_bytes, gpu.data(), 0); - m.ssbo_instance_count = static_cast(gpu.size()); - buildBvhForModel(m, model_id); m.finalized = true; + const size_t ssbo_bytes = m.ssbo_instance_count * sizeof(InstanceGpu); qDebug("Model %u finalized: %zu verts, %zu meshes, %zu instances, %.1f MB vram " - "(vbo %.1f + ebo %.1f + ssbo %.1f)", + "(vbo %.1f + ebo %.1f + ssbo-used %.1f / %.1f cap)", model_id, size_t(m.vertex_count), m.meshes.size(), m.instances.size(), - (m.vbo_capacity + m.ebo_capacity + ssbo_bytes) / (1024.0*1024.0), + (m.vbo_capacity + m.ebo_capacity + m.ssbo_capacity) / (1024.0*1024.0), m.vbo_capacity / (1024.0*1024.0), m.ebo_capacity / (1024.0*1024.0), - ssbo_bytes / (1024.0*1024.0)); + ssbo_bytes / (1024.0*1024.0), + m.ssbo_capacity / (1024.0*1024.0)); } bool ViewportWindow::snapshotModel(uint32_t model_id, SidecarData& out) const { @@ -869,7 +879,7 @@ void ViewportWindow::render() { instanced_draws_ = 0; for (auto& [model_id, m] : models_gpu_) { - if (m.hidden || !m.finalized || !m.ssbo) continue; + if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; cullAndUploadVisible(m, planes); if (visible_flat_.empty()) continue; @@ -913,7 +923,7 @@ void ViewportWindow::render() { size_t num_models = 0, num_hidden = 0; for (const auto& [mid, mm] : models_gpu_) { num_models++; - if (mm.hidden || !mm.finalized) { num_hidden++; continue; } + if (mm.hidden) { num_hidden++; continue; } total_obj += static_cast(mm.instances.size()); total_tri += mm.total_triangles; total_meshes += static_cast(mm.meshes.size()); @@ -965,7 +975,7 @@ void ViewportWindow::renderPickPass() { gl_->glUniformMatrix4fv(u_vp, 1, GL_FALSE, vp.constData()); for (auto& [model_id, m] : models_gpu_) { - if (m.hidden || !m.finalized || !m.ssbo) continue; + if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; 
cullAndUploadVisible(m, planes); if (visible_flat_.empty()) continue; diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 5a086fd774f..fd21bb76416 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -55,6 +55,7 @@ struct ModelGpuData { size_t vbo_capacity = 0; size_t ebo_capacity = 0; + size_t ssbo_capacity = 0; // bytes size_t vbo_used = 0; size_t ebo_used = 0; uint32_t vertex_count = 0; // total (across all meshes) @@ -148,6 +149,7 @@ class ViewportWindow : public QWindow { void setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo); bool growModelVbo(ModelGpuData& m, size_t needed_total); bool growModelEbo(ModelGpuData& m, size_t needed_total); + bool growModelSsbo(ModelGpuData& m, size_t needed_total); ModelGpuData& getOrCreateModel(uint32_t model_id); // Populate m.mesh_vis_first / mesh_vis_count and upload visible indices From 99ca61b133624d63c2f81432e0ed60f76706fcdf Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 20:34:47 +1000 Subject: [PATCH 18/37] Collapse per-mesh draws into glMultiDrawElementsIndirect Each visible model now issues a single glMultiDrawElementsIndirect call instead of one glDrawElementsInstancedBaseVertex per mesh. The CPU BVH cull populates an array of DrawElementsIndirectCommand records plus the flat visible-instance list, uploads both, and draws the whole model in one GL call. Vertex shaders switch from a uniform u_instance_offset to gl_BaseInstanceARB (ARB_shader_draw_parameters), so per-draw offset comes from the indirect command's baseInstance field. Draw-call counts for BIM scenes with hundreds of unique meshes drop from hundreds-per-frame to one-per-model, cutting driver overhead. This also sets up the plumbing for the follow-up compute-shader cull that will populate the indirect buffer entirely on-GPU. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 112 +++++++++++++++++-------------- src/ifcviewer/ViewportWindow.h | 33 ++++++--- 2 files changed, 87 insertions(+), 58 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index b70e2bf8324..b24ff7e3b3d 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -35,6 +35,8 @@ static const size_t INITIAL_EBO_SIZE = 32 * 1024 * 1024; // 32 MB static const size_t INITIAL_SSBO_SIZE = 4 * 1024 * 1024; // 4 MB (~52k instances) static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB +static_assert(sizeof(DrawElementsIndirectCommand) == 20, "indirect cmd must be 20 bytes"); + // ----------------------------------------------------------------------------- // Shaders // ----------------------------------------------------------------------------- @@ -55,6 +57,7 @@ static const size_t MAX_BUFFER_SIZE = 4ull * 1024 * 1024 * 1024; // 4 GB static const char* MAIN_VERTEX_SHADER = R"( #version 450 core +#extension GL_ARB_shader_draw_parameters : require layout(location = 0) in vec3 a_position; layout(location = 1) in vec3 a_normal; layout(location = 2) in vec4 a_color; @@ -74,7 +77,6 @@ layout(std430, binding = 1) readonly buffer VisibleIndices { }; uniform mat4 u_view_projection; -uniform uint u_instance_offset; uniform uint u_selected_id; out vec3 v_normal; @@ -83,7 +85,8 @@ flat out uint v_object_id; flat out uint v_selected; void main() { - uint iid = visible[u_instance_offset + uint(gl_InstanceID)]; + uint slot = uint(gl_BaseInstanceARB) + uint(gl_InstanceID); + uint iid = visible[slot]; InstanceRecord inst = instances[iid]; vec4 world = inst.transform * vec4(a_position, 1.0); gl_Position = u_view_projection * world; @@ -132,6 +135,7 @@ void main() { static const char* PICK_VERTEX_SHADER = R"( #version 450 core +#extension GL_ARB_shader_draw_parameters : require layout(location = 0) in vec3 a_position; struct 
InstanceRecord { @@ -149,12 +153,12 @@ layout(std430, binding = 1) readonly buffer VisibleIndices { }; uniform mat4 u_view_projection; -uniform uint u_instance_offset; flat out uint v_object_id; void main() { - uint iid = visible[u_instance_offset + uint(gl_InstanceID)]; + uint slot = uint(gl_BaseInstanceARB) + uint(gl_InstanceID); + uint iid = visible[slot]; InstanceRecord inst = instances[iid]; gl_Position = u_view_projection * inst.transform * vec4(a_position, 1.0); v_object_id = inst.object_id; @@ -302,6 +306,7 @@ ViewportWindow::~ViewportWindow() { if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); + if (m.indirect_buffer) gl_->glDeleteBuffers(1, &m.indirect_buffer); } if (axis_vao_) gl_->glDeleteVertexArrays(1, &axis_vao_); if (axis_vbo_) gl_->glDeleteBuffers(1, &axis_vbo_); @@ -630,6 +635,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { if (existing->second.ebo) gl_->glDeleteBuffers(1, &existing->second.ebo); if (existing->second.ssbo) gl_->glDeleteBuffers(1, &existing->second.ssbo); if (existing->second.visible_ssbo) gl_->glDeleteBuffers(1, &existing->second.visible_ssbo); + if (existing->second.indirect_buffer) gl_->glDeleteBuffers(1, &existing->second.indirect_buffer); models_gpu_.erase(existing); } @@ -706,6 +712,7 @@ void ViewportWindow::resetScene() { if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); + if (m.indirect_buffer) gl_->glDeleteBuffers(1, &m.indirect_buffer); } models_gpu_.clear(); selected_object_id_ = 0; @@ -731,6 +738,7 @@ void ViewportWindow::removeModel(uint32_t model_id) { if (it->second.ebo) gl_->glDeleteBuffers(1, &it->second.ebo); if (it->second.ssbo) gl_->glDeleteBuffers(1, &it->second.ssbo); if (it->second.visible_ssbo) gl_->glDeleteBuffers(1, &it->second.visible_ssbo); + if 
(it->second.indirect_buffer) gl_->glDeleteBuffers(1, &it->second.indirect_buffer); models_gpu_.erase(it); } } @@ -806,26 +814,36 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] for (uint32_t i = 0; i < m.instances.size(); ++i) test_and_push(i); } - // Flatten into visible_flat_ and record per-mesh ranges. + // Flatten into visible_flat_ and build one DrawElementsIndirectCommand + // per non-empty mesh. visible_flat_.clear(); - m.mesh_vis_first.assign(m.meshes.size(), 0); - m.mesh_vis_count.assign(m.meshes.size(), 0); + indirect_scratch_.clear(); for (size_t mi = 0; mi < m.meshes.size(); ++mi) { - m.mesh_vis_first[mi] = static_cast(visible_flat_.size()); - m.mesh_vis_count[mi] = static_cast(visible_by_mesh_[mi].size()); + const auto& mesh = m.meshes[mi]; + const uint32_t vis_count = static_cast(visible_by_mesh_[mi].size()); + if (vis_count == 0 || mesh.index_count == 0) continue; + + DrawElementsIndirectCommand cmd; + cmd.count = mesh.index_count; + cmd.instanceCount = vis_count; + cmd.firstIndex = mesh.ebo_byte_offset / sizeof(uint32_t); + cmd.baseVertex = mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES; + cmd.baseInstance = static_cast(visible_flat_.size()); + indirect_scratch_.push_back(cmd); + visible_flat_.insert(visible_flat_.end(), visible_by_mesh_[mi].begin(), visible_by_mesh_[mi].end()); } + m.indirect_command_count = static_cast(indirect_scratch_.size()); - // Grow/create visible SSBO as needed. Keep at least 4 bytes so the binding - // is always valid even when nothing is visible. - size_t bytes = std::max(visible_flat_.size() * sizeof(uint32_t), - sizeof(uint32_t)); - if (m.visible_ssbo == 0 || m.visible_ssbo_capacity < bytes) { + // Upload visible list (keep binding alive even when empty). 
+ size_t vis_bytes = std::max(visible_flat_.size() * sizeof(uint32_t), + sizeof(uint32_t)); + if (m.visible_ssbo == 0 || m.visible_ssbo_capacity < vis_bytes) { if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); size_t new_cap = m.visible_ssbo_capacity ? m.visible_ssbo_capacity : 4096; - while (new_cap < bytes) new_cap *= 2; + while (new_cap < vis_bytes) new_cap *= 2; gl_->glCreateBuffers(1, &m.visible_ssbo); gl_->glNamedBufferStorage(m.visible_ssbo, new_cap, nullptr, GL_DYNAMIC_STORAGE_BIT); m.visible_ssbo_capacity = new_cap; @@ -834,6 +852,19 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] gl_->glNamedBufferSubData(m.visible_ssbo, 0, visible_flat_.size() * sizeof(uint32_t), visible_flat_.data()); } + + // Upload indirect command buffer. + size_t ind_bytes = indirect_scratch_.size() * sizeof(DrawElementsIndirectCommand); + if (ind_bytes == 0) return; + if (m.indirect_buffer == 0 || m.indirect_capacity < ind_bytes) { + if (m.indirect_buffer) gl_->glDeleteBuffers(1, &m.indirect_buffer); + size_t new_cap = m.indirect_capacity ? 
m.indirect_capacity : 4096; + while (new_cap < ind_bytes) new_cap *= 2; + gl_->glCreateBuffers(1, &m.indirect_buffer); + gl_->glNamedBufferStorage(m.indirect_buffer, new_cap, nullptr, GL_DYNAMIC_STORAGE_BIT); + m.indirect_capacity = new_cap; + } + gl_->glNamedBufferSubData(m.indirect_buffer, 0, ind_bytes, indirect_scratch_.data()); } void ViewportWindow::updateCamera() { @@ -869,7 +900,6 @@ void ViewportWindow::render() { GLint u_vp = gl_->glGetUniformLocation(main_program_, "u_view_projection"); GLint u_light = gl_->glGetUniformLocation(main_program_, "u_light_dir"); GLint u_sel = gl_->glGetUniformLocation(main_program_, "u_selected_id"); - GLint u_inst_off = gl_->glGetUniformLocation(main_program_, "u_instance_offset"); gl_->glUniformMatrix4fv(u_vp, 1, GL_FALSE, vp.constData()); gl_->glUniform3f(u_light, 0.3f, 0.5f, 0.8f); gl_->glUniform1ui(u_sel, selected_object_id_); @@ -882,29 +912,23 @@ void ViewportWindow::render() { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; cullAndUploadVisible(m, planes); - if (visible_flat_.empty()) continue; + if (m.indirect_command_count == 0) continue; gl_->glBindVertexArray(m.vao); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); - - for (size_t mi = 0; mi < m.meshes.size(); ++mi) { - const auto& mesh = m.meshes[mi]; - uint32_t vis_count = m.mesh_vis_count[mi]; - if (vis_count == 0 || mesh.index_count == 0) continue; - gl_->glUniform1ui(u_inst_off, m.mesh_vis_first[mi]); - gl_->glDrawElementsInstancedBaseVertex( - GL_TRIANGLES, - static_cast(mesh.index_count), - GL_UNSIGNED_INT, - reinterpret_cast(static_cast(mesh.ebo_byte_offset)), - static_cast(vis_count), - static_cast(mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES)); - visible_triangles_ += (mesh.index_count / 3) * vis_count; - visible_objects_ += vis_count; - ++instanced_draws_; + gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m.indirect_buffer); + 
gl_->glMultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, + static_cast(m.indirect_command_count), 0); + + for (const auto& cmd : indirect_scratch_) { + visible_triangles_ += (cmd.count / 3) * cmd.instanceCount; + visible_objects_ += cmd.instanceCount; } + instanced_draws_ += m.indirect_command_count; } + gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); renderAxisGizmo(); @@ -971,33 +995,23 @@ void ViewportWindow::renderPickPass() { gl_->glUseProgram(pick_program_); GLint u_vp = gl_->glGetUniformLocation(pick_program_, "u_view_projection"); - GLint u_inst_off = gl_->glGetUniformLocation(pick_program_, "u_instance_offset"); gl_->glUniformMatrix4fv(u_vp, 1, GL_FALSE, vp.constData()); for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; cullAndUploadVisible(m, planes); - if (visible_flat_.empty()) continue; + if (m.indirect_command_count == 0) continue; gl_->glBindVertexArray(m.vao); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); - - for (size_t mi = 0; mi < m.meshes.size(); ++mi) { - const auto& mesh = m.meshes[mi]; - uint32_t vis_count = m.mesh_vis_count[mi]; - if (vis_count == 0 || mesh.index_count == 0) continue; - gl_->glUniform1ui(u_inst_off, m.mesh_vis_first[mi]); - gl_->glDrawElementsInstancedBaseVertex( - GL_TRIANGLES, - static_cast(mesh.index_count), - GL_UNSIGNED_INT, - reinterpret_cast(static_cast(mesh.ebo_byte_offset)), - static_cast(vis_count), - static_cast(mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES)); - } + gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m.indirect_buffer); + gl_->glMultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, + static_cast(m.indirect_command_count), 0); } + gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); } diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index fd21bb76416..966761eeaf1 
100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -38,6 +38,15 @@ #include "InstancedGeometry.h" #include "SidecarCache.h" +// Matches GL_DRAW_INDIRECT_BUFFER layout for glMultiDrawElementsIndirect. +struct DrawElementsIndirectCommand { + uint32_t count; + uint32_t instanceCount; + uint32_t firstIndex; + uint32_t baseVertex; + uint32_t baseInstance; +}; + // Per-model GPU state for the instanced render path. // // VBO: local-coord interleaved verts (pos3 + normal3 + color1_packed) — 28 B. @@ -71,13 +80,15 @@ struct ModelGpuData { ModelBvh bvh; // Dynamic visible-instance index buffer (std430, binding = 1). - // Re-uploaded each frame from frame_visible_scratch_. + // Re-uploaded each frame from visible_flat_. GLuint visible_ssbo = 0; size_t visible_ssbo_capacity = 0; // bytes - // Per-mesh visible-list offset/count, rebuilt each frame. - std::vector mesh_vis_first; - std::vector mesh_vis_count; + // GL_DRAW_INDIRECT_BUFFER of DrawElementsIndirectCommand[], one per + // non-empty mesh. Re-uploaded each frame. + GLuint indirect_buffer = 0; + size_t indirect_capacity = 0; // bytes + uint32_t indirect_command_count = 0; // valid commands this frame bool finalized = false; bool hidden = false; @@ -152,8 +163,9 @@ class ViewportWindow : public QWindow { bool growModelSsbo(ModelGpuData& m, size_t needed_total); ModelGpuData& getOrCreateModel(uint32_t model_id); - // Populate m.mesh_vis_first / mesh_vis_count and upload visible indices - // to m.visible_ssbo. Uses BVH when available, else linear scan. + // Frustum-cull m's instances (BVH if available, else linear scan), + // build the per-mesh DrawElementsIndirectCommand array + flat visible + // list, and upload both to m.indirect_buffer / m.visible_ssbo. 
void cullAndUploadVisible(ModelGpuData& m, const float planes[6][4]); // Mouse interaction @@ -194,9 +206,12 @@ class ViewportWindow : public QWindow { // Reused scratch: visible-instance index lists per mesh, flattened into // `visible_flat_` for upload. Both live in the parent object to avoid - // per-frame allocation. - std::vector> visible_by_mesh_; - std::vector visible_flat_; + // per-frame allocation. indirect_scratch_ is the matching array of + // DrawElementsIndirectCommand records — forward-declared as bytes so + // the header doesn't need the struct definition. + std::vector> visible_by_mesh_; + std::vector visible_flat_; + std::vector indirect_scratch_; // Camera QVector3D camera_target_{0, 0, 0}; From 04733487347504b6cd609857d7fe36047961ef87 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 21:10:27 +1000 Subject: [PATCH 19/37] Two-sided lighting, rename misleading draw-count stat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two bugs conflated as "weird colors": 1. Two-sided lighting. IFC placements often embed reflection matrices (mirrored families). Transforming a_normal by mat3(inst.transform) produces a normal pointing the wrong way on those instances, and max(n·L, 0) then clamps the surface to pure ambient — reads as dark / washed out. Use gl_FrontFacing to flip n in the fragment shader so both winding orientations shade correctly. The proper fix (ship an inverse-transpose normal matrix or a det-sign bit per instance) is still owed; that would unlock re-enabling GL_CULL_FACE for a big fragment- work win on closed solids. 2. Stats label "inst_draws" was counting indirect sub-draws, not actual GL draw calls — misleading since MDI collapses N sub- draws into one glMultiDrawElementsIndirect. Split into gl_draw_calls (real GL calls, = drawn-model count) and indirect_sub_draws (packed sub-commands). 
For a BIM model with 47k unique meshes at full view this now correctly reads "1 gl_draws (47092 sub)" rather than suggesting 47k driver dispatches. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/MainWindow.cpp | 6 ++++-- src/ifcviewer/ViewportWindow.cpp | 19 ++++++++++++++----- src/ifcviewer/ViewportWindow.h | 6 ++++-- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index ceeedc8cbd4..8b63f3bdf68 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -41,13 +41,15 @@ MainWindow::MainWindow(QWidget* parent) connect(viewport_, &ViewportWindow::frameStatsUpdated, this, [this](const ViewportWindow::FrameStats& s) { if (!stats_label_->isVisible()) return; stats_label_->setText( - QString("%1 fps | %2 ms | %3/%4 obj | %5/%6 tri") + QString("%1 fps | %2 ms | %3/%4 obj | %5/%6 tri | %7 gl_draws (%8 sub)") .arg(s.fps, 0, 'f', 1) .arg(s.frame_time_ms, 0, 'f', 1) .arg(s.visible_objects) .arg(s.total_objects) .arg(s.visible_triangles) - .arg(s.total_triangles)); + .arg(s.total_triangles) + .arg(s.gl_draw_calls) + .arg(s.indirect_sub_draws)); }); connect(&AppSettings::instance(), &AppSettings::showStatsChanged, this, [this](bool show) { diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index b24ff7e3b3d..d58f192733a 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -123,7 +123,13 @@ uniform vec3 u_light_dir; out vec4 frag_color; void main() { + // Two-sided lighting: IFC placements frequently embed reflections + // (mirrored families), which flip triangle winding and invert the + // transformed normal. Taking abs(dot) — or equivalently flipping n + // based on gl_FrontFacing — makes both sides shade correctly + // regardless of winding / reflection state. 
vec3 n = normalize(v_normal); + if (!gl_FrontFacing) n = -n; float ndotl = max(dot(n, u_light_dir), 0.0); float ambient = 0.25; float diffuse = 0.75 * ndotl; @@ -906,7 +912,8 @@ void ViewportWindow::render() { visible_triangles_ = 0; visible_objects_ = 0; - instanced_draws_ = 0; + gl_draw_calls_ = 0; + indirect_sub_draws_ = 0; for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; @@ -926,7 +933,8 @@ void ViewportWindow::render() { visible_triangles_ += (cmd.count / 3) * cmd.instanceCount; visible_objects_ += cmd.instanceCount; } - instanced_draws_ += m.indirect_command_count; + indirect_sub_draws_ += m.indirect_command_count; + ++gl_draw_calls_; } gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); @@ -964,16 +972,17 @@ void ViewportWindow::render() { stats.total_triangles = total_tri; stats.visible_triangles = visible_triangles_; stats.unique_meshes = total_meshes; - stats.instanced_draws = instanced_draws_; + stats.gl_draw_calls = gl_draw_calls_; + stats.indirect_sub_draws = indirect_sub_draws_; emit frameStatsUpdated(stats); qDebug("[frame] %.1f fps %.2f ms obj %u/%u tri %u/%u " - "meshes %u inst_draws %u " + "meshes %u gl_draws %u sub_draws %u " "vram %.1f MB (vbo %.1f + ebo %.1f + ssbo %.1f) models %zu (%zu hidden)", last_fps_, 1000.0f / last_fps_, visible_objects_, total_obj, visible_triangles_, total_tri, - total_meshes, instanced_draws_, + total_meshes, gl_draw_calls_, indirect_sub_draws_, (total_vbo + total_ebo + total_ssbo) / (1024.0*1024.0), total_vbo / (1024.0*1024.0), total_ebo / (1024.0*1024.0), diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 966761eeaf1..2c3019eb15d 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -136,7 +136,8 @@ class ViewportWindow : public QWindow { uint32_t total_triangles; uint32_t visible_triangles; uint32_t unique_meshes; - uint32_t instanced_draws; + uint32_t gl_draw_calls; // actual 
glMultiDrawElementsIndirect issues per frame + uint32_t indirect_sub_draws; // total commands packed into those indirect buffers }; signals: @@ -202,7 +203,8 @@ class ViewportWindow : public QWindow { // Per-frame stats uint32_t visible_triangles_ = 0; uint32_t visible_objects_ = 0; - uint32_t instanced_draws_ = 0; + uint32_t gl_draw_calls_ = 0; + uint32_t indirect_sub_draws_ = 0; // Reused scratch: visible-instance index lists per mesh, flattened into // `visible_flat_` for upload. Both live in the parent object to avoid From d1cdec2c54829f147e4c96f8b5d7376621948e19 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 22:06:58 +1000 Subject: [PATCH 20/37] Enable reorient-shells in geometry iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IFC files routinely have IfcConnectedFaceSets whose faces point inconsistently within the same shell — the result under per-vertex normals is dark inside-out patches, and under GL_CULL_FACE it's swiss-cheese. reorient-shells fixes the face winding at geometry generation time, which is the only place it can be fixed correctly; no shader trick can recover from a mesh whose triangles disagree among themselves. Off by default in IfcOpenShell because it adds iterator time, but we cache the result in the sidecar so it's a one-shot cost per file. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/GeometryStreamer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ifcviewer/GeometryStreamer.cpp b/src/ifcviewer/GeometryStreamer.cpp index 226fb0808ca..d3edcce19f8 100644 --- a/src/ifcviewer/GeometryStreamer.cpp +++ b/src/ifcviewer/GeometryStreamer.cpp @@ -270,6 +270,11 @@ void GeometryStreamer::run(const std::string& path, int num_threads) { settings.set("use-world-coords", false); settings.set("weld-vertices", false); settings.set("apply-default-materials", true); + // Off by default in IfcOpenShell — makes face winding consistent within + // each shell, which we need for GL_CULL_FACE and for per-vertex normals + // to shade a solid without dark inside-out patches. Costs some iterator + // time, but results are cached in the sidecar so it's a one-shot hit. + settings.set("reorient-shells", true); std::unique_ptr iterator; try { From 4729a094fcdcc8ca6cf51bb32cc9018a12193642 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 22:07:19 +1000 Subject: [PATCH 21/37] Backface culling with reflection-aware two-pass MDI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enables GL_CULL_FACE by default (user-toggleable in Settings) so closed solids skip shading their back halves. The catch is that IFC placements can contain reflections (mat4 with det<0 — mirrored families, symmetric instances). Naively culling would make every mirrored instance vanish because the rasterizer sees its screen-space winding as backwards. Fix: detect reflections at upload time via determinant sign, bucket visible instances into forward (det>=0) and reverse (det<0) per mesh during culling, and issue two glMultiDrawElementsIndirect calls per model with glFrontFace toggled CCW/CW between them. The indirect buffer is still one buffer — just split into a forward slice followed by a reverse slice, with m.indirect_forward_count recording the split. 
Vertex shader flips the normal when the transform has negative determinant, keeping lighting correct on mirrored instances. The fragment shader keeps the gl_FrontFacing fallback as a safety net when culling is disabled (e.g. for files with open shells). Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/AppSettings.cpp | 14 +++ src/ifcviewer/AppSettings.h | 5 + src/ifcviewer/SettingsWindow.cpp | 8 ++ src/ifcviewer/SettingsWindow.h | 1 + src/ifcviewer/ViewportWindow.cpp | 176 ++++++++++++++++++++++++------- src/ifcviewer/ViewportWindow.h | 13 ++- 6 files changed, 174 insertions(+), 43 deletions(-) diff --git a/src/ifcviewer/AppSettings.cpp b/src/ifcviewer/AppSettings.cpp index af1edfa36f6..ff8d3bb3f1b 100644 --- a/src/ifcviewer/AppSettings.cpp +++ b/src/ifcviewer/AppSettings.cpp @@ -25,6 +25,7 @@ namespace { constexpr const char* kGeometryLibraryKey = "geometry/library"; constexpr const char* kGeometryLibraryDefault = "hybrid-cgal-simple-opencascade"; constexpr const char* kShowStatsKey = "viewport/show_stats"; +constexpr const char* kBackfaceCullingKey = "viewport/backface_culling"; } AppSettings& AppSettings::instance() { @@ -58,14 +59,27 @@ void AppSettings::setShowStats(bool value) { emit showStatsChanged(value); } +bool AppSettings::backfaceCulling() const { + return backface_culling_; +} + +void AppSettings::setBackfaceCulling(bool value) { + if (backface_culling_ == value) return; + backface_culling_ = value; + persist(); + emit backfaceCullingChanged(value); +} + void AppSettings::load() { QSettings settings; geometry_library_ = settings.value(kGeometryLibraryKey, kGeometryLibraryDefault).toString(); show_stats_ = settings.value(kShowStatsKey, false).toBool(); + backface_culling_ = settings.value(kBackfaceCullingKey, true).toBool(); } void AppSettings::persist() { QSettings settings; settings.setValue(kGeometryLibraryKey, geometry_library_); settings.setValue(kShowStatsKey, show_stats_); + settings.setValue(kBackfaceCullingKey, backface_culling_); } diff 
--git a/src/ifcviewer/AppSettings.h b/src/ifcviewer/AppSettings.h index f70062475c6..8b38c61a338 100644 --- a/src/ifcviewer/AppSettings.h +++ b/src/ifcviewer/AppSettings.h @@ -37,9 +37,13 @@ class AppSettings : public QObject { bool showStats() const; void setShowStats(bool value); + bool backfaceCulling() const; + void setBackfaceCulling(bool value); + signals: void geometryLibraryChanged(const QString& value); void showStatsChanged(bool value); + void backfaceCullingChanged(bool value); private: AppSettings(); @@ -48,6 +52,7 @@ class AppSettings : public QObject { QString geometry_library_; bool show_stats_ = false; + bool backface_culling_ = true; }; #endif // APPSETTINGS_H diff --git a/src/ifcviewer/SettingsWindow.cpp b/src/ifcviewer/SettingsWindow.cpp index c4ebddc650e..69e1f025b80 100644 --- a/src/ifcviewer/SettingsWindow.cpp +++ b/src/ifcviewer/SettingsWindow.cpp @@ -44,6 +44,12 @@ void SettingsWindow::setupUi() { show_stats_check_ = new QCheckBox(this); form->addRow("Show Performance Stats", show_stats_check_); + backface_culling_check_ = new QCheckBox(this); + backface_culling_check_->setToolTip( + "Skip triangles facing away from the camera. 
Big FPS win on " + "closed solids; disable if you see holes in open geometry."); + form->addRow("Backface Culling", backface_culling_check_); + auto* button_box = new QDialogButtonBox( QDialogButtonBox::Ok | QDialogButtonBox::Cancel, this); @@ -65,10 +71,12 @@ void SettingsWindow::showEvent(QShowEvent* event) { void SettingsWindow::syncFromSettings() { geometry_library_edit_->setText(AppSettings::instance().geometryLibrary()); show_stats_check_->setChecked(AppSettings::instance().showStats()); + backface_culling_check_->setChecked(AppSettings::instance().backfaceCulling()); } void SettingsWindow::onAccepted() { AppSettings::instance().setGeometryLibrary(geometry_library_edit_->text()); AppSettings::instance().setShowStats(show_stats_check_->isChecked()); + AppSettings::instance().setBackfaceCulling(backface_culling_check_->isChecked()); accept(); } diff --git a/src/ifcviewer/SettingsWindow.h b/src/ifcviewer/SettingsWindow.h index ea55252682e..967938b4a23 100644 --- a/src/ifcviewer/SettingsWindow.h +++ b/src/ifcviewer/SettingsWindow.h @@ -43,6 +43,7 @@ private slots: QLineEdit* geometry_library_edit_ = nullptr; QCheckBox* show_stats_check_ = nullptr; + QCheckBox* backface_culling_check_ = nullptr; }; #endif diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index d58f192733a..84778f3f2f7 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -19,6 +19,8 @@ #include "ViewportWindow.h" +#include "AppSettings.h" + #include #include #include @@ -91,10 +93,17 @@ void main() { vec4 world = inst.transform * vec4(a_position, 1.0); gl_Position = u_view_projection * world; - // Rotate the normal by the upper-3x3 of the transform. For the vast - // majority of BIM placements this is a rigid rotation (+ uniform scale), - // so we skip the inverse-transpose. - v_normal = normalize(mat3(inst.transform) * a_normal); + // Rotate the normal by the upper-3x3 of the transform. 
BIM placements + // are overwhelmingly rigid rotations (+ optional uniform scale + + // optional reflection), so we skip the full inverse-transpose but do + // need to flip the normal when the transform contains a reflection, + // otherwise mirrored instances shade as if inside-out. The same + // determinant sign is what GL_CULL_FACE uses to decide winding, so + // keeping them in agreement means backface culling is safe to enable. + mat3 rot = mat3(inst.transform); + vec3 n = rot * a_normal; + if (determinant(rot) < 0.0) n = -n; + v_normal = normalize(n); vec4 baked = a_color; if (inst.color_override != 0u) { @@ -123,11 +132,11 @@ uniform vec3 u_light_dir; out vec4 frag_color; void main() { - // Two-sided lighting: IFC placements frequently embed reflections - // (mirrored families), which flip triangle winding and invert the - // transformed normal. Taking abs(dot) — or equivalently flipping n - // based on gl_FrontFacing — makes both sides shade correctly - // regardless of winding / reflection state. + // v_normal already has the reflection flip applied in the vertex + // shader. When backface culling is off, open shells let us see the + // "wrong" side of a face — flip based on gl_FrontFacing so both + // sides light correctly. When culling is on this branch is always + // true and has no effect. vec3 n = normalize(v_normal); if (!gl_FrontFacing) n = -n; float ndotl = max(dot(n, u_light_dir), 0.0); @@ -230,6 +239,17 @@ static GLuint linkProgram(QOpenGLFunctions_4_5_Core* gl, GLuint vert, GLuint fra // ----------------------------------------------------------------------------- +// Determinant of the upper-left 3x3 of a column-major mat4 stored as 16 floats. +// Sign tells us whether the transform contains a reflection, which is what +// decides which glFrontFace winding to draw the instance with. 
+static bool transformIsReflected(const float t[16]) { + const float det = + t[0] * (t[5] * t[10] - t[9] * t[6]) + - t[4] * (t[1] * t[10] - t[9] * t[2]) + + t[8] * (t[1] * t[6] - t[5] * t[2]); + return det < 0.0f; +} + static bool aabbInFrustum(const float aabb_min[3], const float aabb_max[3], const float planes[6][4]) { for (int p = 0; p < 6; ++p) { @@ -344,6 +364,19 @@ void ViewportWindow::initGL() { gl_->glEnable(GL_DEPTH_TEST); gl_->glEnable(GL_MULTISAMPLE); gl_->glClearColor(0.18f, 0.20f, 0.22f, 1.0f); + gl_->glCullFace(GL_BACK); + if (AppSettings::instance().backfaceCulling()) gl_->glEnable(GL_CULL_FACE); + else gl_->glDisable(GL_CULL_FACE); + + // Hot-toggle cull state when the setting changes. Queued so we touch GL + // state only when render() is about to run. + connect(&AppSettings::instance(), &AppSettings::backfaceCullingChanged, + this, [this](bool on) { + if (!gl_initialized_ || !gl_) return; + context_->makeCurrent(this); + if (on) gl_->glEnable(GL_CULL_FACE); + else gl_->glDisable(GL_CULL_FACE); + }); gl_initialized_ = true; frame_clock_.start(); @@ -552,6 +585,7 @@ void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { std::memcpy(inst.world_aabb_min, chunk.world_aabb_min, sizeof(inst.world_aabb_min)); std::memcpy(inst.world_aabb_max, chunk.world_aabb_max, sizeof(inst.world_aabb_max)); m.instances.push_back(inst); + m.instance_reflected.push_back(transformIsReflected(inst.transform) ? 1 : 0); // Append the GPU record to the instance SSBO so the model is drawable // immediately, without waiting for finalizeModel. The visible-list @@ -693,6 +727,13 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { } m.ssbo_instance_count = static_cast(gpu.size()); + // Recompute the reflection flag from each instance's transform — the + // sidecar only caches InstanceCpu, not the parallel reflection flags. 
+ m.instance_reflected.resize(m.instances.size()); + for (size_t i = 0; i < m.instances.size(); ++i) { + m.instance_reflected[i] = transformIsReflected(m.instances[i].transform) ? 1 : 0; + } + buildBvhForModel(m, model_id); m.finalized = true; @@ -783,15 +824,25 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { } void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6][4]) { - // Ensure per-mesh scratch sized. - if (visible_by_mesh_.size() < m.meshes.size()) visible_by_mesh_.resize(m.meshes.size()); - for (size_t i = 0; i < m.meshes.size(); ++i) visible_by_mesh_[i].clear(); + // Per-mesh scratch, split by winding: fwd = non-reflected (CCW in screen + // space), rev = reflected (CW in screen space). Splitting lets the draw + // pass toggle glFrontFace once between two MDI calls so GL_CULL_FACE does + // the right thing for both. + if (visible_by_mesh_fwd_.size() < m.meshes.size()) visible_by_mesh_fwd_.resize(m.meshes.size()); + if (visible_by_mesh_rev_.size() < m.meshes.size()) visible_by_mesh_rev_.resize(m.meshes.size()); + for (size_t i = 0; i < m.meshes.size(); ++i) { + visible_by_mesh_fwd_[i].clear(); + visible_by_mesh_rev_[i].clear(); + } auto test_and_push = [&](uint32_t inst_idx) { const InstanceCpu& inst = m.instances[inst_idx]; if (!aabbInFrustum(inst.world_aabb_min, inst.world_aabb_max, planes)) return; - if (inst.mesh_id < visible_by_mesh_.size()) - visible_by_mesh_[inst.mesh_id].push_back(inst_idx); + if (inst.mesh_id >= m.meshes.size()) return; + const bool reflected = inst_idx < m.instance_reflected.size() + && m.instance_reflected[inst_idx] != 0; + if (reflected) visible_by_mesh_rev_[inst.mesh_id].push_back(inst_idx); + else visible_by_mesh_fwd_[inst.mesh_id].push_back(inst_idx); }; if (!m.bvh.nodes.empty()) { @@ -820,27 +871,34 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] for (uint32_t i = 0; i < m.instances.size(); ++i) test_and_push(i); } - // Flatten into visible_flat_ and build 
one DrawElementsIndirectCommand - // per non-empty mesh. + // Flatten fwd-slice first, then rev-slice, into visible_flat_. Build + // matching DrawElementsIndirectCommands; commands for the fwd slice fill + // [0, indirect_forward_count), rev fills [indirect_forward_count, end). visible_flat_.clear(); indirect_scratch_.clear(); - for (size_t mi = 0; mi < m.meshes.size(); ++mi) { - const auto& mesh = m.meshes[mi]; - const uint32_t vis_count = static_cast(visible_by_mesh_[mi].size()); - if (vis_count == 0 || mesh.index_count == 0) continue; - - DrawElementsIndirectCommand cmd; - cmd.count = mesh.index_count; - cmd.instanceCount = vis_count; - cmd.firstIndex = mesh.ebo_byte_offset / sizeof(uint32_t); - cmd.baseVertex = mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES; - cmd.baseInstance = static_cast(visible_flat_.size()); - indirect_scratch_.push_back(cmd); - - visible_flat_.insert(visible_flat_.end(), - visible_by_mesh_[mi].begin(), - visible_by_mesh_[mi].end()); - } + + auto emit_slice = [&](std::vector>& by_mesh) { + for (size_t mi = 0; mi < m.meshes.size(); ++mi) { + const auto& mesh = m.meshes[mi]; + const uint32_t vis_count = static_cast(by_mesh[mi].size()); + if (vis_count == 0 || mesh.index_count == 0) continue; + + DrawElementsIndirectCommand cmd; + cmd.count = mesh.index_count; + cmd.instanceCount = vis_count; + cmd.firstIndex = mesh.ebo_byte_offset / sizeof(uint32_t); + cmd.baseVertex = mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES; + cmd.baseInstance = static_cast(visible_flat_.size()); + indirect_scratch_.push_back(cmd); + + visible_flat_.insert(visible_flat_.end(), + by_mesh[mi].begin(), by_mesh[mi].end()); + } + }; + + emit_slice(visible_by_mesh_fwd_); + m.indirect_forward_count = static_cast(indirect_scratch_.size()); + emit_slice(visible_by_mesh_rev_); m.indirect_command_count = static_cast(indirect_scratch_.size()); // Upload visible list (keep binding alive even when empty). 
@@ -915,6 +973,10 @@ void ViewportWindow::render() { gl_draw_calls_ = 0; indirect_sub_draws_ = 0; + // Start each frame with CCW-is-front; the two-pass draw below flips + // back and forth. Harmless when culling is off. + gl_->glFrontFace(GL_CCW); + for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; @@ -925,16 +987,34 @@ void ViewportWindow::render() { gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m.indirect_buffer); - gl_->glMultiDrawElementsIndirect( - GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, - static_cast(m.indirect_command_count), 0); + + const uint32_t fwd = m.indirect_forward_count; + const uint32_t rev = m.indirect_command_count - fwd; + // Forward pass: non-reflected instances, standard CCW winding. + if (fwd > 0) { + gl_->glFrontFace(GL_CCW); + gl_->glMultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, + static_cast(fwd), 0); + ++gl_draw_calls_; + } + // Reverse pass: reflected instances — their world-space winding is + // flipped, so telling GL the front is CW keeps cull-back working. 
+ if (rev > 0) { + gl_->glFrontFace(GL_CW); + gl_->glMultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_INT, + reinterpret_cast(fwd * sizeof(DrawElementsIndirectCommand)), + static_cast(rev), 0); + ++gl_draw_calls_; + gl_->glFrontFace(GL_CCW); + } for (const auto& cmd : indirect_scratch_) { visible_triangles_ += (cmd.count / 3) * cmd.instanceCount; visible_objects_ += cmd.instanceCount; } indirect_sub_draws_ += m.indirect_command_count; - ++gl_draw_calls_; } gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); @@ -1006,6 +1086,8 @@ void ViewportWindow::renderPickPass() { GLint u_vp = gl_->glGetUniformLocation(pick_program_, "u_view_projection"); gl_->glUniformMatrix4fv(u_vp, 1, GL_FALSE, vp.constData()); + gl_->glFrontFace(GL_CCW); + for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; @@ -1016,9 +1098,23 @@ void ViewportWindow::renderPickPass() { gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m.indirect_buffer); - gl_->glMultiDrawElementsIndirect( - GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, - static_cast(m.indirect_command_count), 0); + + const uint32_t fwd = m.indirect_forward_count; + const uint32_t rev = m.indirect_command_count - fwd; + if (fwd > 0) { + gl_->glFrontFace(GL_CCW); + gl_->glMultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, + static_cast(fwd), 0); + } + if (rev > 0) { + gl_->glFrontFace(GL_CW); + gl_->glMultiDrawElementsIndirect( + GL_TRIANGLES, GL_UNSIGNED_INT, + reinterpret_cast(fwd * sizeof(DrawElementsIndirectCommand)), + static_cast(rev), 0); + gl_->glFrontFace(GL_CCW); + } } gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); gl_->glBindFramebuffer(GL_FRAMEBUFFER, 0); diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 2c3019eb15d..1bbc44c97c4 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ 
-71,7 +71,12 @@ struct ModelGpuData { uint32_t total_triangles = 0; std::vector meshes; - std::vector instances; // unsorted until finalize + std::vector instances; // unsorted + // 1:1 with instances[] — true when the instance transform has + // det < 0 (a reflection). Reflected instances need their + // triangle winding treated as reversed so GL_CULL_FACE culls + // the correct side. + std::vector instance_reflected; uint32_t ssbo_instance_count = 0; // Per-instance world AABB + BVH (built at finalize). The BVH is the @@ -88,7 +93,8 @@ struct ModelGpuData { // non-empty mesh. Re-uploaded each frame. GLuint indirect_buffer = 0; size_t indirect_capacity = 0; // bytes - uint32_t indirect_command_count = 0; // valid commands this frame + uint32_t indirect_command_count = 0; // total valid commands this frame + uint32_t indirect_forward_count = 0; // first N are CCW-winding draws bool finalized = false; bool hidden = false; @@ -211,7 +217,8 @@ class ViewportWindow : public QWindow { // per-frame allocation. indirect_scratch_ is the matching array of // DrawElementsIndirectCommand records — forward-declared as bytes so // the header doesn't need the struct definition. - std::vector> visible_by_mesh_; + std::vector> visible_by_mesh_fwd_; + std::vector> visible_by_mesh_rev_; std::vector visible_flat_; std::vector indirect_scratch_; From f32f471af37ddcb7ef6998d5addc3cfa1244476f Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Sun, 12 Apr 2026 23:16:38 +1000 Subject: [PATCH 22/37] Rewrite README for instancing pipeline and refocus Phase 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous README described a pre-instancing world (32-byte world- coord vertices with per-vertex object_id, ObjectDrawInfo structs, EBO reordering after BVH build, and a Phase 3 plan built around moving draw submission to the GPU). 
Most of that is either gone or already solved: - Vertices are now 28 B local-coord; per-instance transforms live in an SSBO read through a visible-index SSBO and gl_BaseInstanceARB. - ObjectDrawInfo is replaced by MeshInfo + InstanceCpu + InstanceGpu. - No EBO reorder on BVH build — the BVH is over instance AABBs and the mesh/EBO layout is orthogonal. - Draw-call submission is already one glMultiDrawElementsIndirect per model; the old Phase 3 goal is met. New content worth keeping: - GPU instancing section documents the mesh/instance/visible/indirect buffer contract the whole renderer hangs off of. - Reflection-aware two-pass draw is documented (det<0 placements, forward/reverse slice split, glFrontFace toggle). - reorient-shells and backface culling are called out as correctness + perf levers with their tradeoffs. - Phase 3 is rewritten around the actual bottleneck surfaced by profiling: per-frame glNamedBufferSubData stalls on the visible and indirect buffers. Includes the diagnostic methodology (empty- screen jump to 60 fps, window/MSAA invariance, upload-comment-out experiment) so future-me remembers why this is the next step. - 3A (persistent mapped ring buffers, near-term) and 3B (GPU-side compute cull, longer-term) split out with scope estimates. - Roadmap updated: instancing / MDI / reflections / reorient-shells / backface cull all ticked; 3A surfaced as the next open item. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 757 +++++++++++++++++----------------------- 1 file changed, 322 insertions(+), 435 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index d0122d63c83..4966d27d5ef 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -1,75 +1,115 @@ # IfcViewer -A high-performance native IFC viewer built on IfcOpenShell's C++ geometry engine with a Qt6 interface and OpenGL 4.5 rendering. 
+A high-performance native IFC viewer built on IfcOpenShell's C++ geometry +engine with a Qt6 interface and OpenGL 4.5 rendering. ## Architecture ``` -+-------------------------------------------+ -| Qt6 Application (MainWindow) | -| +----------+ +--------------------------+| -| | Element | | 3D Viewport || -| | Tree | | (QWindow + OpenGL 4.5) || -| | (per- | | || -| | model) | | Per-model VAO/VBO/EBO || -| +----------+ | glMultiDrawElements || -| | Property | | BVH frustum culling || -| | Table | | GPU pick pass || -| +----------+ +--------------------------+| -| | Status / Progress / Stats | -+-------------------------------------------+ - ^ ^ - | | - element metadata UploadChunks / Sidecar - | | -+-------------------------------------------+ -| GeometryStreamer (one per loaded model) | -| IfcGeom::Iterator with N threads | -| (models loaded sequentially) | -+-------------------------------------------+ ++---------------------------------------------------+ +| Qt6 Application (MainWindow) | +| +----------+ +----------------------------------+| +| | Element | | 3D Viewport || +| | Tree | | (QWindow + OpenGL 4.5 Core) || +| | (per- | | || +| | model) | | Per-model: VAO/VBO/EBO || +| +----------+ | instance SSBO || +| | Property | | visible SSBO || +| | Table | | indirect buffer || +| +----------+ | glMultiDrawElementsIndirect || +| | Status / Progress / Stats | ++---------------------------------------------------+ + ^ ^ + | | + element metadata MeshChunk / InstanceChunk / Sidecar + | | ++---------------------------------------------------+ +| GeometryStreamer (one per loaded model) | +| IfcGeom::Iterator with N threads | +| Dedups representations -> MeshChunk | +| Emits one InstanceChunk per placement | ++---------------------------------------------------+ ``` ### Key design decisions -- **QWindow viewport** embedded via `QWidget::createWindowContainer()`. This gives us a raw native surface for OpenGL, bypassing `QOpenGLWidget`'s compositor overhead. 
-- **Per-model GPU buffers**: each loaded model gets its own VAO/VBO/EBO. No shared buffer, no cross-model copies on growth. Removing a model frees its GPU memory immediately. -- **Interleaved vertex format**: position (3 floats) + normal (3 floats) + object ID (1 float, bitcast uint32) + color (RGBA8 packed into 1 float) = 32 bytes per vertex. -- **Progressive GPU upload**: bulk sidecar loads allocate empty GPU buffers, then stream data in 48 MB chunks per frame. VBO uploads first (no objects visible), then EBO (objects appear progressively as their index range lands). The viewport stays interactive throughout — you can orbit already-loaded models while new ones stream in. -- **Non-blocking sidecar loading**: sidecar files are read on a background thread. The heavy disk I/O (potentially gigabytes) never blocks the render loop. Only the final GPU upload and tree population happen on the main thread. -- **BVH frustum culling**: per-model BVH trees cull entire subtrees of objects in one frustum test, reducing per-frame cost from O(N) to O(log N). Falls back to linear scan during progressive upload; BVH activates once the model is fully loaded. -- **GPU object picking**: a second render pass writes object IDs to an R32UI framebuffer. Click reads back one pixel. No CPU-side raycasting. -- **Multi-model support**: multiple IFC files can be loaded simultaneously. Each model gets its own `GeometryStreamer` (owning the `ifcopenshell::file` for property lookup). Models are loaded sequentially. Per-model visibility toggle and removal are supported. -- **Multi-threaded tessellation**: `IfcGeom::Iterator` runs on a background thread and internally parallelizes geometry conversion across all CPU cores. -- **Non-blocking streaming**: the iterator emits `UploadChunk` signals via Qt's queued connection. The main thread uploads to the GPU without blocking iteration. 
-- **World coordinates**: geometry is emitted in world space (`use-world-coords=true`) so no per-object transform matrices are needed on the GPU. +- **QWindow viewport** embedded via `QWidget::createWindowContainer()`. Gives + us a raw native surface for OpenGL, bypassing `QOpenGLWidget`'s compositor + overhead. +- **GPU instancing as the central pillar.** IFC models are dominated by + repeated geometry — identical doors, windows, studs, pipes placed at + different transforms. IfcOpenShell's iterator surfaces representation + identity, so we upload each unique mesh exactly once and keep per-placement + data (transform, object id, optional colour override) in a separate SSBO. + For real projects this collapses tens of millions of triangles of duplicate + vertex data into a few hundred MB of unique meshes. +- **Per-model GPU buffers**: each loaded model gets its own + VAO/VBO/EBO/instance-SSBO/visible-SSBO/indirect-buffer. No cross-model + growth copies. Removing a model frees its GPU memory immediately. +- **Local-coordinate vertex format (28 B):** position (3 floats) + normal + (3 floats) + packed RGBA8 colour (1 uint). The per-instance transform is + applied in the vertex shader via an SSBO lookup. No world-baked vertex data. +- **Multi-draw indirect:** every frame the CPU builds a flat list of visible + instance indices and one `DrawElementsIndirectCommand` per non-empty mesh, + then issues one `glMultiDrawElementsIndirect` per model (two when mirrored instances are visible; see the two-pass draw below). 50k visible + instances across 8k unique meshes collapse to at most two driver-side command + submissions per model. +- **BVH frustum culling over instances**: per-model BVH trees cull whole + subtrees of placements with one frustum test. Falls back to a linear scan + during progressive upload and for very small models (< 32 instances). +- **Reflection-aware two-pass draw:** IFC placements can have negative- + determinant transforms (mirrored families). 
These flip the screen-space + winding of their triangles, which would make them vanish under + `GL_CULL_FACE`. The cull pass buckets visible instances into forward + (det ≥ 0) and reverse (det < 0) slices and the renderer issues two MDI + calls per model with `glFrontFace` toggled between them. +- **`reorient-shells` enabled in the iterator:** makes face winding + consistent within a shell at geometry-gen time — the only place this can + actually be fixed. Without it, files with inside-out faces produce dark + patches and swiss-cheese under backface culling. Costs iterator time but + is cached in the sidecar. +- **Progressive rendering during streaming:** the viewport is drawable + before `finalizeModel()`. Instances are pushed to the SSBO one at a time + via `glNamedBufferSubData` as they arrive, and the linear-scan cull path + handles them until the BVH is built. Orbit and pan remain interactive + through load. +- **Non-blocking sidecar loading**: sidecars are read on a background + thread; only the final GPU upload touches the main thread. +- **GPU object picking**: a second render pass writes object IDs into an + R32UI framebuffer. Click reads back one pixel. No CPU-side raycasting. +- **Multi-model support**: multiple IFCs can be loaded simultaneously. + Each gets its own `GeometryStreamer` (which owns the `ifcopenshell::file` + for property lookup). Models load sequentially. Per-model + hide/show/remove. 
### Files | File | Purpose | |------|---------| -| `main.cpp` | Application entry point, GL 4.5 surface format, CLI argument parsing | -| `MainWindow.h/cpp` | Qt main window: multi-model project management, element tree, property table, status bar | -| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffer management, camera, frustum culling, BVH traversal, picking | -| `GeometryStreamer.h/cpp` | Background geometry processing: loads IFC, runs iterator, emits chunks (one per model) | -| `BvhAccel.h/cpp` | BVH construction (median-split), per-model trees, EBO reordering | -| `SidecarCache.h/cpp` | Raw binary `.ifcview` sidecar read/write | -| `AppSettings.h/cpp` | Persisted application preferences (geometry library, show stats) | -| `SettingsWindow.h/cpp` | Settings dialog UI | +| `main.cpp` | Application entry, GL 4.5 surface format, CLI argument parsing | +| `MainWindow.h/cpp` | Qt main window: multi-model project, element tree, properties, status | +| `ViewportWindow.h/cpp` | OpenGL 4.5 Core renderer: shaders, buffers, camera, culling, MDI draw, picking | +| `GeometryStreamer.h/cpp` | Background iterator runner; emits `MeshChunk` + `InstanceChunk` | +| `InstancedGeometry.h` | Shared structs: `MeshInfo`, `InstanceCpu`, `InstanceGpu`, chunk records | +| `BvhAccel.h/cpp` | Median-split BVH builder; operates on instance world-AABBs | +| `SidecarCache.h/cpp` | Raw binary `.ifcview` (v4) sidecar read/write | +| `AppSettings.h/cpp` | Persisted preferences (geometry library, stats overlay, backface culling) | +| `SettingsWindow.h/cpp` | Settings dialog | | `CMakeLists.txt` | Build configuration | ## Dependencies -- **Qt6** (Core, Gui, Widgets) -- **OpenGL 4.5** (GL_ARB_direct_state_access) - available on Windows and Linux; macOS will need a Vulkan/MoltenVK backend (not yet implemented) -- **IfcOpenShell C++ libraries** (IfcParse, IfcGeom, and their dependencies: Open CASCADE, Boost, Eigen3, optionally CGAL) +- **Qt6** (Core, Gui, Widgets, OpenGL) +- **OpenGL 
4.5** with `GL_ARB_direct_state_access` and + `GL_ARB_shader_draw_parameters` — available on Windows and Linux. macOS + will need a Vulkan/MoltenVK backend (not yet implemented; macOS caps out + at GL 4.1). +- **IfcOpenShell C++ libraries** (IfcParse, IfcGeom, and their + dependencies: Open CASCADE, Boost, Eigen3, optionally CGAL). ## Building -IfcViewer is built as part of the IfcOpenShell CMake project. You do not need to build everything - disable the targets you don't need. - -### Minimal build (IfcViewer only) - -From the repository root: +IfcViewer is part of the IfcOpenShell CMake project. From the repo root: ```sh mkdir build && cd build @@ -89,25 +129,13 @@ cmake ../cmake \ make -j$(nproc) IfcViewer ``` -This builds only IfcParse, IfcGeom (with geometry kernels), and IfcViewer itself. All other targets (IfcConvert, Python bindings, serializers, etc.) are skipped. - If Qt6 is not in a standard location, pass `-DQT_DIR=/path/to/qt6`. -### Full build with IfcViewer enabled - -```sh -cmake ../cmake -DBUILD_IFCVIEWER=ON -make -j$(nproc) -``` - ## Usage ```sh -# Open one or more files from the command line ./IfcViewer arch.ifc struct.ifc mep.ifc - -# Or use File -> Add Files from the menu (supports multiselect) -./IfcViewer +./IfcViewer # then File -> Add Files ``` ### Controls @@ -117,449 +145,308 @@ make -j$(nproc) | Middle mouse drag | Orbit camera | | Shift + middle mouse drag | Pan camera | | Scroll wheel | Zoom | -| Left click | Select object (highlights in viewport and tree) | +| Left click | Select object | -### Keyboard shortcuts +### Keyboard | Key | Action | |-----|--------| | Ctrl+O | Add files | | Ctrl+Q | Quit | -## Performance Strategy - -The viewer targets smooth orbiting at 60 fps on models up to 1 million IFC objects. -Rendering performance is addressed in three phases. Each phase builds on the -previous one, and the system is designed so that smaller models never pay for -optimizations they don't need. 
- -### Phase 1: Per-Object Frustum Culling (CPU) - -**Status:** Implemented. - -The simplest win: don't draw what's off screen. - -#### Data model - -During `uploadChunk()`, the viewport records a small metadata struct for every -object that enters the GPU buffers: - -```cpp -struct ObjectDrawInfo { - uint32_t index_offset; // byte offset into the model's EBO - uint32_t index_count; // number of indices (triangles * 3) - uint32_t model_id; // which model this object belongs to - float aabb_min[3]; // world-space axis-aligned bounding box - float aabb_max[3]; // (computed from vertex positions at upload time) -}; -``` - -This costs 32 bytes per object. For 1M objects that's ~32 MB of CPU-side -metadata — negligible next to the vertex data. - -#### Frustum extraction - -Each frame, before drawing, six clip planes are extracted from the -view-projection matrix (`VP = proj * view`). The standard Griess-Hartmann -method pulls them directly from the matrix rows: - -``` -left = VP[3] + VP[0] -right = VP[3] - VP[0] -bottom = VP[3] + VP[1] -top = VP[3] - VP[1] -near = VP[3] + VP[2] -far = VP[3] - VP[2] -``` - -Each plane is stored as (a, b, c, d) and normalized so that -`a*x + b*y + c*z + d` gives the signed distance from the plane. - -#### AABB-frustum test +### Settings -For each object, the AABB is tested against all six planes using the -"p-vertex / n-vertex" method: +- **Geometry Library** — kernel string passed to IfcOpenShell (default + `hybrid-cgal-simple-opencascade`). +- **Show Performance Stats** — overlay FPS / object / triangle / draw + counts in the status bar. +- **Backface Culling** — `GL_CULL_FACE` on closed solids. Default on. + Disable if a model uses open shells and you see missing faces. -- For each plane, find the AABB corner most in the direction of the plane - normal (the p-vertex). -- If the p-vertex is on the negative side of the plane, the entire AABB is - outside the frustum → cull. -- If any plane culls the object, skip it. 
- -This test is conservative: it never culls a visible object, but may -occasionally keep an invisible one (when the AABB straddles a frustum corner). -That's fine — false positives just cost a few extra triangles. +## Performance Strategy -#### Drawing visible objects +The viewer targets smooth orbiting at 60 fps on real-world multi-discipline +BIM projects (a "real job" being ~50 models, several million placements, +hundreds of millions of rasterised triangles when everything is in view). -The surviving objects' `(index_count, index_offset)` pairs are passed to -`glMultiDrawElements()` in a single call. This replaces the previous single -`glDrawElements()` that drew everything. The GPU processes only the index -ranges that survived the frustum test. +Rendering performance has evolved in phases. Each builds on the previous, +and smaller models never pay for optimisations they don't need. -Alternatively, for the pick pass (which runs less frequently), the same -visibility list is reused — objects culled from the main pass are also culled -from picking. +### Phase 1 — Per-object Frustum Culling -#### Performance characteristics +**Status:** implemented (and still the fallback for small models / during +streaming). -| Metric | Value | -|--------|-------| -| Per-object cost | ~6 dot products + 6 comparisons per frame | -| 50k objects | ~0.3 ms on a modern CPU core | -| 500k objects | ~3 ms (starts to matter at 60 fps) | -| 1M objects | ~6 ms (too expensive — need phase 3) | -| Memory overhead | 32 bytes/object | -| Load-time overhead | Near zero (AABB computed during existing upload) | +Six view-frustum planes are extracted from the view-projection matrix each +frame. Each instance's world AABB is tested with the p-vertex / n-vertex +method (one dot product + one compare per plane, 6 planes). -Phase 1 is sufficient for models up to ~100k objects. Beyond that, the CPU-side -frustum test becomes a measurable fraction of the frame budget, motivating -phase 3. 
+Surviving instance indices are written into a per-mesh bucket, then +flattened into a single `uint[]` (the "visible SSBO", binding = 1) and +accompanied by one `DrawElementsIndirectCommand` per non-empty mesh. +One `glMultiDrawElementsIndirect` call per model draws everything (a second call is issued only when reflected, det < 0, instances are visible). -### Phase 2: BVH Acceleration (optional, for large models) +Cost: ~6 dot products per instance per frame. Fine up to ~100 k instances +per frame; above that the linear scan shows up in profiles, motivating +Phase 2. -**Status:** Implemented. +### Phase 2 — BVH Acceleration + Sidecar Cache -For models exceeding ~100 objects, a bounding volume hierarchy (BVH) groups -nearby objects into a binary tree and culls entire subtrees in one frustum -test. This reduces the number of AABB-frustum tests from O(N_objects) to -O(log N) in the best case (camera zoomed into a corner) and gives a constant -overhead for the common case where most of the model is on screen. +**Status:** implemented. -A BVH was chosen over an octree because BIM data is spatially non-uniform — -dense MEP risers in one zone, sparse open atriums in another. An octree -subdivides space uniformly, wasting nodes on empty regions and creating deep -chains in dense ones. A BVH adapts its splits to the actual object -distribution, producing balanced trees regardless of density variation. +For models exceeding ~32 instances, a bounding volume hierarchy groups +nearby placements into a binary tree and culls entire subtrees with a +single frustum test. This reduces per-frame work from O(N) to O(log N) in +the best case (camera zoomed to a corner) and remains well under 1 ms for +100 k instances in the worst case (everything on screen). -#### When the BVH activates +A BVH was chosen over an octree because BIM data is spatially non-uniform +— dense MEP risers in one zone, sparse open atria in another. An octree +subdivides space uniformly, wasting nodes on empty regions and creating +deep chains in dense ones. 
A BVH adapts its splits to the actual +placement distribution. -The BVH is **optional and non-disruptive**. Until it is built, phase 1's -linear scan handles all culling. The rendering loop checks for an active BVH -and falls back to the linear scan for any model that doesn't have one. +#### Activation -The BVH activates in one of two ways: +The BVH is optional and non-disruptive. Until it is built, the Phase 1 +linear scan handles culling. The renderer checks for a BVH per model and +falls back to the scan for any model that doesn't have one. -1. **Sidecar cache exists**: If a `.ifcview` file is found next to the `.ifc` - file, the BVH is loaded from it instantly (raw memory read, no parsing). - The model uses BVH culling from the first frame after loading. -2. **Automatic build**: After streaming finishes, a background thread builds - the BVH from the per-object AABBs already computed in phase 1. Until it - completes, phase 1 culling handles visibility. On completion, the render - thread picks up the BVH on the next frame. The sidecar is written for - future loads. +It activates in one of two ways: -Models with fewer than 32 objects skip the BVH entirely — the overhead of tree -traversal is worse than a linear scan at that scale. +1. **Sidecar hit** — the `.ifcview` file next to the `.ifc` is found and + valid; its instance data is uploaded and the BVH rebuilt on the fly + from the restored AABBs (cheap — `< 100 ms` for 100 k placements). +2. **After streaming** — `finalizeModel()` builds the BVH synchronously + once all chunks are in (instances already live on the GPU, so there's + no EBO re-sort to do). The sidecar is written afterwards. -#### BVH node layout +Models under 32 instances skip the BVH. 
-Each node is 32 bytes, so two nodes fit in one 64-byte cache line: +#### BVH node layout (32 B, two per cache line) ```cpp struct BvhNode { - float aabb_min[3]; // world-space bounding box (12 bytes) - float aabb_max[3]; // (12 bytes) - uint32_t right_or_first; // interior: right child index; leaf: first object index (4 bytes) - uint16_t count; // 0 = interior node; >0 = leaf with this many objects (2 bytes) - uint16_t axis; // split axis for interior (0=x, 1=y, 2=z); unused for leaf (2 bytes) + float aabb_min[3]; // 12 B + float aabb_max[3]; // 12 B + uint32_t right_or_first; // interior: right child index; leaf: first item index + uint16_t count; // 0 = interior, >0 = leaf + uint16_t axis; // 0/1/2 for interior; unused for leaf }; ``` -Interior nodes store the right child index; the left child is always the -immediately next node in the array (implicit in pre-order DFS layout, no -pointer needed). Leaf nodes reference a contiguous range in a sorted -object-index array. - -The BVH is stored as a flat `std::vector` in pre-order DFS layout. -This means a depth-first traversal (which is what frustum culling does) reads -memory sequentially, maximizing prefetch and cache-line utilization. - -#### Build algorithm: object-median split - -1. Compute the centroid of each object's AABB. -2. Find the longest axis of the current node's bounding box. -3. Use `std::nth_element` to partition objects at the median centroid on that - axis. This is O(n) — no full sort needed. -4. Recurse on each half. Terminate when the node contains ≤ 8 objects (leaf). -5. Write nodes into the flat array in pre-order DFS. +Left child is always the next node (pre-order DFS). Leaf items are +indices into the per-model `instances` array; the parallel `bvh_items[]` +array carries the world AABBs. -Total build time is O(n log n). For 100k objects this is well under 100 ms on -a single core. 
+#### Build: object-median split -SAH (Surface Area Heuristic) is the gold standard for ray-tracing BVHs, but -for frustum culling — where we test 6 planes and early-out entire subtrees — -the quality difference vs. median split is negligible. Median split is simpler -and produces reliably balanced trees. +1. Compute centroid of each item's AABB. +2. Pick the longest axis of the node's AABB. +3. `std::nth_element` partitions at the median on that axis — O(n). +4. Recurse until a leaf holds ≤ 8 items. -#### Frustum traversal +O(n log n) total. No SAH — for frustum culling (6-plane tests, early +subtree reject) the quality difference vs median is negligible. -The traversal uses an explicit stack on the C++ stack (no heap allocation, -no recursion): +#### Traversal: stack-based, no recursion ``` -stack[64] = {0} // start at root; depth 64 handles billions of objects +stack[64] = { 0 } // root while stack not empty: node = nodes[stack.pop()] - if node AABB outside frustum: continue // cull entire subtree + if node.aabb outside frustum: continue if leaf: - for each object in node: - if object AABB in frustum: emit to visible list + for each item in node: + if item.aabb in frustum: emit to visible list else: - push right child, push left child // left processed first (DFS) + push right child, push left child // left processed first (DFS) ``` -When the camera is zoomed into a corner of the model, the traversal skips -large portions of the tree after testing only a handful of interior nodes. -When zoomed out to see everything, the traversal visits all leaves but the -overhead of the interior-node tests is small relative to the leaf work. +Depth 64 is enough for billions of items on any balanced tree. The stack +is on the C++ stack, zero per-frame allocation. -#### Per-model BVH +#### Sidecar format (`.ifcview`, v4) -Each loaded model gets its own BVH. 
During frustum culling, the outer loop -iterates over models (skipping hidden/removed ones); the inner loop traverses -that model's BVH. This means hiding or removing a model is free — just skip -its BVH, no tree modification needed. +Raw memory dump, Blender-`.blend`-style — no serialisation, no parsing. +Stores everything needed to skip the `IfcGeom::Iterator` pass: -```cpp -struct ModelBvh { - uint32_t model_id; - std::vector nodes; // flat BVH node array - std::vector object_indices; // indices into object_draw_info_ -}; +``` +SidecarHeader (magic "IFVW", version, endian, ...) +uint64_t source_file_size +uint32_t + float[] vertex data (7 floats × N_verts, local coords) +uint32_t + uint32_t[] index data (mesh-local) +uint32_t + MeshInfo[] per-unique-mesh metadata (48 B each) +uint32_t + InstanceCpu[] per-placement records (transform + AABB + ids) +uint32_t + PackedElementInfo[] element tree records +uint32_t + char[] string table ``` -#### EBO re-sorting - -For BVH culling to maximise GPU cache performance, the EBO is re-sorted so -that objects in the same BVH leaf are contiguous. This happens via **deferred -compaction**: - -1. During initial load, geometry uploads in iterator order (fast first frame, - phase 1 culling active). -2. After the BVH build completes on the background thread: - a. Walk the BVH leaves in DFS order. - b. For each object in each leaf, copy its index data to a new EBO buffer, - updating `ObjectDrawInfo::index_offset` accordingly. - c. Package the reordered EBO + updated draw info as a `BvhBuildResult`. -3. The render thread picks up the result on the next frame: one - `glNamedBufferSubData` call to re-upload the EBO, then swap in the new - draw info and activate the BVH. One frame of stutter, bounded by EBO - upload time (~5 ms for 32 MB). - -#### Async build and render-thread handoff - -The BVH build must not stall the render loop: - -1. 
`buildBvhAsync()` snapshots `object_draw_info_` under the upload mutex, - then launches a `std::thread`. -2. The thread builds the BVH and reordered EBO, then stores the result in a - `pending_bvh_result_` pointer under a separate mutex. -3. At the top of each `render()` call, `applyBvhResult()` checks for a - pending result. If found, it re-uploads the EBO (requires GL context), - swaps the draw info, and activates the BVH. -4. Until the BVH is ready, phase 1's linear scan runs every frame as before. - -#### Preprocessed sidecar format (`.ifcview`) - -The sidecar is a raw memory dump (Blender `.blend`-style) — no serialization -format, no parsing. It stores everything needed to display the model without -re-tessellating: vertex data, index data, per-object metadata, element tree -info, and the BVH. Loading is just `fread` into vectors → GPU upload → -render. The expensive `IfcGeom::Iterator` tessellation is skipped entirely. - -The IFC file is still parsed on demand (in background) for detailed property -lookup; the sidecar provides the basic properties (name, type, GUID) -immediately. +Staleness check: `source_file_size` vs actual file size. Mismatched → +reject and rebuild. Endianness marker rejects cross-arch caches. -``` -SidecarHeader (16 bytes: magic, version, endian, reserved) -uint64_t source_file_size +### GPU Instancing pipeline (the central pillar) -uint32_t + float[] vertex data (interleaved, 8 floats/vertex) -uint32_t + uint32_t[] index data (global indices, ready for EBO) -uint32_t + ObjectDrawInfo[] per-object draw metadata -uint32_t + PackedElementInfo[] element tree records (fixed-size) -uint32_t + char[] string table (concatenated UTF-8: guid, name, type) - -uint32_t num_bvh_models -per model: - uint32_t model_id - uint32_t + BvhNode[] BVH node array - uint32_t + uint32_t[] object indices -``` +Everything above plugs into a single data-flow, worth documenting on its +own because it's what makes the whole thing fast. 
-Staleness check: `source_file_size` is compared against the actual IFC file -size. If mismatched, the sidecar is stale and is rebuilt. This is cheap and -sufficient for a local cache (no hash computation on multi-GB files). +Per-model state on the GPU: -Endianness: if the marker reads back as `0x01020304`, the file was written on -the same architecture — just `fread` the structs directly. Otherwise, reject -the sidecar and rebuild. +| Buffer | Contents | Lifetime | +|--------|----------|----------| +| `VBO` | Interleaved local-coord vertex data (28 B/vert). One range per unique representation. | Grow-on-demand during streaming; static after finalize. | +| `EBO` | Mesh-local uint32 indices. One range per unique representation. | Same. | +| `SSBO` (binding 0) | `InstanceGpu[]` (80 B each: mat4 transform, object_id, color_override, pad). | Appended during streaming, static after finalize. | +| `visible SSBO` (binding 1) | `uint32[]` — flat list of visible instance indices, ordered by mesh, uploaded each frame. | Rewritten every frame. | +| Draw-indirect buffer | `DrawElementsIndirectCommand[]` — one per non-empty mesh, uploaded each frame. | Rewritten every frame. | -#### Performance characteristics +Draw command: -| Metric | Value | -|--------|-------| -| BVH build time (100k objects) | < 100 ms (single-threaded, background) | -| Per-frame traversal (100k objects, 50% visible) | ~0.1 ms | -| Per-frame traversal (100k objects, 5% visible) | ~0.02 ms | -| Memory overhead | 32 bytes/node + 4 bytes/object index (~1.5× object count) | -| EBO reorder (one-time) | 1–5 ms upload for 32 MB EBO | -| Sidecar file size | ~same as geometry data (vertices + indices + metadata) | -| Sidecar read time | bounded by disk I/O (~500 ms for 640 MB, ~2 s for 2.8 GB from NVMe) | -| GPU upload time | progressive: ~48 MB/frame (~1 s for 2.8 GB at 60 fps, non-blocking) | - -#### Spatial coherence bonus - -Beyond culling, BVH-leaf-sorted EBOs improve GPU cache performance. 
When the -GPU rasterizes a leaf's triangles, the vertices are close together in the VBO, -so the post-transform vertex cache hits more often. This can yield 10–20% -rasterization speedup even when nothing is culled (e.g. zoomed out to see the -whole model). - -### Phase 3: GPU-Driven Indirect Draw - -For models with 500k+ objects, even tile-level CPU culling is fast, but the -real bottleneck shifts to draw call submission. Phase 3 moves all per-frame -visibility decisions to the GPU via compute shaders and indirect draw commands. - -#### How it works - -Phase 3 builds on the BVH from phase 2. It does not replace the BVH — it -moves the per-frame traversal to the GPU. - -1. **Upload phase** (once, at load time): - - Per-leaf AABBs from the BVH are uploaded to a GPU SSBO (`leaf_aabbs`). - - One `DrawElementsIndirectCommand` per BVH leaf is written to an indirect - draw buffer: - ```c - struct DrawElementsIndirectCommand { - uint count; // leaf's total index count - uint instanceCount; // 1 - uint firstIndex; // offset into EBO (from BVH leaf order) - uint baseVertex; // 0 (indices are global) - uint baseInstance; // leaf_id (available in shader via gl_DrawID) - }; - ``` - - A "template" copy of the indirect buffer is kept so the compute shader - can reset culled commands each frame without re-uploading from CPU. - -2. **Cull phase** (every frame, on the GPU): - - The CPU uploads 6 frustum plane vec4s as a uniform or small UBO. 
- - A compute shader dispatches `ceil(N_leaves / 64)` workgroups: - ```glsl - layout(local_size_x = 64) in; - - void main() { - uint leaf_id = gl_GlobalInvocationID.x; - if (leaf_id >= leaf_count) return; - - // Copy from template (resets any previously zeroed commands) - commands[leaf_id] = template_commands[leaf_id]; - - // Frustum test - if (!aabb_vs_frustum(leaf_aabbs[leaf_id], frustum_planes)) { - commands[leaf_id].count = 0; // culled: GPU skips zero-count draws - } - } - ``` - - A memory barrier ensures the indirect buffer is visible to the draw stage. - -3. **Draw phase** (every frame): - - One call: `glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, - nullptr, N_leaves, 0)`. - - The GPU reads the indirect buffer, skips tiles with `count == 0`, and - draws the rest. Zero CPU-side per-object or per-tile work. - -#### What the CPU does per frame - -1. Upload 6 vec4 frustum planes (96 bytes). -2. Dispatch one compute shader. -3. Issue one `glMultiDrawElementsIndirect`. -4. Swap buffers. - -That's it. The CPU frame time is essentially constant regardless of model size. - -#### Future extensions (enabled by this architecture) - -Once the compute-based cull pass exists, it's straightforward to add: - -- **Hierarchical-Z occlusion culling**: render a coarse depth buffer from the - previous frame, then test BVH leaf AABBs against it in the compute shader. - Leaves fully behind closer geometry get culled. This handles interior-heavy - BIM models well (most rooms are occluded from any given viewpoint). -- **Distance-based LOD**: the compute shader can select different index ranges - (coarse vs. fine tessellation) per leaf based on distance to camera. -- **Contribution culling**: leaves whose screen-space projection is below a - pixel threshold get `count = 0`. Removes distant small objects. 
- -#### Performance characteristics +```c +struct DrawElementsIndirectCommand { + uint32_t count; // mesh.index_count + uint32_t instanceCount; // visible-list length for this mesh + uint32_t firstIndex; // mesh.ebo_byte_offset / 4 + uint32_t baseVertex; // mesh.vbo_byte_offset / 28 + uint32_t baseInstance; // offset into the flat visible-index array +}; +``` -| Metric | Value | -|--------|-------| -| CPU per-frame work | ~0.01 ms (constant, independent of model size) | -| GPU compute dispatch | ~0.02 ms for 2k leaves | -| Draw call overhead | 1 indirect multi-draw call | -| GPU memory overhead | ~48 bytes/leaf (AABB SSBO) + 20 bytes/leaf (indirect commands) × 2 (template + live) | -| Total for 2k leaves | ~176 KB GPU memory | -| Implementation complexity | High (compute shaders, SSBOs, memory barriers, indirect draw) | +The vertex shader reads `visible[gl_BaseInstanceARB + gl_InstanceID]` to +get the real instance id, then indexes into the instance SSBO: -#### When to use +```glsl +uint slot = uint(gl_BaseInstanceARB) + uint(gl_InstanceID); +uint iid = visible[slot]; +InstanceRecord inst = instances[iid]; +gl_Position = u_view_projection * inst.transform * vec4(a_position, 1.0); +``` -Phase 3 is worthwhile when: +`gl_BaseInstanceARB` requires `GL_ARB_shader_draw_parameters`, which is +available on all GL-4.6-capable drivers. -- The model has 500k+ objects (CPU frustum testing > 3 ms). -- Smooth 60 fps orbiting is required during interaction. -- The GPU has compute shader support (OpenGL 4.3+, which is guaranteed since - the viewer requires 4.5). +Reflection handling: at upload time we store a parallel +`instance_reflected[]` byte array (1 if the transform's upper-3×3 has +det < 0). The cull pass produces two flat visible-list slices — fwd +(non-reflected) first, rev (reflected) after — concatenated into one +buffer. The renderer issues MDI twice: fwd with `glFrontFace(GL_CCW)`, +rev with `glFrontFace(GL_CW)`. 
`GL_CULL_FACE` stays on and does the +right thing in both passes. -For models under 100k objects, phase 1 alone is sufficient. For 100k–500k, -phase 2 (BVH) keeps CPU culling well under 1 ms. Phase 3 is the final step -that makes the CPU frame time constant. +### Current bottleneck — Phase 3 as designed is already obsolete -### Summary +The original README's Phase 3 ("GPU-driven indirect draw") described +moving draw submission to the GPU via compute. In the meantime, GPU +instancing and MDI made the CPU-side draw cost essentially free (10 +`glMultiDrawElementsIndirect` calls per frame for 10 models). **That +goal is met.** The real Phase 3 problem is different. -``` -Model size Active phases CPU cull cost Draw calls -───────────── ────────────── ────────────── ────────── -< 10k objects Phase 1 ~0.06 ms 1 multi-draw -10k–100k Phase 1 ~0.6 ms 1 multi-draw -100k–500k Phase 1 + 2 ~0.01 ms 1 multi-draw -500k–1M+ Phase 1 + 2 + 3 ~0 (GPU) 1 indirect multi-draw -``` +#### Diagnosed on a 10-model / 379 k-instance / 128 M-triangle scene -The load path: +Observed numbers (everything in view, no movement): + +| Metric | Value | +|--------|-------| +| FPS | 10 | +| Frame time | ~100 ms | +| gl_draws | 10 | +| Sub-draws packed in indirect buffers | 67 037 | + +Elimination experiments: + +| Probe | Result | Interpretation | +|-------|--------|----------------| +| Camera off-screen (nothing visible) | → 60 fps | GPU is idle; CPU path is cheap | +| Resize window to 1/4 area | no change | Not fragment/raster bound | +| `setSamples(4)` → `setSamples(1)` | no change | Not MSAA/resolve bound | +| Comment out the two `glNamedBufferSubData` in `cullAndUploadVisible` | → 60 fps (screen blank) | **The per-frame uploads are the bottleneck.** | + +So the bottleneck is two `glNamedBufferSubData` calls per model per +frame uploading ~1.5 MB (visible list) + ~1.3 MB (indirect buffer). 
+3 MB/frame × 60 fps = 180 MB/s — trivial for the bus, but `glNamedBufferSubData` +against a buffer the GPU is still reading forces the driver to stall +the CPU or orphan/reallocate the backing store, and we're hitting that +on 20 buffers per frame. + +### Phase 3 (proposed) — Eliminate per-frame upload stalls + +Two ways to attack it, in ascending order of effort: + +#### 3A. Persistent mapped ring buffers (near-term) + +Allocate each of the per-frame-written buffers with +`glBufferStorage(GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT)` +at 3× the needed size. Keep one `void*` from `glMapBufferRange` forever. +Each frame, write the CPU-side data into slice `frame % 3` and bind +that slice via `glBindBufferRange`. The GPU reads slice N−1 while the +CPU writes slice N — no driver sync, no orphan, no stall. + +Scope: ~80 lines across `ModelGpuData` + `cullAndUploadVisible` + +binding in `render()` / `renderPickPass()`. No algorithmic change, no +shader change. Expected result on the stats scene: 10 fps → ~60 fps +(the measured ceiling once uploads are removed). + +#### 3B. GPU-side culling (longer-term) + +Push culling itself to the GPU. A compute shader reads the +`InstanceCpu`-equivalent SSBO + frustum planes, builds the visible list +and indirect commands in-place via atomics. Zero CPU→GPU per-frame +bytes. Also lays the foundation for occlusion and contribution culling +(both want to run on the GPU anyway, with access to the depth buffer +or screen-space projection). + +Scope: compute shader + atomic counter + BVH-traversal-on-GPU (or a +linear compute scan — simpler and still gains most of the win since +traversal isn't the bottleneck once upload is gone). Bigger change; +worth doing after 3A is measured, because 3A may be enough for a long +while. + +### Planned follow-ups (post-Phase-3) + +- **Screen-space contribution cull.** Reject instances whose projected + screen-space AABB is below a pixel threshold.
Cheap CPU-side filter + that eliminates distant MEP detail. Big win on unfiltered plant-room + scenes. +- **Hierarchical-Z occlusion culling.** Render large occluders, build a + depth pyramid, test BVH / instance AABBs against it. In dense BIM, + most geometry is behind other geometry from any given viewpoint; this + is historically a 3–10× reduction in drawn instances. +- **Distance / contribution LOD.** Unique meshes pre-simplified at load + time; compute shader selects an LOD per instance per frame based on + screen-space size. Same visible-SSBO plumbing, different `firstIndex`. +- **Mesh shaders / meshlets.** Ceiling-raising but overkill until the + above are exhausted. + +## Summary table ``` -open(model.ifc): - ├─ sidecar exists (.ifcview)? - │ ├─ yes: background thread reads sidecar file (non-blocking I/O) - │ │ → allocate per-model VAO/VBO/EBO (empty, exact size) - │ │ → progressive GPU upload: 48 MB/frame VBO, then EBO - │ │ → objects appear as EBO chunks land - │ │ → BVH activates once fully loaded - │ │ → viewport interactive throughout - │ └─ no: stream from IFC via GeometryStreamer - │ → uploadChunk() appends to per-model buffers (immediately drawable) - │ → phase 1 linear-scan culling active from first chunk - │ → on completion: background BVH build, re-sort EBO, save .ifcview - └─ rendering (per model, per frame): - ├─ phase 3 available? → compute cull + indirect multi-draw - ├─ BVH available? 
→ BVH traversal + glMultiDrawElements - └─ else / progressive → linear scan of active objects + glMultiDrawElements +Scene size Bottleneck Fix +----------- ---------- --- +< 100k instances CPU cull scan Phase 1 only (current) +100k–500k CPU cull scan BVH (Phase 2) — done +500k+ across many models visible/indirect Phase 3A mapped rings + buffer uploads (next) +--- --- --- +multi-million + occlusion-heavy fragment / overdraw HiZ occlusion + LOD ``` ## Roadmap -- [x] Material color support (per-vertex RGBA8) +- [x] Material colour support (per-vertex RGBA8) - [x] Per-model GPU buffers (VAO/VBO/EBO per model, no cross-model copies) -- [x] Per-object frustum culling (phase 1) -- [x] BVH acceleration with per-model trees (phase 2) -- [x] Raw binary `.ifcview` sidecar cache (full geometry + BVH, Blender-style) +- [x] Per-object frustum culling (Phase 1) +- [x] BVH acceleration with per-model trees (Phase 2) +- [x] Raw binary `.ifcview` sidecar cache - [x] Non-blocking sidecar loading (background thread I/O) -- [x] Progressive GPU upload (48 MB/frame chunked VBO/EBO transfer) -- [ ] GPU-driven indirect draw (phase 3) +- [x] Progressive GPU upload (VBO/EBO growth + streaming-time instance appends) +- [x] GPU instancing (unique meshes + per-placement SSBO) +- [x] `glMultiDrawElementsIndirect` draw path +- [x] Reflection-aware two-pass draw for mirrored placements +- [x] Backface culling (user-toggleable, default on) +- [x] `reorient-shells` enabled in iterator +- [ ] **Phase 3A — persistent-mapped ring buffers for visible + indirect** (next) +- [ ] Phase 3B — GPU-side compute-shader culling +- [ ] Screen-space contribution culling - [ ] Hierarchical-Z occlusion culling - [ ] Distance-based LOD selection - [ ] Vulkan/MoltenVK backend for macOS From 8360cd3ca8d1990dd502eaf9c15f225a52b92916 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Mon, 13 Apr 2026 09:33:21 +1000 Subject: [PATCH 23/37] Pivot Phase 3: diagnose as draw-bound, not upload-bound MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Earlier probes pointed at per-frame glNamedBufferSubData uploads as the bottleneck (60 fps when those two calls were commented out). That was a false reading — zeroing the uploads also emptied the indirect buffer, so MDI drew nothing. "No upload" and "no draw" were indistinguishable. Two new diagnostic env vars in render() isolate the real costs: IFC_SKIP_MDI=1 keep cull + upload + binds, skip only the MDI draws. Gives 62 fps with everything else running, confirming the non-draw path fits in ~16 ms. IFC_MAX_SUBDRAWS=N cap each MDI's drawcount. 67k -> 30k sub-draws saves 0 ms, confirming sub-draw count itself is not the bottleneck; the long tail of sub-draws carries ~no triangles. On a GTX 1650 with 128 M triangles in view, nvidia-smi sits at 95 % GPU util and FPS scales with triangle work, not sub-draw count. The card is simply rasterising at ~850 M tri/s. No CPU-side or upload trick recovers it. Revised Phase 3 is therefore shedding triangles, not bytes: 3A screen-space contribution culling (next) 3B LOD 3C HiZ occlusion 3D GPU-side compute culling README Phase 3 section rewritten around the diagnosis, including the false lead, so future work doesn't re-tread the upload path. The aborted staging+resident ring-buffer implementation was reverted (the uncommitted working tree is gone — pure glNamedBufferSubData retained for the visible + indirect buffers, which we now know is fine). Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 187 +++++++++++++++++++------------ src/ifcviewer/ViewportWindow.cpp | 33 +++++- 2 files changed, 145 insertions(+), 75 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 4966d27d5ef..4af736bfad8 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -333,101 +333,148 @@ buffer. The renderer issues MDI twice: fwd with `glFrontFace(GL_CCW)`, rev with `glFrontFace(GL_CW)`. 
`GL_CULL_FACE` stays on and does the right thing in both passes. -### Current bottleneck — Phase 3 as designed is already obsolete +### Current bottleneck — draw-bound, not upload-bound The original README's Phase 3 ("GPU-driven indirect draw") described moving draw submission to the GPU via compute. In the meantime, GPU instancing and MDI made the CPU-side draw cost essentially free (10 `glMultiDrawElementsIndirect` calls per frame for 10 models). **That -goal is met.** The real Phase 3 problem is different. +goal is met.** The real ceiling lies elsewhere, and it took a couple of +bad hypotheses to pin down. -#### Diagnosed on a 10-model / 379 k-instance / 128 M-triangle scene +#### Profiled scene -Observed numbers (everything in view, no movement): +10 models / 379 k instances / 128 M triangles, everything in view, no +camera motion, GTX 1650 (PCIe dGPU, 4 GB VRAM): | Metric | Value | |--------|-------| -| FPS | 10 | -| Frame time | ~100 ms | +| FPS | 6.7 | +| Frame time | 149 ms | | gl_draws | 10 | | Sub-draws packed in indirect buffers | 67 037 | -Elimination experiments: +`nvidia-smi` reports 95 % GPU utilisation during render — the GPU is +the thing that's pinned. + +#### False lead: "the per-frame uploads are the bottleneck" -| Probe | Result | Interpretation | -|-------|--------|----------------| -| Camera off-screen (nothing visible) | → 60 fps | GPU is idle; CPU path is cheap | -| Resize window to 1/4 area | no change | Not fragment/raster bound | -| `setSamples(4)` → `setSamples(1)` | no change | Not MSAA/resolve bound | -| Comment out the two `glNamedBufferSubData` in `cullAndUploadVisible` | → 60 fps (screen blank) | **The per-frame uploads are the bottleneck.** | +The first round of probes pointed at the two `glNamedBufferSubData` +calls per model per frame (visible list ~1.5 MB + indirect buffer +~1.3 MB): -So the bottleneck is two `glNamedBufferSubData` calls per model per -frame uploading ~1.5 MB (visible list) + ~1.3 MB (indirect buffer). 
-3 MB/frame × 60 fps = 180 MB/s — trivial for the bus, but `glNamedBufferSubData` -against a buffer the GPU is still reading forces the driver to stall -the CPU or orphan/reallocate the backing store, and we're hitting that -on 20 buffers per frame. +| Probe | Result | Initial interpretation | +|-------|--------|------------------------| +| Camera off-screen (nothing visible) | 60 fps | GPU idle → CPU path cheap | +| Comment out the two `glNamedBufferSubData` | 60 fps, blank screen | Uploads are the bottleneck | -### Phase 3 (proposed) — Eliminate per-frame upload stalls +This led to an aborted Phase 3A implementation of persistent-mapped +triple-buffered rings (and then staging + VRAM-resident with +`glCopyNamedBufferSubData`). Neither moved the FPS needle — both still +sat at 6.7 fps. -Two ways to attack it, in ascending order of effort: +The probe was wrong: **commenting out the uploads emptied the indirect +buffer, so MDI drew zero triangles. "No upload" and "no draw" were +indistinguishable in the test.** -#### 3A. Persistent mapped ring buffers (near-term) +#### What actually isolates the draw cost -Allocate each of the per-frame-written buffers with -`glBufferStorage(GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | GL_MAP_WRITE_BIT)` -at 3× the needed size. Keep one `void*` from `glMapBufferRange` forever. -Each frame, write the CPU-side data into slice `frame % 3` and bind -that slice via `glBindBufferRange`. The GPU reads slice N−1 while the -CPU writes slice N — no driver sync, no orphan, no stall. +Two diagnostic env vars now live in `render()`: -Scope: ~80 lines across `ModelGpuData` + `cullAndUploadVisible` + -binding in `render()` / `renderPickPass()`. No algorithmic change, no -shader change. Expected result on the stats scene: 10 fps → ~60 fps -(the measured ceiling once uploads are removed). +- `IFC_SKIP_MDI=1` — keep everything (cull, upload, binds) but skip the + actual `glMultiDrawElementsIndirect` calls.
+- `IFC_MAX_SUBDRAWS=N` — truncate each MDI's drawcount to N while still + running the rest of the frame. -#### 3B. GPU-side culling (longer-term) +Results on the profiled scene: -Push culling itself to the GPU. A compute shader reads the -`InstanceCpu`-equivalent SSBO + frustum planes, builds the visible list -and indirect commands in-place via atomics. Zero CPU→GPU per-frame -bytes. Also lays the foundation for occlusion and contribution culling -(both want to run on the GPU anyway, with access to the depth buffer -or screen-space projection). +| Probe | FPS | Frame time | +|-------|-----|-----------| +| baseline | 6.7 | 149 ms | +| `IFC_SKIP_MDI=1` | 62.5 | 16 ms | +| `IFC_MAX_SUBDRAWS=30000` | 6.7 | 149 ms | +| `IFC_MAX_SUBDRAWS=10000` | 7.5 | 133 ms | +| `IFC_MAX_SUBDRAWS=1000` | 20.2 | 49 ms | -Scope: compute shader + atomic counter + BVH-traversal-on-GPU (or a -linear compute scan — simpler and still gains most of the win since -traversal isn't the bottleneck once upload is gone). Bigger change; -worth doing after 3A is measured, because 3A may be enough for a long -while. +Readings: + +1. `SKIP_MDI` gives 62 fps with all upload/bind machinery still running + — the non-draw path fits in ~16 ms easily. **Not upload-bound.** +2. Halving the sub-draw count (67 k → 30 k) saves 0 ms. If per-sub-draw + command-processor overhead were material, dropping 37 k sub-draws + would save measurable time no matter which sub-draws were dropped. + It doesn't. **67 k sub-draws is not the bottleneck** — the long tail + carries almost no triangles, and the heavyweights dominate. +3. Time only starts coming down once the cap is low enough to shed bulk + triangle work (1000 sub-draws → 49 ms). The curve is consistent with + a long-tailed distribution: a handful of very big meshes × instance + counts do most of the rasterisation. 
+ +**Conclusion: the GTX 1650 is rasterising 128 M triangles at ~850 M +tri/s, and that eats ~133 ms of the 149 ms frame.** No CPU-side or +upload-side work will recover it. The only way forward is to draw +fewer triangles. + +### Phase 3 (revised) — Shed triangles, not bytes + +In order of effort/payoff for BIM workloads: + +#### 3A. Screen-space contribution culling (near-term) + +Project each visible-instance AABB to screen space during BVH +traversal. Reject instances whose projected size is below a threshold +(~4 px). In BIM this is the single biggest win: at viewer zoom levels +that encompass a whole building, most MEP fittings, fixings, furniture +legs, door hardware etc. occupy < 1 px and contribute nothing. + +Scope: a projection + pixel-area test inside +`ViewportWindow::cullAndUploadVisible`. Zero new GPU state. Expect +10–30× reduction in drawn triangles on plant/MEP-dense scenes; full +buildings viewed in overview should approach 60 fps. + +#### 3B. Distance / contribution LOD (medium-term) + +Pre-simplify unique representations at ingress time (store LOD 0 / 1 / +2 meshes in the VBO/EBO with offsets), select LOD per instance per +frame by the same projected-size metric as 3A. The visible-SSBO +plumbing and MDI structure don't change — only `firstIndex`/`count` in +the indirect command does. Ingress side needs a decimation pass +(`meshoptimizer` or similar); GPU side is nearly free. + +#### 3C. Hierarchical-Z occlusion culling (longer-term) + +Render large occluders first, build a depth pyramid, test instance +AABBs against it. In dense BIM most geometry is behind other geometry +from any given interior viewpoint; historically a 3–10× reduction in +drawn instances. Most valuable *after* 3A+3B, which together handle +the far-away and small-detail cases. Pairs naturally with GPU-side +culling (a compute shader doing the HiZ test and writing the visible +list + indirect buffer in place). + +#### 3D. 
GPU-side culling via compute (longer-term) + +Push the cull loop to a compute shader reading the per-instance SSBO + +frustum planes + HiZ pyramid, emitting the visible list and indirect +commands with atomic counters. Eliminates all CPU→GPU per-frame bytes +and lets 3C scale to millions of instances. Worth doing once 3A–3C +have stabilised the CPU-side algorithm we'd be porting. ### Planned follow-ups (post-Phase-3) -- **Screen-space contribution cull.** Reject instances whose projected - screen-space AABB is below a pixel threshold. Cheap CPU-side filter - that eliminates distant MEP detail. Big win on unfiltered plant-room - scenes. -- **Hierarchical-Z occlusion culling.** Render large occluders, build a - depth pyramid, test BVH / instance AABBs against it. In dense BIM, - most geometry is behind other geometry from any given viewpoint; this - is historically a 3–10× reduction in drawn instances. -- **Distance / contribution LOD.** Unique meshes pre-simplified at load - time; compute shader selects an LOD per instance per frame based on - screen-space size. Same visible-SSBO plumbing, different `firstIndex`. -- **Mesh shaders / meshlets.** Ceiling-raising but overkill until the - above are exhausted. +- **Mesh shaders / meshlets.** Ceiling-raising, but overkill until the + above are exhausted and we've hit silicon limits on vertex/raster + throughput. 
## Summary table ``` -Scene size Bottleneck Fix ------------ ---------- --- -< 100k instances CPU cull scan Phase 1 only (current) -100k–500k CPU cull scan BVH (Phase 2) — done -500k+ across many models visible/indirect Phase 3A mapped rings - buffer uploads (next) ---- --- --- -multi-million + occlusion-heavy fragment / overdraw HiZ occlusion + LOD +Scene size Bottleneck Fix +----------- ---------- --- +< 100k instances CPU cull scan Phase 1 only +100k–500k CPU cull scan BVH (Phase 2) — done +500k+ tris / overview shot GPU vertex + raster Phase 3A contribution cull + (+ 3B LOD for close-ups) +multi-million + occluders redundant rasterisation Phase 3C HiZ occlusion ``` ## Roadmap @@ -444,10 +491,10 @@ multi-million + occlusion-heavy fragment / overdraw HiZ occlusion + LOD - [x] Reflection-aware two-pass draw for mirrored placements - [x] Backface culling (user-toggleable, default on) - [x] `reorient-shells` enabled in iterator -- [ ] **Phase 3A — persistent-mapped ring buffers for visible + indirect** (next) -- [ ] Phase 3B — GPU-side compute-shader culling -- [ ] Screen-space contribution culling -- [ ] Hierarchical-Z occlusion culling -- [ ] Distance-based LOD selection +- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`) +- [ ] **Phase 3A — screen-space contribution culling** (next) +- [ ] Phase 3B — distance / contribution LOD +- [ ] Phase 3C — Hierarchical-Z occlusion culling +- [ ] Phase 3D — GPU-side compute-shader culling - [ ] Vulkan/MoltenVK backend for macOS - [ ] Embedded Python scripting console diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 84778f3f2f7..4680e188bb1 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -988,10 +989,32 @@ void ViewportWindow::render() { gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 
m.indirect_buffer); - const uint32_t fwd = m.indirect_forward_count; - const uint32_t rev = m.indirect_command_count - fwd; + uint32_t fwd = m.indirect_forward_count; + uint32_t rev = m.indirect_command_count - fwd; + // Perf diagnostics (confirmed 2026-04 on GTX 1650 @ 128M tris: + // draw-bound, not upload-bound — see README Phase 3): + // IFC_SKIP_MDI=1 skip the actual MDI draws (keeps cull + + // upload + binds). FPS jump == draw-bound. + // IFC_MAX_SUBDRAWS=N truncate drawcount to N per MDI. Lets + // you distinguish per-subdraw command- + // processor overhead from raw tri work. + static const bool skip_mdi = []{ + const char* e = std::getenv("IFC_SKIP_MDI"); + return e && e[0] == '1'; + }(); + static const uint32_t max_subdraws = []{ + const char* e = std::getenv("IFC_MAX_SUBDRAWS"); + return (e && *e) ? static_cast(std::atoi(e)) + : std::numeric_limits::max(); + }(); + if (max_subdraws < m.indirect_command_count) { + // Keep the fwd/rev ratio so the workload mix is preserved. + const uint32_t total = m.indirect_command_count; + fwd = static_cast((uint64_t)fwd * max_subdraws / total); + rev = max_subdraws - fwd; + } // Forward pass: non-reflected instances, standard CCW winding. - if (fwd > 0) { + if (fwd > 0 && !skip_mdi) { gl_->glFrontFace(GL_CCW); gl_->glMultiDrawElementsIndirect( GL_TRIANGLES, GL_UNSIGNED_INT, nullptr, @@ -1000,11 +1023,11 @@ void ViewportWindow::render() { } // Reverse pass: reflected instances — their world-space winding is // flipped, so telling GL the front is CW keeps cull-back working. 
- if (rev > 0) { + if (rev > 0 && !skip_mdi) { gl_->glFrontFace(GL_CW); gl_->glMultiDrawElementsIndirect( GL_TRIANGLES, GL_UNSIGNED_INT, - reinterpret_cast(fwd * sizeof(DrawElementsIndirectCommand)), + reinterpret_cast(m.indirect_forward_count * sizeof(DrawElementsIndirectCommand)), static_cast(rev), 0); ++gl_draw_calls_; gl_->glFrontFace(GL_CCW); From 09c1eefa9408714d53ba56ee117fac7c06ddc08d Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Mon, 13 Apr 2026 09:51:09 +1000 Subject: [PATCH 24/37] Phase 3A: screen-space contribution culling Reject frustum-visible objects whose bounding sphere projects below a pixel-radius threshold. Applied at both BVH-node level (whole subtrees pruned) and per-instance level; short-circuits when the camera is inside the AABB so nothing-you're-standing-next-to is ever lost. Pick pass passes threshold 0 so sub-pixel objects stay clickable. Threshold defaults to 2 px (radius), overridable via IFC_MIN_PX env var. Measured on the 128 M-tri test scene (GTX 1650): 0 px (off): 6.7 fps, 128 M tris 2 px: 20.2 fps, 40 M tris (31%) 4 px: 30.3 fps, 15 M tris (12%) The metric is sphere-based (cheap: one sqrt per test) rather than AABB-corner projection; loses a little precision on very elongated bounds but costs ~5x less per test and the BVH-node pre-cull means the long-tail-of-small-things case is already handled by subtree pruning before we touch individual instances. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 64 ++++++++++++++++++++++++++++++-- src/ifcviewer/ViewportWindow.h | 10 ++++- 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 4680e188bb1..db002c1870d 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -824,7 +824,8 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { return pixel; } -void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6][4]) { +void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6][4], + float focal_px, float min_pixel_radius) { // Per-mesh scratch, split by winding: fwd = non-reflected (CCW in screen // space), rev = reflected (CW in screen space). Splitting lets the draw // pass toggle glFrontFace once between two MDI calls so GL_CULL_FACE does @@ -836,9 +837,44 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] visible_by_mesh_rev_[i].clear(); } + // Bounding-sphere contribution test: approximate an AABB by its enclosing + // sphere (centre = midpoint, radius = half-diagonal). Project radius to + // pixels as r_px = focal_px * r / distance (perspective). Reject if + // smaller than the threshold. Returns true when the node/instance + // should be kept. + // + // If the camera is inside the AABB the distance to the sphere centre can + // approach zero and says little about screen size — we skip the test + // whenever the camera lies within the AABB itself. Cheap and + // conservative: never drops things you're standing next to. + const float cx = camera_eye_.x(); + const float cy = camera_eye_.y(); + const float cz = camera_eye_.z(); + auto contributionPasses = [&](const float mn[3], const float mx[3]) -> bool { + if (min_pixel_radius <= 0.0f) return true; + // Camera inside AABB? Always keep.
+ if (cx >= mn[0] && cx <= mx[0] && + cy >= mn[1] && cy <= mx[1] && + cz >= mn[2] && cz <= mx[2]) { + return true; + } + float ex = 0.5f * (mx[0] - mn[0]); + float ey = 0.5f * (mx[1] - mn[1]); + float ez = 0.5f * (mx[2] - mn[2]); + float radius = std::sqrt(ex*ex + ey*ey + ez*ez); + float dx = 0.5f * (mx[0] + mn[0]) - cx; + float dy = 0.5f * (mx[1] + mn[1]) - cy; + float dz = 0.5f * (mx[2] + mn[2]) - cz; + float dist = std::sqrt(dx*dx + dy*dy + dz*dz); + // r_px = focal_px * radius / dist; compare r_px >= min_pixel_radius, + // rearranged to avoid the divide. + return focal_px * radius >= min_pixel_radius * dist; + }; + auto test_and_push = [&](uint32_t inst_idx) { const InstanceCpu& inst = m.instances[inst_idx]; if (!aabbInFrustum(inst.world_aabb_min, inst.world_aabb_max, planes)) return; + if (!contributionPasses(inst.world_aabb_min, inst.world_aabb_max)) return; if (inst.mesh_id >= m.meshes.size()) return; const bool reflected = inst_idx < m.instance_reflected.size() && m.instance_reflected[inst_idx] != 0; @@ -854,6 +890,9 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] uint32_t ni = stack[--sp]; const BvhNode& n = m.bvh.nodes[ni]; if (!aabbInFrustum(n.aabb_min, n.aabb_max, planes)) continue; + // Contribution cull the whole subtree: if the node's enclosing + // sphere is below threshold, every child is too. 
+ if (!contributionPasses(n.aabb_min, n.aabb_max)) continue; if (n.count > 0) { for (uint32_t k = 0; k < n.count; ++k) { uint32_t item_idx = m.bvh.item_indices[n.right_or_first + k]; @@ -939,11 +978,12 @@ void ViewportWindow::updateCamera() { eye.setX(camera_target_.x() + camera_distance_ * cosf(pitch_rad) * cosf(yaw_rad)); eye.setY(camera_target_.y() + camera_distance_ * cosf(pitch_rad) * sinf(yaw_rad)); eye.setZ(camera_target_.z() + camera_distance_ * sinf(pitch_rad)); + camera_eye_ = eye; view_matrix_.setToIdentity(); view_matrix_.lookAt(eye, camera_target_, QVector3D(0, 0, 1)); proj_matrix_.setToIdentity(); float aspect = width() > 0 ? float(width()) / float(height()) : 1.0f; - proj_matrix_.perspective(45.0f, aspect, 0.1f, camera_distance_ * 10.0f); + proj_matrix_.perspective(camera_fov_y_deg_, aspect, 0.1f, camera_distance_ * 10.0f); } void ViewportWindow::render() { @@ -961,6 +1001,20 @@ void ViewportWindow::render() { float planes[6][4]; extractFrustumPlanes(vp, planes); + // Pixels-per-radian vertical focal length. Combined with per-instance + // world-space radius this gives screen-space pixel size for contribution + // culling below. + const float focal_px = 0.5f * static_cast(h) / + std::tan(qDegreesToRadians(0.5f * camera_fov_y_deg_)); + // Drop frustum-visible objects smaller than this many pixels. Override + // with IFC_MIN_PX (0 = disabled). 2 px radius = ~4x4 pixels, well below + // what's meaningful at normal viewing distances and eliminates the long + // tail of distant MEP/fixings that dominate BIM triangle counts. + static const float min_pixel_radius = []{ + const char* e = std::getenv("IFC_MIN_PX"); + return (e && *e) ? 
static_cast(std::atof(e)) : 2.0f; + }(); + gl_->glUseProgram(main_program_); GLint u_vp = gl_->glGetUniformLocation(main_program_, "u_view_projection"); GLint u_light = gl_->glGetUniformLocation(main_program_, "u_light_dir"); @@ -981,7 +1035,7 @@ void ViewportWindow::render() { for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; - cullAndUploadVisible(m, planes); + cullAndUploadVisible(m, planes, focal_px, min_pixel_radius); if (m.indirect_command_count == 0) continue; gl_->glBindVertexArray(m.vao); @@ -1114,7 +1168,9 @@ void ViewportWindow::renderPickPass() { for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; - cullAndUploadVisible(m, planes); + // Pick pass: contribution-cull disabled (0.0 threshold) so every + // frustum-visible object is clickable, even sub-pixel ones. + cullAndUploadVisible(m, planes, 1.0f, 0.0f); if (m.indirect_command_count == 0) continue; gl_->glBindVertexArray(m.vao); diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 1bbc44c97c4..a8d696121a2 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -173,7 +173,13 @@ class ViewportWindow : public QWindow { // Frustum-cull m's instances (BVH if available, else linear scan), // build the per-mesh DrawElementsIndirectCommand array + flat visible // list, and upload both to m.indirect_buffer / m.visible_ssbo. - void cullAndUploadVisible(ModelGpuData& m, const float planes[6][4]); + // + // `min_pixel_radius` controls contribution culling: instances (and BVH + // subtrees) whose projected bounding-sphere radius would be below this + // many pixels are dropped. 0 = disabled (all frustum-visible kept), + // which is what the pick pass uses so clickable targets aren't filtered. 
+ void cullAndUploadVisible(ModelGpuData& m, const float planes[6][4], + float focal_px, float min_pixel_radius); // Mouse interaction void handleMousePress(QMouseEvent* event); @@ -224,9 +230,11 @@ class ViewportWindow : public QWindow { // Camera QVector3D camera_target_{0, 0, 0}; + QVector3D camera_eye_{0, 0, 0}; // world-space eye, set in updateCamera float camera_distance_ = 50.0f; float camera_yaw_ = 45.0f; float camera_pitch_ = 30.0f; + float camera_fov_y_deg_ = 45.0f; QMatrix4x4 view_matrix_; QMatrix4x4 proj_matrix_; From 95b1b976b005b01b28ed4b9e022bc19b067f74f5 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Mon, 13 Apr 2026 10:06:37 +1000 Subject: [PATCH 25/37] README: mark Phase 3A done with measured numbers Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 47 ++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 4af736bfad8..7bb6972b839 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -419,18 +419,35 @@ fewer triangles. In order of effort/payoff for BIM workloads: -#### 3A. Screen-space contribution culling (near-term) - -Project each visible-instance AABB to screen space during BVH -traversal. Reject instances whose projected size is below a threshold -(~4 px). In BIM this is the single biggest win: at viewer zoom levels -that encompass a whole building, most MEP fittings, fixings, furniture -legs, door hardware etc. occupy < 1 px and contribute nothing. - -Scope: a projection + pixel-area test inside -`ViewportWindow::cullAndUploadVisible`. Zero new GPU state. Expect -10–30× reduction in drawn triangles on plant/MEP-dense scenes; full -buildings viewed in overview should approach 60 fps. +#### 3A. Screen-space contribution culling — ✅ done + +Reject frustum-visible objects whose bounding-sphere projects below a +pixel-radius threshold. 
Applied both at BVH-node level (whole subtrees +pruned, so distant parts of the model never touch per-instance tests) +and per-instance level. Short-circuits when the camera is inside the +AABB so nothing-you're-standing-next-to is ever lost. Pick pass uses +threshold 0 so sub-pixel objects remain clickable. + +Sphere-based (centre = AABB midpoint, radius = half-diagonal, +r_px = focal_px · radius / distance). Loses a little precision on +very elongated bounds vs. 8-corner projection, but costs ~5× less per +test, and because BVH-node pre-cull handles the long tail in one shot +it doesn't matter. + +Threshold defaults to 2 px radius, overridable via `IFC_MIN_PX` env +var. Measured on the 10-model / 128 M-tri test scene (GTX 1650): + +| Threshold | FPS | Triangles drawn | Objects drawn | +|-----------|-----|-----------------|---------------| +| 0 px (off) | 6.7 | 128 M | 379 k | +| 2 px | 20.2 | 40 M (31 %) | 89 k (24 %) | +| 4 px | 30.3 | 15 M (12 %) | 29 k (8 %) | + +At 4 px, frame time breakdown matches: ~16 ms non-draw baseline (from +`IFC_SKIP_MDI=1`) + ~18 ms of raster (15 M tris / 850 M tri/s) ≈ 34 ms +≈ observed 33 ms. The ceiling is now genuinely vertex/raster +throughput on the post-cull geometry — next steps (LOD, HiZ) attack +that directly. #### 3B. 
Distance / contribution LOD (medium-term) @@ -491,9 +508,9 @@ multi-million + occluders redundant rasterisation Phase 3C HiZ occlusion - [x] Reflection-aware two-pass draw for mirrored placements - [x] Backface culling (user-toggleable, default on) - [x] `reorient-shells` enabled in iterator -- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`) -- [ ] **Phase 3A — screen-space contribution culling** (next) -- [ ] Phase 3B — distance / contribution LOD +- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`) +- [x] Phase 3A — screen-space contribution culling +- [ ] **Phase 3B — distance / contribution LOD** (next) - [ ] Phase 3C — Hierarchical-Z occlusion culling - [ ] Phase 3D — GPU-side compute-shader culling - [ ] Vulkan/MoltenVK backend for macOS From 3fe183e1582669b30c30e82398b814767907f18b Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Mon, 13 Apr 2026 18:31:43 +1000 Subject: [PATCH 26/37] Phase 3B: per-instance LOD via meshoptimizer simplifySloppy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decimate each unique mesh once at sidecar-build time and swap to the reduced index slice per-instance per-frame when projected sphere radius drops below IFC_LOD1_PX (default 30). Same VBO, same SSBO, just a different firstIndex/count in the indirect command. Extends MeshInfo (48→56 B) with lod1_ebo_byte_offset + lod1_index_count and bumps the sidecar to v5. buildLods() runs inside onStreamingFinished, appends decimated indices to sd.indices, applyLodExtension pushes the EBO suffix to the live GPU state, and the sidecar is written with LOD1 baked in. simplifySloppy (voxel clustering) is used instead of the default edge-collapse meshopt_simplify because BIM brep output is per-triangle- unwelded and non-manifold after welding — simplify returned the input unchanged for every mesh tested. Sloppy ignores topology. 
Knobs (IFC_LOD_SLOPPY, IFC_LOD_ERROR, IFC_LOD_RATIO, IFC_LOD_MIN_SAVINGS, IFC_LOD_LOCK_BORDER, IFC_LOD_DEBUG) are available for A/B tuning. Result on the 128M-tri 10-model test scene (GTX 1650, 2px contribution cull): 20.2 → 43.2 fps, 40M → 14M visible triangles, no change in object count. LOD build adds 100–600 ms per model on first open, cached thereafter. README Phase 3B section is now a full writeup of pipeline, selection, decimator-choice rationale, env vars, and measured numbers. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/CMakeLists.txt | 2 + src/ifcviewer/InstancedGeometry.h | 18 ++- src/ifcviewer/LodBuilder.cpp | 203 ++++++++++++++++++++++++++++++ src/ifcviewer/LodBuilder.h | 56 +++++++++ src/ifcviewer/MainWindow.cpp | 14 +++ src/ifcviewer/README.md | 137 +++++++++++++++++--- src/ifcviewer/SidecarCache.cpp | 6 +- src/ifcviewer/SidecarCache.h | 5 +- src/ifcviewer/ViewportWindow.cpp | 117 ++++++++++++++--- src/ifcviewer/ViewportWindow.h | 16 ++- 10 files changed, 533 insertions(+), 41 deletions(-) create mode 100644 src/ifcviewer/LodBuilder.cpp create mode 100644 src/ifcviewer/LodBuilder.h diff --git a/src/ifcviewer/CMakeLists.txt b/src/ifcviewer/CMakeLists.txt index 9f1c4dac502..70642acabf2 100644 --- a/src/ifcviewer/CMakeLists.txt +++ b/src/ifcviewer/CMakeLists.txt @@ -26,6 +26,7 @@ set(QT_VERSION 6 CACHE STRING "Qt version") find_package(Qt${QT_VERSION} COMPONENTS Core Gui Widgets OpenGL REQUIRED PATHS ${QT_DIR}) find_package(OpenGL REQUIRED) +find_package(meshoptimizer REQUIRED) file(GLOB IFCVIEWER_CPP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) file(GLOB IFCVIEWER_H_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.h) @@ -51,6 +52,7 @@ target_link_libraries(IfcViewer PRIVATE Qt${QT_VERSION}::Widgets Qt${QT_VERSION}::OpenGL OpenGL::GL + meshoptimizer::meshoptimizer ) if(UNIX AND NOT APPLE) diff --git a/src/ifcviewer/InstancedGeometry.h b/src/ifcviewer/InstancedGeometry.h index 1c027976ef1..ef79751806a 100644 --- a/src/ifcviewer/InstancedGeometry.h +++ 
b/src/ifcviewer/InstancedGeometry.h @@ -33,18 +33,28 @@ static constexpr int INSTANCED_VERTEX_STRIDE_BYTES = 28; static constexpr int INSTANCED_VERTEX_STRIDE_FLOATS = 7; // Per-mesh metadata on the CPU side. Meshes own a slice of the model's -// VBO and EBO (both local-coords/mesh-local indices). +// VBO (shared across LODs) and one or more slices of the EBO, one per LOD. +// +// LOD0 is the original, full-resolution tessellation — the fields +// `ebo_byte_offset` / `index_count` describe it. +// +// LOD1 is an optional decimated copy of the same triangles referencing the +// same vertex buffer. Built at sidecar time via meshoptimizer for meshes +// whose triangle count crosses a threshold. `lod1_index_count == 0` +// means no LOD1 was built; the renderer must use LOD0 at every distance. struct MeshInfo { uint32_t vbo_byte_offset = 0; // where this mesh's vertices start uint32_t vertex_count = 0; - uint32_t ebo_byte_offset = 0; // where this mesh's indices start - uint32_t index_count = 0; + uint32_t ebo_byte_offset = 0; // LOD0 indices + uint32_t index_count = 0; // LOD0 index count float local_aabb_min[3]{}; float local_aabb_max[3]{}; uint32_t first_instance = 0; // index into per-model instances array uint32_t instance_count = 0; + uint32_t lod1_ebo_byte_offset = 0; + uint32_t lod1_index_count = 0; // 0 = no LOD1 available }; -static_assert(sizeof(MeshInfo) == 48, "MeshInfo must be 48 bytes"); +static_assert(sizeof(MeshInfo) == 56, "MeshInfo must be 56 bytes"); // Per-instance record uploaded to an SSBO and read by the vertex shader. // Layout deliberately matches std430 expectations: diff --git a/src/ifcviewer/LodBuilder.cpp b/src/ifcviewer/LodBuilder.cpp new file mode 100644 index 00000000000..88b8c9f0468 --- /dev/null +++ b/src/ifcviewer/LodBuilder.cpp @@ -0,0 +1,203 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. 
* + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#include "LodBuilder.h" + +#include + +#include +#include +#include +#include +#include + +void buildLods(SidecarData& sd, + int min_triangles, + float target_ratio, + float target_error) { + if (sd.meshes.empty() || sd.vertices.empty() || sd.indices.empty()) return; + + const size_t vtx_stride_bytes = INSTANCED_VERTEX_STRIDE_BYTES; + const size_t vtx_stride_floats = INSTANCED_VERTEX_STRIDE_FLOATS; + const size_t total_vertex_count = sd.vertices.size() / vtx_stride_floats; + + // Env var knobs so we can tune without rebuilding. + // IFC_LOD_LOCK_BORDER=1 re-enable LockBorder (off by default: BIM + // geometry is often non-manifold so locking + // borders prevents any collapse). + // IFC_LOD_ERROR= override target_error (default 0.05 → 0.2). + // IFC_LOD_RATIO= override target_ratio. + // IFC_LOD_MIN_SAVINGS=<0..1> minimum fraction of tris saved to accept + // (default 0.25). + // IFC_LOD_DEBUG=1 print per-mesh diagnostics for the first + // few meshes of each call. + // IFC_LOD_SLOPPY=0 disable sloppy (clustering) decimator. + // Default ON: BIM brep output is usually + // non-manifold, so edge-collapse simplify + // returns the input unchanged. 
+ const char* env_lock = std::getenv("IFC_LOD_LOCK_BORDER"); + const char* env_err = std::getenv("IFC_LOD_ERROR"); + const char* env_ratio = std::getenv("IFC_LOD_RATIO"); + const char* env_savings = std::getenv("IFC_LOD_MIN_SAVINGS"); + const char* env_debug = std::getenv("IFC_LOD_DEBUG"); + const char* env_sloppy = std::getenv("IFC_LOD_SLOPPY"); + + const bool lock_border = env_lock && env_lock[0] == '1'; + const bool use_sloppy = !(env_sloppy && env_sloppy[0] == '0'); + if (env_err) target_error = static_cast(std::atof(env_err)); + if (env_ratio) target_ratio = static_cast(std::atof(env_ratio)); + float min_savings = 0.25f; + if (env_savings) min_savings = static_cast(std::atof(env_savings)); + const bool debug = env_debug && env_debug[0] == '1'; + + // Loosened defaults: BIM meshes are non-manifold; LockBorder ≈ zero + // collapses. A 0.2 error budget still looks fine at sub-4px. + if (target_error < 0.2f) target_error = 0.2f; + + // Scratch buffers reused across meshes so we only allocate once. + std::vector simplified; + std::vector shadow; + simplified.reserve(1024); + shadow.reserve(1024); + + int dbg_printed = 0; + int dbg_rejected_savings = 0; + int dbg_rejected_noreduce = 0; + int dbg_accepted = 0; + + for (auto& mesh : sd.meshes) { + mesh.lod1_ebo_byte_offset = 0; + mesh.lod1_index_count = 0; + + const uint32_t tri_count = mesh.index_count / 3; + if (static_cast(tri_count) < min_triangles) continue; + if (mesh.vertex_count == 0) continue; + + // meshopt wants a pointer to the *first position* and a vertex_count + // equal to the number of referenced vertices (i.e. the absolute upper + // bound on indices we might see). Indices in `sd.indices` for this + // mesh are mesh-local (0..mesh.vertex_count). Pass the base-vertex + // as an offset into sd.vertices so meshopt reads positions at the + // right place. 
+ const uint32_t base_vertex = mesh.vbo_byte_offset / vtx_stride_bytes; + if (base_vertex + mesh.vertex_count > total_vertex_count) continue; + + const uint32_t first_index = mesh.ebo_byte_offset / sizeof(uint32_t); + if (first_index + mesh.index_count > sd.indices.size()) continue; + + const float* positions = + sd.vertices.data() + base_vertex * vtx_stride_floats; + const uint32_t* indices = sd.indices.data() + first_index; + + const size_t target_index_count = std::max( + 3, static_cast(mesh.index_count * target_ratio) / 3 * 3); + + // The instanced VBO stores each triangle's vertices separately, so the + // mesh's index buffer is topologically disconnected — every edge is + // boundary, every vertex is unique, and meshopt_simplify can't collapse + // anything. Build a shadow index buffer that welds by position, so + // shared-position vertices share an ID; then simplify on that. Output + // indices are still valid mesh-local IDs (canonical representatives), + // usable directly as LOD1 indices against the same VBO. + shadow.resize(mesh.index_count); + meshopt_generateShadowIndexBuffer( + shadow.data(), + indices, mesh.index_count, + positions, mesh.vertex_count, + sizeof(float) * 3, // compare only xyz + vtx_stride_bytes); + + simplified.resize(mesh.index_count); + float result_error = 0.0f; + size_t new_index_count = 0; + + if (use_sloppy) { + // Cluster-based decimator. Ignores topology entirely; great for + // BIM brep output which is usually non-manifold / has T-junctions. + // Operates directly on the original indices — welding isn't + // needed since it quantises positions into voxel cells. + new_index_count = meshopt_simplifySloppy( + simplified.data(), + indices, mesh.index_count, + positions, mesh.vertex_count, vtx_stride_bytes, + target_index_count, target_error, + &result_error); + } else { + const unsigned int options = + lock_border ? 
static_cast(meshopt_SimplifyLockBorder) : 0u; + new_index_count = meshopt_simplify( + simplified.data(), + shadow.data(), mesh.index_count, + positions, mesh.vertex_count, vtx_stride_bytes, + target_index_count, target_error, + options, &result_error); + } + + if (debug && dbg_printed < 8) { + std::fprintf(stderr, + " [lod] mesh tris=%u target=%zu got=%zu err=%.4f\n", + tri_count, target_index_count / 3, + new_index_count / 3, result_error); + ++dbg_printed; + } + + // Accept only if we actually saved a meaningful chunk of tris. + if (new_index_count == 0 || new_index_count >= mesh.index_count) { + ++dbg_rejected_noreduce; + continue; + } + + const uint32_t saved = mesh.index_count - static_cast(new_index_count); + if (static_cast(saved) < min_savings * static_cast(mesh.index_count)) { + ++dbg_rejected_savings; + continue; + } + ++dbg_accepted; + + // Append the surviving indices to sd.indices; record the offset. + const size_t append_offset_bytes = sd.indices.size() * sizeof(uint32_t); + sd.indices.insert(sd.indices.end(), + simplified.begin(), + simplified.begin() + new_index_count); + mesh.lod1_ebo_byte_offset = static_cast(append_offset_bytes); + mesh.lod1_index_count = static_cast(new_index_count); + } + + if (debug) { + std::fprintf(stderr, + " [lod] summary: accepted=%d rejected_noreduce=%d rejected_savings=%d " + "(lock_border=%d target_error=%.3f target_ratio=%.3f min_savings=%.3f)\n", + dbg_accepted, dbg_rejected_noreduce, dbg_rejected_savings, + lock_border ? 
1 : 0, target_error, target_ratio, min_savings); + } +} + +LodStats summariseLods(const SidecarData& sd) { + LodStats s; + s.meshes_total = static_cast(sd.meshes.size()); + for (const auto& m : sd.meshes) { + s.tris_lod0 += m.index_count / 3; + if (m.lod1_index_count > 0) { + ++s.meshes_with_lod1; + s.tris_lod1 += m.lod1_index_count / 3; + s.tris_lod0_for_lod1 += m.index_count / 3; + } + } + return s; +} diff --git a/src/ifcviewer/LodBuilder.h b/src/ifcviewer/LodBuilder.h new file mode 100644 index 00000000000..a937ae49870 --- /dev/null +++ b/src/ifcviewer/LodBuilder.h @@ -0,0 +1,56 @@ +/******************************************************************************** + * * + * This file is part of IfcOpenShell. * + * * + * IfcOpenShell is free software: you can redistribute it and/or modify * + * it under the terms of the Lesser GNU General Public License as published by * + * the Free Software Foundation, either version 3.0 of the License, or * + * (at your option) any later version. * + * * + * IfcOpenShell is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * Lesser GNU General Public License for more details. * + * * + * You should have received a copy of the Lesser GNU General Public License * + * along with this program. If not, see . * + * * + ********************************************************************************/ + +#ifndef LODBUILDER_H +#define LODBUILDER_H + +#include "SidecarCache.h" + +// Build a LOD1 index slice for every mesh in `sd` whose triangle count is +// at or above `min_triangles`, using meshoptimizer's voxel-clustering decimator (`meshopt_simplifySloppy`) by default, or edge-collapse `meshopt_simplify` when IFC_LOD_SLOPPY=0. The +// LOD1 indices are appended to `sd.indices`; each MeshInfo's +// lod1_ebo_byte_offset + lod1_index_count are populated to point at the +// appended range. 
Meshes that don't qualify (too small) or where the +// decimator produced no reduction or saved too few triangles have +// lod1_index_count left at 0 (renderer falls back to LOD0). +// +// Defaults match the Phase 3B first-iteration design: +// min_triangles = 500 — below this the overhead dominates +// target_ratio = 0.25 — aim for 25% of original tris +// target_error = 0.05 — relative error budget; buildLods raises values below 0.2 to 0.2 +// +// `sd.vertices` is read (position is the first 3 floats of each +// INSTANCED_VERTEX_STRIDE_FLOATS-wide vertex) but not modified — LOD1 +// reuses the same vertex buffer, just with a different index list. +void buildLods(SidecarData& sd, + int min_triangles = 500, + float target_ratio = 0.25f, + float target_error = 0.05f); + +// Cheap summary for logging. Safe to call before or after buildLods. +struct LodStats { + uint32_t meshes_total = 0; + uint32_t meshes_with_lod1 = 0; + uint32_t tris_lod0 = 0; // sum across all meshes + uint32_t tris_lod1 = 0; // only for meshes that got LOD1 + uint32_t tris_lod0_for_lod1 = 0; // LOD0 tris of the meshes that got LOD1 +}; +LodStats summariseLods(const SidecarData& sd); + +#endif // LODBUILDER_H diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 8b63f3bdf68..7dc5454700b 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -20,6 +20,7 @@ #include "MainWindow.h" #include "AppSettings.h" #include "SettingsWindow.h" +#include "LodBuilder.h" #include "SidecarCache.h" #include @@ -395,6 +396,19 @@ void MainWindow::onStreamingFinished() { sd.elements.push_back(pe); } + // Build LOD1 for eligible meshes (extends sd.indices and + // populates MeshInfo::lod1_*), push the extension onto the + // live GPU state so this session benefits too, then cache. 
+ QElapsedTimer t_lod; t_lod.start(); + buildLods(sd); + LodStats ls = summariseLods(sd); + qDebug(" LOD build: %lld ms — %u/%u meshes got LOD1 " + "(%u tris → %u tris for those meshes)", + t_lod.elapsed(), + ls.meshes_with_lod1, ls.meshes_total, + ls.tris_lod0_for_lod1, ls.tris_lod1); + viewport_->applyLodExtension(loading_model_id_, sd); + std::string ifc_path = it->second.file_path.toStdString(); uint64_t file_size = static_cast( QFileInfo(it->second.file_path).size()); diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 7bb6972b839..82bd89555cd 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -92,7 +92,8 @@ engine with a Qt6 interface and OpenGL 4.5 rendering. | `GeometryStreamer.h/cpp` | Background iterator runner; emits `MeshChunk` + `InstanceChunk` | | `InstancedGeometry.h` | Shared structs: `MeshInfo`, `InstanceCpu`, `InstanceGpu`, chunk records | | `BvhAccel.h/cpp` | Median-split BVH builder; operates on instance world-AABBs | -| `SidecarCache.h/cpp` | Raw binary `.ifcview` (v4) sidecar read/write | +| `LodBuilder.h/cpp` | Post-stream decimation of unique meshes via meshoptimizer (`simplifySloppy`) | +| `SidecarCache.h/cpp` | Raw binary `.ifcview` (v5) sidecar read/write | | `AppSettings.h/cpp` | Persisted preferences (geometry library, stats overlay, backface culling) | | `SettingsWindow.h/cpp` | Settings dialog | | `CMakeLists.txt` | Build configuration | @@ -106,6 +107,9 @@ engine with a Qt6 interface and OpenGL 4.5 rendering. at GL 4.1). - **IfcOpenShell C++ libraries** (IfcParse, IfcGeom, and their dependencies: Open CASCADE, Boost, Eigen3, optionally CGAL). +- **[meshoptimizer](https://github.com/zeux/meshoptimizer)** — linked via + `find_package(meshoptimizer REQUIRED)`. Used at sidecar-build time for LOD + decimation; not needed at runtime once a sidecar exists. ## Building @@ -266,7 +270,7 @@ while stack not empty: Depth 64 is enough for billions of items on any balanced tree. 
The stack is on the C++ stack, zero per-frame allocation. -#### Sidecar format (`.ifcview`, v4) +#### Sidecar format (`.ifcview`, v5) Raw memory dump, Blender-`.blend`-style — no serialisation, no parsing. Stores everything needed to skip the `IfcGeom::Iterator` pass: @@ -276,7 +280,7 @@ SidecarHeader (magic "IFVW", version, endian, ...) uint64_t source_file_size uint32_t + float[] vertex data (7 floats × N_verts, local coords) uint32_t + uint32_t[] index data (mesh-local) -uint32_t + MeshInfo[] per-unique-mesh metadata (48 B each) +uint32_t + MeshInfo[] per-unique-mesh metadata (56 B each, incl. LOD1 slice) uint32_t + InstanceCpu[] per-placement records (transform + AABB + ids) uint32_t + PackedElementInfo[] element tree records uint32_t + char[] string table @@ -449,14 +453,117 @@ At 4 px, frame time breakdown matches: ~16 ms non-draw baseline (from throughput on the post-cull geometry — next steps (LOD, HiZ) attack that directly. -#### 3B. Distance / contribution LOD (medium-term) - -Pre-simplify unique representations at ingress time (store LOD 0 / 1 / -2 meshes in the VBO/EBO with offsets), select LOD per instance per -frame by the same projected-size metric as 3A. The visible-SSBO -plumbing and MDI structure don't change — only `firstIndex`/`count` in -the indirect command does. Ingress side needs a decimation pass -(`meshoptimizer` or similar); GPU side is nearly free. +#### 3B. Distance / contribution LOD — ✅ done + +Decimate each unique representation once (at sidecar-build time), store +the reduced index slice in the same EBO, and switch to it per-instance +per-frame whenever the projected sphere radius is small enough that the +reduced silhouette is indistinguishable from the original. + +##### Pipeline + +1. **After streaming finishes**, `MainWindow` calls `buildLods(sd)` on + the snapshotted `SidecarData`. 
Each eligible mesh's decimated index + list is appended to `sd.indices`; the per-mesh `MeshInfo` gains two + new fields: + + ```cpp + uint32_t lod1_ebo_byte_offset; // appended slice, same VBO + uint32_t lod1_index_count; // 0 = no LOD1 was built + ``` + + `MeshInfo` grew from 48 to 56 bytes, which also bumps the sidecar + format to v5. + +2. `viewport_->applyLodExtension(model_id, sd)` pushes the new index + suffix onto the live EBO via `glNamedBufferSubData` and replaces the + CPU-side `m.meshes` vector. The VBO and instance SSBO are untouched + — LOD1 reuses the same vertices, only the indices differ. + +3. The sidecar is then written with both LOD0 and LOD1 indices baked in, + so subsequent loads of the same file pick up LOD1 for free. + +##### Selection + +The contribution-cull pass already computes each instance's projected +pixel radius. LOD1 is selected when that radius falls below +`IFC_LOD1_PX` (default 30 px) and the mesh has a non-empty LOD1 slice. +Camera-inside-AABB short-circuits select LOD0 (treated as "infinite +radius") so you never accidentally see the reduced mesh up close. + +The visible-instance pipeline gains two more buckets (`fwd_lod1_`, +`rev_lod1_`), so the four-way split is now `{fwd, rev} × {LOD0, LOD1}`. +LOD0/LOD1 within a winding slice are contiguous — only winding requires +`glFrontFace` to flip between MDI calls, LOD does not. `firstIndex` / +`count` in the `DrawElementsIndirectCommand` pick which slice of the EBO +to walk; everything else (base vertex, base instance, SSBO bindings, +shader) is unchanged. + +##### Decimator choice: `meshopt_simplifySloppy` + +The first attempt used `meshopt_simplify`, which is an edge-collapse +decimator. It returned every input mesh unchanged (`err = 0.0`) for two +reasons, both inherent to BIM brep output: + +1. **Per-triangle vertex duplication.** The instanced VBO stores each + triangle's vertices separately so that hard-edge normals can differ + across triangles. 
Topologically there are no shared vertices, so no + edges exist for `meshopt_simplify` to collapse. A + `meshopt_generateShadowIndexBuffer` welding pass (hash xyz only, + ignore the interleaved normal/colour) fixes this half cheaply — the + VBO isn't touched, only a per-call shadow index buffer is built. +2. **Non-manifold topology even after welding.** BIM brep output has + T-junctions, coplanar slivers, separate solids meeting at a plane, + and multi-material cuts. `meshopt_simplify` needs valid 2-manifold + edge pairs to score collapses; it refuses the non-manifold ones, the + priority queue never fires, and it returns the input untouched. + +`meshopt_simplifySloppy` is a **voxel-clustering decimator** — it +quantises positions into cells and merges everything in a cell to a +single point. Topology is irrelevant, so it works directly on the +original indices (welding isn't even needed). The trade-off is that it +rounds off sharp corners and can produce slightly degenerate triangles, +so it doesn't look great at mid-screen size. For a LOD1 that only +activates below 30 px projected radius that's invisible in practice. If +you ever want LOD1 to remain active at larger sizes, the only robust +fix is to pre-process BIM meshes into manifold form (fuse coplanar +faces, split at T-junctions) — a significant project unto itself. + +##### Tuning knobs (env vars) + +| Var | Default | Effect | +|-----|---------|--------| +| `IFC_LOD1_PX` | `30` | Projected sphere radius (px) below which LOD1 kicks in. `0` disables LOD1 entirely. | +| `IFC_LOD_SLOPPY` | `1` | `0` falls back to edge-collapse (`meshopt_simplify`) on shadow-welded indices. Typically produces zero LOD1 output for BIM — useful only for A/B comparison. | +| `IFC_LOD_ERROR` | `0.2` | Target relative error passed to meshopt. Values below `0.2` are raised to `0.2`. | +| `IFC_LOD_RATIO` | `0.25` | Target triangle-count ratio (LOD1 aims for 25 % of LOD0 tris). 
| +| `IFC_LOD_MIN_SAVINGS` | `0.25` | Reject the LOD1 result if it doesn't shave at least this fraction of triangles. | +| `IFC_LOD_LOCK_BORDER` | `0` | `1` re-enables `meshopt_SimplifyLockBorder` (only meaningful with `IFC_LOD_SLOPPY=0`). | +| `IFC_LOD_DEBUG` | `0` | `1` prints per-mesh `tris / target / got / err` for the first 8 candidate meshes plus an accept/reject summary per model. | + +##### Measured results + +Same 10-model / 128 M-tri scene as Phase 3A (GTX 1650), 2 px contribution +threshold, overview camera, all models finalised with LOD1 built: + +| Build | FPS | Frame time | Visible tris | Visible objs | +|-------|-----|-----------|--------------|--------------| +| Phase 3A alone (2 px) | 20.2 | 49 ms | 40 M | 89 k | +| Phase 3A + 3B (LOD1 ≤ 30 px) | **43.2** | **23 ms** | 14 M | 81 k | + +Roughly half the remaining frame time at a comparable object count (LOD is +lossless w.r.t. visibility — swapping the index slice doesn't hide +anything, so the 89 k vs 81 k gap in the table is not a LOD effect). The triangle reduction on meshes that qualified for LOD1 is +~80 %: e.g. 4.17 M → 0.82 M tris for the 3618 eligible meshes of Model +1, 3.25 M → 0.65 M for Model 2, etc. Only about 20 % of unique meshes +qualify (the threshold is 500 tris — below that the indirect-command +overhead dominates), but those are the fat tail carrying most of the +rasterisation cost. + +LOD build itself runs on the main thread inside `onStreamingFinished`; +typical cost is 100–600 ms per model, folded into the already-visible +"finalizing" step. Cached into the sidecar afterwards, so subsequent +opens skip it entirely. #### 3C. 
Hierarchical-Z occlusion culling (longer-term) @@ -490,7 +597,7 @@ Scene size Bottleneck Fix < 100k instances CPU cull scan Phase 1 only 100k–500k CPU cull scan BVH (Phase 2) — done 500k+ tris / overview shot GPU vertex + raster Phase 3A contribution cull - (+ 3B LOD for close-ups) + + Phase 3B LOD (done) multi-million + occluders redundant rasterisation Phase 3C HiZ occlusion ``` @@ -508,10 +615,10 @@ multi-million + occluders redundant rasterisation Phase 3C HiZ occlusion - [x] Reflection-aware two-pass draw for mirrored placements - [x] Backface culling (user-toggleable, default on) - [x] `reorient-shells` enabled in iterator -- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`) +- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`, `IFC_LOD1_PX`) - [x] Phase 3A — screen-space contribution culling -- [ ] **Phase 3B — distance / contribution LOD** (next) -- [ ] Phase 3C — Hierarchical-Z occlusion culling +- [x] Phase 3B — distance / contribution LOD (meshoptimizer `simplifySloppy`) +- [ ] **Phase 3C — Hierarchical-Z occlusion culling** (next) - [ ] Phase 3D — GPU-side compute-shader culling - [ ] Vulkan/MoltenVK backend for macOS - [ ] Embedded Python scripting console diff --git a/src/ifcviewer/SidecarCache.cpp b/src/ifcviewer/SidecarCache.cpp index 3c5ca9cd8d5..da3943988d2 100644 --- a/src/ifcviewer/SidecarCache.cpp +++ b/src/ifcviewer/SidecarCache.cpp @@ -17,7 +17,11 @@ * * ********************************************************************************/ -// v4 layout (all multi-byte fields native-endian; endianness marker in header): +// v5 layout (all multi-byte fields native-endian; endianness marker in header). +// Same sequence as v4; the only change is that MeshInfo grew two uint32_ts +// (lod1_ebo_byte_offset + lod1_index_count) and `indices` may contain extra +// appended LOD1 slices pointed at by those offsets. 
+// // // SidecarHeader (16 bytes) // uint64_t source_file_size diff --git a/src/ifcviewer/SidecarCache.h b/src/ifcviewer/SidecarCache.h index e14eb9d2561..332abdc8029 100644 --- a/src/ifcviewer/SidecarCache.h +++ b/src/ifcviewer/SidecarCache.h @@ -34,7 +34,10 @@ #include static constexpr uint32_t SIDECAR_MAGIC = 0x49465657; // "IFVW" -static constexpr uint32_t SIDECAR_VERSION = 4; +// v5 = MeshInfo extended with lod1_ebo_byte_offset + lod1_index_count (56 B). +// sd.indices may contain an appended LOD1 index slice for each mesh +// where meshoptimizer decimation produced useful output. +static constexpr uint32_t SIDECAR_VERSION = 5; static constexpr uint32_t SIDECAR_ENDIAN = 0x01020304; // Fixed-size element record. Strings are stored as (offset, length) pairs diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index db002c1870d..2606ffd3f3f 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -751,6 +751,34 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { ssbo_bytes / (1024.0*1024.0)); } +void ViewportWindow::applyLodExtension(uint32_t model_id, const SidecarData& sd) { + if (!gl_initialized_) return; + auto it = models_gpu_.find(model_id); + if (it == models_gpu_.end() || !it->second.finalized) return; + ModelGpuData& m = it->second; + + const size_t total_ib_bytes = sd.indices.size() * sizeof(uint32_t); + if (total_ib_bytes <= m.ebo_used) { + // buildLods didn't add anything; just refresh the meshes vector in + // case lod1_* fields were touched. 
+ m.meshes = sd.meshes; + return; + } + + context_->makeCurrent(this); + if (total_ib_bytes > m.ebo_capacity) { + if (!growModelEbo(m, total_ib_bytes)) return; + } + const size_t append_bytes = total_ib_bytes - m.ebo_used; + const uint32_t* appended_src = + sd.indices.data() + (m.ebo_used / sizeof(uint32_t)); + gl_->glNamedBufferSubData(m.ebo, m.ebo_used, append_bytes, appended_src); + m.ebo_used = total_ib_bytes; + + // Replace mesh metadata so cullAndUploadVisible sees the new lod1_ fields. + m.meshes = sd.meshes; +} + void ViewportWindow::resetScene() { if (!gl_initialized_) return; context_->makeCurrent(this); @@ -826,17 +854,33 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6][4], float focal_px, float min_pixel_radius) { - // Per-mesh scratch, split by winding: fwd = non-reflected (CCW in screen - // space), rev = reflected (CW in screen space). Splitting lets the draw + // Per-mesh scratch, split by winding × LOD. Winding split lets the draw // pass toggle glFrontFace once between two MDI calls so GL_CULL_FACE does - // the right thing for both. - if (visible_by_mesh_fwd_.size() < m.meshes.size()) visible_by_mesh_fwd_.resize(m.meshes.size()); - if (visible_by_mesh_rev_.size() < m.meshes.size()) visible_by_mesh_rev_.resize(m.meshes.size()); + // the right thing for both. LOD split means instances that want the + // decimated mesh go into a different bucket that emits against + // mesh.lod1_ebo_byte_offset / lod1_index_count. 
+ auto resize_if = [&](std::vector>& v) { + if (v.size() < m.meshes.size()) v.resize(m.meshes.size()); + }; + resize_if(visible_by_mesh_fwd_lod0_); + resize_if(visible_by_mesh_fwd_lod1_); + resize_if(visible_by_mesh_rev_lod0_); + resize_if(visible_by_mesh_rev_lod1_); for (size_t i = 0; i < m.meshes.size(); ++i) { - visible_by_mesh_fwd_[i].clear(); - visible_by_mesh_rev_[i].clear(); + visible_by_mesh_fwd_lod0_[i].clear(); + visible_by_mesh_fwd_lod1_[i].clear(); + visible_by_mesh_rev_lod0_[i].clear(); + visible_by_mesh_rev_lod1_[i].clear(); } + // LOD1 switches in when projected sphere radius (in pixels) drops below + // this threshold. Overridable for tuning. Set to 0 to disable LOD1 + // entirely (always draw LOD0). + static const float lod1_px_threshold = []{ + const char* e = std::getenv("IFC_LOD1_PX"); + return (e && *e) ? static_cast(std::atof(e)) : 30.0f; + }(); + // Bounding-sphere contribution test: approximate an AABB by its enclosing // sphere (centre = midpoint, radius = half-diagonal). Project radius to // pixels as r_px = focal_px * r / distance (perspective). Reject if @@ -871,15 +915,44 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] return focal_px * radius >= min_pixel_radius * dist; }; + // Returns projected sphere radius in pixels (or +inf when camera is + // inside the AABB). Shares the geometry with contributionPasses; this + // version returns the value so we can also use it for LOD selection. 
+ auto pixelRadius = [&](const float mn[3], const float mx[3]) -> float { + if (cx >= mn[0] && cx <= mx[0] && + cy >= mn[1] && cy <= mx[1] && + cz >= mn[2] && cz <= mx[2]) { + return std::numeric_limits::infinity(); + } + float ex = 0.5f * (mx[0] - mn[0]); + float ey = 0.5f * (mx[1] - mn[1]); + float ez = 0.5f * (mx[2] - mn[2]); + float radius = std::sqrt(ex*ex + ey*ey + ez*ez); + float dx = 0.5f * (mx[0] + mn[0]) - cx; + float dy = 0.5f * (mx[1] + mn[1]) - cy; + float dz = 0.5f * (mx[2] + mn[2]) - cz; + float dist = std::sqrt(dx*dx + dy*dy + dz*dz); + return dist > 0.0f ? focal_px * radius / dist + : std::numeric_limits::infinity(); + }; + auto test_and_push = [&](uint32_t inst_idx) { const InstanceCpu& inst = m.instances[inst_idx]; if (!aabbInFrustum(inst.world_aabb_min, inst.world_aabb_max, planes)) return; if (!contributionPasses(inst.world_aabb_min, inst.world_aabb_max)) return; if (inst.mesh_id >= m.meshes.size()) return; + const MeshInfo& mesh = m.meshes[inst.mesh_id]; + const bool want_lod1 = mesh.lod1_index_count > 0 && + lod1_px_threshold > 0.0f && + pixelRadius(inst.world_aabb_min, inst.world_aabb_max) < lod1_px_threshold; const bool reflected = inst_idx < m.instance_reflected.size() && m.instance_reflected[inst_idx] != 0; - if (reflected) visible_by_mesh_rev_[inst.mesh_id].push_back(inst_idx); - else visible_by_mesh_fwd_[inst.mesh_id].push_back(inst_idx); + auto& bucket = + reflected ? (want_lod1 ? visible_by_mesh_rev_lod1_ + : visible_by_mesh_rev_lod0_) + : (want_lod1 ? visible_by_mesh_fwd_lod1_ + : visible_by_mesh_fwd_lod0_); + bucket[inst.mesh_id].push_back(inst_idx); }; if (!m.bvh.nodes.empty()) { @@ -911,22 +984,28 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] for (uint32_t i = 0; i < m.instances.size(); ++i) test_and_push(i); } - // Flatten fwd-slice first, then rev-slice, into visible_flat_. 
Build - // matching DrawElementsIndirectCommands; commands for the fwd slice fill - // [0, indirect_forward_count), rev fills [indirect_forward_count, end). + // Flatten fwd-slice first (LOD0 then LOD1), then rev-slice (ditto), into + // visible_flat_. Commands for the fwd slice fill [0, indirect_forward_count), + // rev fills [indirect_forward_count, end). LOD0/LOD1 within a winding + // slice are contiguous — winding is what requires glFrontFace to flip + // between MDI calls, LOD is not. visible_flat_.clear(); indirect_scratch_.clear(); - auto emit_slice = [&](std::vector>& by_mesh) { + auto emit_slice = [&](std::vector>& by_mesh, int lod) { for (size_t mi = 0; mi < m.meshes.size(); ++mi) { const auto& mesh = m.meshes[mi]; const uint32_t vis_count = static_cast(by_mesh[mi].size()); - if (vis_count == 0 || mesh.index_count == 0) continue; + const uint32_t idx_count = + (lod == 1) ? mesh.lod1_index_count : mesh.index_count; + const uint32_t ebo_off = + (lod == 1) ? mesh.lod1_ebo_byte_offset : mesh.ebo_byte_offset; + if (vis_count == 0 || idx_count == 0) continue; DrawElementsIndirectCommand cmd; - cmd.count = mesh.index_count; + cmd.count = idx_count; cmd.instanceCount = vis_count; - cmd.firstIndex = mesh.ebo_byte_offset / sizeof(uint32_t); + cmd.firstIndex = ebo_off / sizeof(uint32_t); cmd.baseVertex = mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES; cmd.baseInstance = static_cast(visible_flat_.size()); indirect_scratch_.push_back(cmd); @@ -936,9 +1015,11 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] } }; - emit_slice(visible_by_mesh_fwd_); + emit_slice(visible_by_mesh_fwd_lod0_, 0); + emit_slice(visible_by_mesh_fwd_lod1_, 1); m.indirect_forward_count = static_cast(indirect_scratch_.size()); - emit_slice(visible_by_mesh_rev_); + emit_slice(visible_by_mesh_rev_lod0_, 0); + emit_slice(visible_by_mesh_rev_lod1_, 1); m.indirect_command_count = static_cast(indirect_scratch_.size()); // Upload visible list (keep binding alive 
even when empty). diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index a8d696121a2..fe54cce9210 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -127,6 +127,13 @@ class ViewportWindow : public QWindow { // any existing state for model_id and marks it drawable. void applyCachedModel(uint32_t model_id, SidecarData data); + // After buildLods() has extended sd.indices + populated lod1_* fields, + // push just the appended index slice + the refreshed mesh metadata onto + // the live GPU state for model_id. VBO / SSBO / instance array are left + // alone; only the EBO grows and m.meshes is replaced. No-op if the + // model isn't finalised on the viewport. + void applyLodExtension(uint32_t model_id, const SidecarData& sd); + void hideModel(uint32_t model_id); void showModel(uint32_t model_id); void removeModel(uint32_t model_id); @@ -223,8 +230,13 @@ class ViewportWindow : public QWindow { // per-frame allocation. indirect_scratch_ is the matching array of // DrawElementsIndirectCommand records — forward-declared as bytes so // the header doesn't need the struct definition. - std::vector> visible_by_mesh_fwd_; - std::vector> visible_by_mesh_rev_; + // Four buckets = {fwd, rev} × {LOD0, LOD1}. LOD1 buckets are only + // populated when the mesh has lod1_index_count > 0 and the projected + // pixel radius is below the LOD switch threshold. 
+ std::vector> visible_by_mesh_fwd_lod0_; + std::vector> visible_by_mesh_fwd_lod1_; + std::vector> visible_by_mesh_rev_lod0_; + std::vector> visible_by_mesh_rev_lod1_; std::vector visible_flat_; std::vector indirect_scratch_; From 91c8e46d1de7a68ecf06517c44713a14ed16b0dd Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Mon, 13 Apr 2026 23:25:33 +1000 Subject: [PATCH 27/37] Phase 3C: Hierarchical-Z occlusion culling (CPU-side v1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the main draw, blit the MSAA default-framebuffer depth to a single-sample 256×128 depth texture, read it back, and build a CPU max-reduced mip pyramid. Next frame's cullAndUploadVisible projects each BVH node / instance AABB through the previous frame's VP and compares the AABB's nearest depth against the pyramid's deepest value at the matching mip level; strictly-beyond AABBs are rejected. Conservative direction (aabb_near > hiz_max) — never wrongly rejects a visible instance, so no flicker. BVH subtree-level test lets a single 8-corner projection reject up to a leaf's worth of instances. Tuning knobs: IFC_NO_HIZ=1 disables; IFC_HIZ_SIZE overrides base width. New stats counter hiz_rej shows rejects/frame. Measured: big win on interior views (GPU-bound), roughly zero net effect on exterior overviews (CPU-bound on cull traversal, so the saved GPU work is masked). Tried a 3-deep PBO ring for async readback and reverted — the extra frame of staleness produced visible flicker on fast orbit, and the synchronous readback wasn't actually a measured bottleneck at 256×128. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 129 +++++++++++++-- src/ifcviewer/ViewportWindow.cpp | 264 ++++++++++++++++++++++++++++++- src/ifcviewer/ViewportWindow.h | 40 +++++ 3 files changed, 420 insertions(+), 13 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 82bd89555cd..afa20426103 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -565,15 +565,120 @@ typical cost is 100–600 ms per model, folded into the already-visible "finalizing" step. Cached into the sidecar afterwards, so subsequent opens skip it entirely. -#### 3C. Hierarchical-Z occlusion culling (longer-term) +#### 3C. Hierarchical-Z occlusion culling — ✅ done (v1, CPU-side) -Render large occluders first, build a depth pyramid, test instance -AABBs against it. In dense BIM most geometry is behind other geometry -from any given interior viewpoint; historically a 3–10× reduction in -drawn instances. Most valuable *after* 3A+3B, which together handle -the far-away and small-detail cases. Pairs naturally with GPU-side -culling (a compute shader doing the HiZ test and writing the visible -list + indirect buffer in place). +Reject frustum-visible instances whose AABB is fully behind something +already drawn. The last drawn frame's depth buffer is the oracle — if a +region's deepest rasterised fragment is closer than an AABB's nearest +point, nothing in that AABB can win the depth test. + +In dense BIM this matters most on interior views: standing inside a +building, 80–95 % of the model sits behind the walls of the current +room and contributes nothing to the frame. Phase 3A drops the +*distant-and-small* geometry, 3B drops its triangle count when kept, +and 3C drops the *close-and-big-but-hidden* bulk that neither of those +can touch. On an outdoor overview shot (nothing is occluded) 3C does +almost nothing — which is fine, 3A+3B already cover that case. 
+ +##### Pipeline (v1: CPU-side, 1-frame stale) + +``` +render(): + draw main scene into MSAA default fb + axis gizmo + buildHizPyramid(): <-- new + glBlitFramebuffer ×2: MSAA depth → 1:1 single-sample resolve → depth tex (256×128) + glGetTextureImage depth tex → CPU + max-reduce mip chain on CPU (8–9 levels) + store the VP that produced this frame + swapBuffers + +cullAndUploadVisible(): + per BVH node: frustum ∧ contribution ∧ hiz (subtree early-out) + per instance: frustum ∧ contribution ∧ hiz +``` + +The pyramid is always the *previous* frame's depth. On a newly loaded +scene or after a camera jump the cull is conservatively too permissive +for a frame or two (draws the occluded stuff by accident) and then +settles. No flicker for a steady view because we never *wrongly reject* a visible +instance — the comparison is `aabb_near_depth > hiz_max`, so the +worst case is a kept instance that was actually occluded. Because the test is made against the previous frame's view, geometry newly revealed by camera motion can appear one frame late. + +##### Why CPU-side? + +Because the readback is cheap at this resolution (~128 KB / frame, +single `glGetTextureImage` ≈ 0.5 ms on PCIe) and the test itself is trivial +— ~100 k AABBs × 8 corners × a small mip lookup is well under a +millisecond on one thread. Phase 3D will port the cull to a compute +shader reading the pyramid as a texture, eliminating the readback; but +Phase 3C's CPU implementation was small enough to do first and +measure. + +No MSAA complication on the write side: we just blit the default +framebuffer's multi-sample depth 1:1 into a single-sample texture (GL +handles the resolve) and then down-blit that to the pyramid base size. No separate occluder pass either — we use the +previous completed frame's depth buffer directly, which is what a +temporal-reprojection HiZ reduces to when the "occluder set" is +"everything visible last frame". 
+ +##### The test + +```cpp +project 8 AABB corners through hiz_vp → NDC rect + min z +if any corner has w ≤ 0: return false // crosses near plane +if rect is outside [-1, 1]²: return false +pick mip level where rect ≤ 2×2 texels +hiz_max = max(pyramid[mip][covered texels]) +return aabb_near_depth > hiz_max +``` + +Comparing the AABB's *closest* point against the pyramid's *deepest* +value is the conservative direction — it only rejects when the AABB +is strictly beyond everything we already drew in that region. We pick +the mip at which the rect covers ≲ 2 texels on each axis so the lookup +is O(1) regardless of AABB size. + +##### BVH integration + +The same test runs on interior BVH node AABBs before leaf expansion, +so an occluded subtree skips all its instances in one shot. This is +where most of the per-frame cost savings show up on interior shots — +rejecting a 500-instance BVH subtree costs one 8-corner projection. + +##### Tuning knobs + +| Var | Default | Effect | +|-----|---------|--------| +| `IFC_NO_HIZ` | unset | `1` disables HiZ entirely (forces the Phase-3B-only path). | +| `IFC_HIZ_SIZE` | `256` | Base pyramid width in texels (clamped to a minimum of 64); height tracks viewport aspect. Raise for more accurate near-silhouette occlusion, lower to shrink readback. | + +The stats overlay gains one counter, `hiz_rej`, showing how many +instances per frame the per-instance HiZ test rejected (instances skipped via an occluded BVH subtree are not counted). On outdoor overview shots +it hovers near zero; on indoor shots it climbs into the hundreds of +thousands and the frame time drops accordingly. + +##### Known caveats + +- **1 frame stale.** The pyramid is aligned to last frame's view, so + when you whip the camera across the scene we may draw one frame of + stuff that the new view would have occluded, or skip for one frame something the new view has just revealed. Invisible in practice + at 60 fps. We tried a 3-deep PBO ring for async readback (2-frame + stale) and it produced visible flicker on fast orbits — reverted. +- **Readback syncs the GPU.** `glGetTextureImage` is blocking. 
+ Measured cost is well under a millisecond at 256×128; not a + bottleneck on the machines tested. Phase 3D's compute-shader cull + removes it entirely. +- **Doesn't move the needle on overview shots.** Those scenes are + CPU-bound on the cull traversal itself, not GPU-bound on drawing, + so cutting the drawn-triangle count in half is invisible in the + frame time. `hiz_rej` still rises modestly on overviews (the frustum + hull contains everything behind visible walls) but saved GPU work + is masked by CPU cost. HiZ pays off on interior views, where the + GPU *was* the bottleneck. If a project never leaves overview, + `IFC_NO_HIZ=1` shaves the ~1 ms of HiZ cost. +- **Transparent geometry would need special handling**, but the + current renderer doesn't have any, so no-op for now. #### 3D. GPU-side culling via compute (longer-term) @@ -598,7 +703,7 @@ Scene size Bottleneck Fix 100k–500k CPU cull scan BVH (Phase 2) — done 500k+ tris / overview shot GPU vertex + raster Phase 3A contribution cull + Phase 3B LOD (done) -multi-million + occluders redundant rasterisation Phase 3C HiZ occlusion +multi-million + occluders redundant rasterisation Phase 3C HiZ (done, CPU readback) ``` ## Roadmap @@ -615,10 +720,10 @@ multi-million + occluders redundant rasterisation Phase 3C HiZ occlusion - [x] Reflection-aware two-pass draw for mirrored placements - [x] Backface culling (user-toggleable, default on) - [x] `reorient-shells` enabled in iterator -- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`, `IFC_LOD1_PX`) +- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`, `IFC_LOD1_PX`, `IFC_NO_HIZ`, `IFC_HIZ_SIZE`) - [x] Phase 3A — screen-space contribution culling - [x] Phase 3B — distance / contribution LOD (meshoptimizer `simplifySloppy`) -- [ ] **Phase 3C — Hierarchical-Z occlusion culling** (next) -- [ ] Phase 3D — GPU-side compute-shader culling +- [x] Phase 3C — Hierarchical-Z occlusion culling (v1, CPU-side readback) 
+- [ ] **Phase 3D — GPU-side compute-shader culling** (next; replaces the readback) - [ ] Vulkan/MoltenVK backend for macOS - [ ] Embedded Python scripting console diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 2606ffd3f3f..fdfff63997c 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -343,6 +343,10 @@ ViewportWindow::~ViewportWindow() { if (pick_fbo_) gl_->glDeleteFramebuffers(1, &pick_fbo_); if (pick_color_tex_) gl_->glDeleteTextures(1, &pick_color_tex_); if (pick_depth_rbo_) gl_->glDeleteRenderbuffers(1, &pick_depth_rbo_); + if (hiz_fbo_) gl_->glDeleteFramebuffers(1, &hiz_fbo_); + if (hiz_depth_tex_) gl_->glDeleteTextures(1, &hiz_depth_tex_); + if (hiz_resolve_fbo_) gl_->glDeleteFramebuffers(1, &hiz_resolve_fbo_); + if (hiz_resolve_depth_tex_) gl_->glDeleteTextures(1, &hiz_resolve_depth_tex_); } context_->doneCurrent(); } @@ -821,6 +825,240 @@ void ViewportWindow::removeModel(uint32_t model_id) { void ViewportWindow::setSelectedObjectId(uint32_t id) { selected_object_id_ = id; } +// --- HiZ occlusion culling (Phase 3C) ----------------------------------- + +// Baseline HiZ resolution. 256x128 is enough to cull big occluders +// (walls, slabs) reliably; finer detail doesn't help much because we're +// sampling the pyramid at the mip level where the AABB's rect is ~2 +// texels anyway. Readback cost is ~128 KB/frame ≈ negligible. +// IFC_HIZ_SIZE= overrides the width; height tracks aspect. +static int hizBaseWidth() { + static const int w = []{ + const char* e = std::getenv("IFC_HIZ_SIZE"); + return (e && *e) ? 
std::max(64, std::atoi(e)) : 256; + }(); + return w; +} + +static bool hizEnabled() { + static const bool disabled = []{ + const char* e = std::getenv("IFC_NO_HIZ"); + return e && e[0] == '1'; + }(); + return !disabled; +} + +void ViewportWindow::buildHizPyramid() { + if (!gl_initialized_) return; + + const int win_w = width() * devicePixelRatio(); + const int win_h = height() * devicePixelRatio(); + if (win_w <= 0 || win_h <= 0) return; + + const int base_w = hizBaseWidth(); + const int base_h = std::max(1, (base_w * win_h) / win_w); + + // Depth format must match the default FBO's depth format for the blit + // to succeed — GL spec requires identical internal formats for depth + // blits. Qt's default surface uses 24-bit depth (setDepthBufferSize(24) + // in initGL), so we match with DEPTH_COMPONENT24 on both textures. + // + // Resolve target (full window size, single sample). Needed because + // GL also forbids scale-blitting from an MSAA source: resolve at 1:1 + // first, then down-blit. 
+ if (win_w != hiz_resolve_w_ || win_h != hiz_resolve_h_) { + if (hiz_resolve_fbo_) gl_->glDeleteFramebuffers(1, &hiz_resolve_fbo_); + if (hiz_resolve_depth_tex_) gl_->glDeleteTextures(1, &hiz_resolve_depth_tex_); + gl_->glCreateTextures(GL_TEXTURE_2D, 1, &hiz_resolve_depth_tex_); + gl_->glTextureStorage2D(hiz_resolve_depth_tex_, 1, + GL_DEPTH_COMPONENT24, win_w, win_h); + gl_->glCreateFramebuffers(1, &hiz_resolve_fbo_); + gl_->glNamedFramebufferTexture(hiz_resolve_fbo_, GL_DEPTH_ATTACHMENT, + hiz_resolve_depth_tex_, 0); + hiz_resolve_w_ = win_w; + hiz_resolve_h_ = win_h; + } + + if (base_w != hiz_base_w_ || base_h != hiz_base_h_) { + if (hiz_fbo_) gl_->glDeleteFramebuffers(1, &hiz_fbo_); + if (hiz_depth_tex_) gl_->glDeleteTextures(1, &hiz_depth_tex_); + gl_->glCreateTextures(GL_TEXTURE_2D, 1, &hiz_depth_tex_); + gl_->glTextureStorage2D(hiz_depth_tex_, 1, GL_DEPTH_COMPONENT24, + base_w, base_h); + gl_->glCreateFramebuffers(1, &hiz_fbo_); + gl_->glNamedFramebufferTexture(hiz_fbo_, GL_DEPTH_ATTACHMENT, + hiz_depth_tex_, 0); + + hiz_base_w_ = base_w; + hiz_base_h_ = base_h; + hiz_depth_readback_.assign(base_w * base_h, 1.0f); + + // Build the mip-offset table. Level 0 = base_w x base_h. + hiz_mip_offset_.clear(); + hiz_mip_w_.clear(); + hiz_mip_h_.clear(); + uint32_t off = 0; + int mw = base_w, mh = base_h; + while (mw >= 1 && mh >= 1) { + hiz_mip_offset_.push_back(off); + hiz_mip_w_.push_back(static_cast(mw)); + hiz_mip_h_.push_back(static_cast(mh)); + off += static_cast(mw) * static_cast(mh); + if (mw == 1 && mh == 1) break; + mw = std::max(1, mw / 2); + mh = std::max(1, mh / 2); + } + hiz_pyramid_.assign(off, 1.0f); + } + + // Two-step: MSAA default-fb → full-size SS resolve, then SS → down-scaled. + // GL forbids scaling a blit whose source is multisampled, and also + // requires matching depth internal formats — hence this dance. 
+ gl_->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + gl_->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, hiz_resolve_fbo_); + gl_->glBlitFramebuffer(0, 0, win_w, win_h, + 0, 0, win_w, win_h, + GL_DEPTH_BUFFER_BIT, GL_NEAREST); + + gl_->glBindFramebuffer(GL_READ_FRAMEBUFFER, hiz_resolve_fbo_); + gl_->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, hiz_fbo_); + gl_->glBlitFramebuffer(0, 0, win_w, win_h, + 0, 0, hiz_base_w_, hiz_base_h_, + GL_DEPTH_BUFFER_BIT, GL_NEAREST); + gl_->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + gl_->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + + // One-shot diagnostic so blit failures aren't silent. We only warn + // the first handful of times — GL errors can pile up and spam. + static int err_warn_budget = 3; + if (err_warn_budget > 0) { + GLenum e = gl_->glGetError(); + if (e != GL_NO_ERROR) { + qWarning("HiZ blit/readback GL error 0x%04x (win %dx%d → %dx%d → %dx%d)", + e, win_w, win_h, win_w, win_h, hiz_base_w_, hiz_base_h_); + --err_warn_budget; + } + } + + // Synchronous readback into level 0 of the pyramid. At 256x128 this + // is ~128 KB and the driver copy is fast enough not to matter in + // practice; PBO-ring async was tried and made orbiting flicker worse + // (2-frame-stale depth vs 1-frame). + gl_->glGetTextureImage(hiz_depth_tex_, 0, GL_DEPTH_COMPONENT, GL_FLOAT, + static_cast(hiz_depth_readback_.size() * sizeof(float)), + hiz_depth_readback_.data()); + + // Copy level 0 into the pyramid, then max-reduce subsequent levels. 
+ std::memcpy(hiz_pyramid_.data() + hiz_mip_offset_[0], + hiz_depth_readback_.data(), + hiz_depth_readback_.size() * sizeof(float)); + for (size_t lvl = 1; lvl < hiz_mip_offset_.size(); ++lvl) { + const uint32_t pw = hiz_mip_w_[lvl - 1]; + const uint32_t ph = hiz_mip_h_[lvl - 1]; + const uint32_t cw = hiz_mip_w_[lvl]; + const uint32_t ch = hiz_mip_h_[lvl]; + const float* parent = hiz_pyramid_.data() + hiz_mip_offset_[lvl - 1]; + float* child = hiz_pyramid_.data() + hiz_mip_offset_[lvl]; + for (uint32_t y = 0; y < ch; ++y) { + const uint32_t py0 = std::min(2 * y, ph - 1); + const uint32_t py1 = std::min(2 * y + 1, ph - 1); + for (uint32_t x = 0; x < cw; ++x) { + const uint32_t px0 = std::min(2 * x, pw - 1); + const uint32_t px1 = std::min(2 * x + 1, pw - 1); + const float a = parent[py0 * pw + px0]; + const float b = parent[py0 * pw + px1]; + const float c = parent[py1 * pw + px0]; + const float d = parent[py1 * pw + px1]; + child[y * cw + x] = std::max(std::max(a, b), std::max(c, d)); + } + } + } + + hiz_vp_ = proj_matrix_ * view_matrix_; + hiz_vp_valid_ = true; +} + +bool ViewportWindow::aabbOccludedByHiz(const float mn[3], const float mx[3]) const { + if (!hiz_vp_valid_ || hiz_pyramid_.empty()) return false; + + // Project all 8 corners through the HiZ frame's VP (stored last frame). + // Track NDC min/max over x, y, z. If any corner has w <= 0, the AABB + // straddles the near plane and we skip (behaves like "not occluded"). + float sx_min = std::numeric_limits::infinity(); + float sx_max = -std::numeric_limits::infinity(); + float sy_min = std::numeric_limits::infinity(); + float sy_max = -std::numeric_limits::infinity(); + float sz_min = std::numeric_limits::infinity(); + const float* vp = hiz_vp_.constData(); // column-major + for (int c = 0; c < 8; ++c) { + const float x = (c & 1) ? mx[0] : mn[0]; + const float y = (c & 2) ? mx[1] : mn[1]; + const float z = (c & 4) ? 
mx[2] : mn[2]; + const float cx = vp[0]*x + vp[4]*y + vp[8]*z + vp[12]; + const float cy = vp[1]*x + vp[5]*y + vp[9]*z + vp[13]; + const float cz = vp[2]*x + vp[6]*y + vp[10]*z + vp[14]; + const float cw = vp[3]*x + vp[7]*y + vp[11]*z + vp[15]; + if (cw <= 1e-4f) return false; // near-plane straddle + const float inv = 1.0f / cw; + const float nx = cx * inv; + const float ny = cy * inv; + const float nz = cz * inv; + if (nx < sx_min) sx_min = nx; if (nx > sx_max) sx_max = nx; + if (ny < sy_min) sy_min = ny; if (ny > sy_max) sy_max = ny; + if (nz < sz_min) sz_min = nz; + } + + if (sx_max < -1.0f || sx_min > 1.0f || + sy_max < -1.0f || sy_min > 1.0f) return false; + if (sz_min < -1.0f) return false; + + sx_min = std::max(sx_min, -1.0f); + sx_max = std::min(sx_max, 1.0f); + sy_min = std::max(sy_min, -1.0f); + sy_max = std::min(sy_max, 1.0f); + + const float u_min = 0.5f * (sx_min + 1.0f); + const float u_max = 0.5f * (sx_max + 1.0f); + const float v_min = 0.5f * (sy_min + 1.0f); + const float v_max = 0.5f * (sy_max + 1.0f); + const float aabb_near_depth = 0.5f * (sz_min + 1.0f); + + // Pick mip level where the projected rect covers at most 2 texels on + // each axis; sample the max over the covered texels there. 
+ const float px_w = (u_max - u_min) * static_cast(hiz_base_w_); + const float px_h = (v_max - v_min) * static_cast(hiz_base_h_); + int mip = 0; + while ((int)hiz_mip_offset_.size() - 1 > mip && + ((px_w / (1 << mip)) > 2.0f || (px_h / (1 << mip)) > 2.0f)) { + ++mip; + } + + const uint32_t mw = hiz_mip_w_[mip]; + const uint32_t mh = hiz_mip_h_[mip]; + int x0 = static_cast(std::floor(u_min * mw)); + int x1 = static_cast(std::ceil (u_max * mw)); + int y0 = static_cast(std::floor(v_min * mh)); + int y1 = static_cast(std::ceil (v_max * mh)); + if (x0 < 0) x0 = 0; + if (y0 < 0) y0 = 0; + if (x1 > (int)mw) x1 = mw; + if (y1 > (int)mh) y1 = mh; + if (x1 <= x0 || y1 <= y0) return false; + + const float* level = hiz_pyramid_.data() + hiz_mip_offset_[mip]; + float hiz_max = 0.0f; + for (int y = y0; y < y1; ++y) { + const float* row = level + static_cast(y) * mw; + for (int x = x0; x < x1; ++x) { + if (row[x] > hiz_max) hiz_max = row[x]; + } + } + + // AABB's closest point must be strictly farther than everything drawn + // in the region for it to be fully occluded. + return aabb_near_depth > hiz_max; +} + uint32_t ViewportWindow::pickObjectAt(int x, int y) { if (!gl_initialized_) return 0; context_->makeCurrent(this); @@ -936,10 +1174,19 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] : std::numeric_limits::infinity(); }; + // HiZ occlusion is skipped entirely when the pick pass runs + // (min_pixel_radius == 0 on that path), when the user disables it via + // env var, or before the first pyramid has been built. 
+ const bool hiz_on = hizEnabled() && min_pixel_radius > 0.0f && hiz_vp_valid_; + auto test_and_push = [&](uint32_t inst_idx) { const InstanceCpu& inst = m.instances[inst_idx]; if (!aabbInFrustum(inst.world_aabb_min, inst.world_aabb_max, planes)) return; if (!contributionPasses(inst.world_aabb_min, inst.world_aabb_max)) return; + if (hiz_on && aabbOccludedByHiz(inst.world_aabb_min, inst.world_aabb_max)) { + ++hiz_reject_count_; + return; + } if (inst.mesh_id >= m.meshes.size()) return; const MeshInfo& mesh = m.meshes[inst.mesh_id]; const bool want_lod1 = mesh.lod1_index_count > 0 && @@ -966,6 +1213,12 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] // Contribution cull the whole subtree: if the node's enclosing // sphere is below threshold, every child is too. if (!contributionPasses(n.aabb_min, n.aabb_max)) continue; + // HiZ cull the whole subtree: if the node AABB is fully + // occluded, every leaf is too. The conservative test (AABB + // near-depth vs max pyramid depth) never rejects a visible + // parent wrongly even when some children could have peeked + // through. + if (hiz_on && aabbOccludedByHiz(n.aabb_min, n.aabb_max)) continue; if (n.count > 0) { for (uint32_t k = 0; k < n.count; ++k) { uint32_t item_idx = m.bvh.item_indices[n.right_or_first + k]; @@ -1108,6 +1361,7 @@ void ViewportWindow::render() { visible_objects_ = 0; gl_draw_calls_ = 0; indirect_sub_draws_ = 0; + hiz_reject_count_ = 0; // Start each frame with CCW-is-front; the two-pass draw below flips // back and forth. Harmless when culling is off. @@ -1178,6 +1432,13 @@ void ViewportWindow::render() { renderAxisGizmo(); + // Build HiZ from this frame's resolved depth for next frame's cull. + // Synchronous glReadPixels inside — cost ~0.5 ms at 256x128 on a + // mid-range dGPU. Skippable via IFC_NO_HIZ=1. 
+ if (hizEnabled()) { + buildHizPyramid(); + } + context_->swapBuffers(this); float dt = frame_clock_.restart() / 1000.0f; @@ -1215,12 +1476,13 @@ void ViewportWindow::render() { emit frameStatsUpdated(stats); qDebug("[frame] %.1f fps %.2f ms obj %u/%u tri %u/%u " - "meshes %u gl_draws %u sub_draws %u " + "meshes %u gl_draws %u sub_draws %u hiz_rej %u " "vram %.1f MB (vbo %.1f + ebo %.1f + ssbo %.1f) models %zu (%zu hidden)", last_fps_, 1000.0f / last_fps_, visible_objects_, total_obj, visible_triangles_, total_tri, total_meshes, gl_draw_calls_, indirect_sub_draws_, + hiz_reject_count_, (total_vbo + total_ebo + total_ssbo) / (1024.0*1024.0), total_vbo / (1024.0*1024.0), total_ebo / (1024.0*1024.0), diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index fe54cce9210..fd584bb4cb8 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -172,6 +172,20 @@ class ViewportWindow : public QWindow { void buildShaders(); void buildAxisGizmo(); void setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo); + + // Resolve the default framebuffer's MSAA depth into a single-sample + // texture, read it back, and max-reduce a mip pyramid on the CPU. The + // resulting pyramid is stored in hiz_pyramid_ along with the VP matrix + // used to draw it; next frame's cullAndUploadVisible can test AABBs + // against it. Synchronous readback — at 256×128 the cost is sub-ms + // and not a measured bottleneck; Phase 3D's compute-shader cull will + // eliminate the readback entirely. + void buildHizPyramid(); + + // True if the AABB is fully occluded by the previous frame's depth. + // Returns false when the HiZ is invalid, the AABB crosses the near + // plane, or the projection falls outside NDC. 
+ bool aabbOccludedByHiz(const float mn[3], const float mx[3]) const; bool growModelVbo(ModelGpuData& m, size_t needed_total); bool growModelEbo(ModelGpuData& m, size_t needed_total); bool growModelSsbo(ModelGpuData& m, size_t needed_total); @@ -219,6 +233,32 @@ class ViewportWindow : public QWindow { int pick_width_ = 0; int pick_height_ = 0; + // HiZ occlusion culling (Phase 3C). + // + // Each frame after the main draw we blit the MSAA depth buffer down + // into a single-sample depth texture (hiz_fbo_ / hiz_depth_tex_), then + // glReadPixels it into hiz_depth_readback_. We max-reduce that into a + // mip pyramid (hiz_pyramid_) and remember the VP matrix used + // (hiz_vp_ + hiz_vp_valid_) so next frame's cull can test AABBs + // against a slightly-stale depth. Skipped for the pick pass and when + // IFC_NO_HIZ=1. + GLuint hiz_fbo_ = 0; + GLuint hiz_depth_tex_ = 0; + GLuint hiz_resolve_fbo_ = 0; // full-size single-sample resolve + GLuint hiz_resolve_depth_tex_ = 0; + int hiz_resolve_w_ = 0; + int hiz_resolve_h_ = 0; + int hiz_base_w_ = 0; + int hiz_base_h_ = 0; + std::vector hiz_depth_readback_; // hiz_base_w_ * hiz_base_h_ floats + std::vector hiz_pyramid_; // concatenated mip levels + std::vector hiz_mip_offset_; // into hiz_pyramid_ + std::vector hiz_mip_w_; + std::vector hiz_mip_h_; + QMatrix4x4 hiz_vp_; + bool hiz_vp_valid_ = false; + uint32_t hiz_reject_count_ = 0; // per-frame stat + // Per-frame stats uint32_t visible_triangles_ = 0; uint32_t visible_objects_ = 0; From cc680df236e1cb3d79a3baf55157a5d4f44dd300 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Tue, 14 Apr 2026 20:33:21 +1000 Subject: [PATCH 28/37] Cull: read AABBs from compact bvh_items in the hot path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cullAndUploadVisible was reading each instance's AABB through m.instances[idx] — a 104-byte InstanceCpu struct — for the frustum / contribution / HiZ tests. 
Only 24 of those bytes (the two float[3] AABBs) are actually used by the tests; the rest (4×4 transform + header) is pure cache-line waste, and with 569k instances the array is 59 MB, well past any cache. bvh_items[idx] already stores a 1:1 compact 28-byte record with the same AABB, built unconditionally in buildBvhForModel(). Switch the hot test path to read from it, and only touch InstanceCpu once an instance has passed all three tests (for mesh_id). Modest ~20 % drop in cull-traverse time on a 569k-object overview (26 ms → 21 ms). Also add four cull-phase timers (clr / trv / emt / upl) to the per-second stats line so future optimisation work has concrete numbers to chase. Confirmed via these timers that bucket clears, emit and GPU upload are all <1 ms combined; traversal is where the remaining CPU cost lives. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 43 +++++++++++++++++++++++++++----- src/ifcviewer/ViewportWindow.h | 10 ++++++++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index fdfff63997c..a85b33bae40 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -1097,6 +1097,9 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] // the right thing for both. LOD split means instances that want the // decimated mesh go into a different bucket that emits against // mesh.lod1_ebo_byte_offset / lod1_index_count. 
+ QElapsedTimer phase_timer; + phase_timer.start(); + auto resize_if = [&](std::vector>& v) { if (v.size() < m.meshes.size()) v.resize(m.meshes.size()); }; @@ -1110,6 +1113,8 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] visible_by_mesh_rev_lod0_[i].clear(); visible_by_mesh_rev_lod1_[i].clear(); } + cull_clear_ns_ += phase_timer.nsecsElapsed(); + phase_timer.restart(); // LOD1 switches in when projected sphere radius (in pixels) drops below // this threshold. Overridable for tuning. Set to 0 to disable LOD1 @@ -1179,19 +1184,26 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] // env var, or before the first pyramid has been built. const bool hiz_on = hizEnabled() && min_pixel_radius > 0.0f && hiz_vp_valid_; + // Hot path: read the AABB from the compact bvh_items array (28 B stride) + // rather than the wide InstanceCpu (104 B stride). Most instances fail + // frustum or contribution, so we want to avoid touching the wider struct + // until a survivor needs its mesh_id. This alone turns the cull from + // cache-miss-per-instance into stream-friendly linear reads. auto test_and_push = [&](uint32_t inst_idx) { - const InstanceCpu& inst = m.instances[inst_idx]; - if (!aabbInFrustum(inst.world_aabb_min, inst.world_aabb_max, planes)) return; - if (!contributionPasses(inst.world_aabb_min, inst.world_aabb_max)) return; - if (hiz_on && aabbOccludedByHiz(inst.world_aabb_min, inst.world_aabb_max)) { + const BvhItem& item = m.bvh_items[inst_idx]; + if (!aabbInFrustum(item.aabb_min, item.aabb_max, planes)) return; + if (!contributionPasses(item.aabb_min, item.aabb_max)) return; + if (hiz_on && aabbOccludedByHiz(item.aabb_min, item.aabb_max)) { ++hiz_reject_count_; return; } + // Survivor — now pay the wide-struct fetch for mesh_id. 
+ const InstanceCpu& inst = m.instances[inst_idx]; if (inst.mesh_id >= m.meshes.size()) return; const MeshInfo& mesh = m.meshes[inst.mesh_id]; const bool want_lod1 = mesh.lod1_index_count > 0 && lod1_px_threshold > 0.0f && - pixelRadius(inst.world_aabb_min, inst.world_aabb_max) < lod1_px_threshold; + pixelRadius(item.aabb_min, item.aabb_max) < lod1_px_threshold; const bool reflected = inst_idx < m.instance_reflected.size() && m.instance_reflected[inst_idx] != 0; auto& bucket = @@ -1236,6 +1248,8 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] } else { for (uint32_t i = 0; i < m.instances.size(); ++i) test_and_push(i); } + cull_traverse_ns_ += phase_timer.nsecsElapsed(); + phase_timer.restart(); // Flatten fwd-slice first (LOD0 then LOD1), then rev-slice (ditto), into // visible_flat_. Commands for the fwd slice fill [0, indirect_forward_count), @@ -1274,6 +1288,8 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] emit_slice(visible_by_mesh_rev_lod0_, 0); emit_slice(visible_by_mesh_rev_lod1_, 1); m.indirect_command_count = static_cast(indirect_scratch_.size()); + cull_emit_ns_ += phase_timer.nsecsElapsed(); + phase_timer.restart(); // Upload visible list (keep binding alive even when empty). size_t vis_bytes = std::max(visible_flat_.size() * sizeof(uint32_t), @@ -1293,7 +1309,10 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] // Upload indirect command buffer. size_t ind_bytes = indirect_scratch_.size() * sizeof(DrawElementsIndirectCommand); - if (ind_bytes == 0) return; + if (ind_bytes == 0) { + cull_upload_ns_ += phase_timer.nsecsElapsed(); + return; + } if (m.indirect_buffer == 0 || m.indirect_capacity < ind_bytes) { if (m.indirect_buffer) gl_->glDeleteBuffers(1, &m.indirect_buffer); size_t new_cap = m.indirect_capacity ? 
m.indirect_capacity : 4096; @@ -1303,6 +1322,7 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] m.indirect_capacity = new_cap; } gl_->glNamedBufferSubData(m.indirect_buffer, 0, ind_bytes, indirect_scratch_.data()); + cull_upload_ns_ += phase_timer.nsecsElapsed(); } void ViewportWindow::updateCamera() { @@ -1446,6 +1466,7 @@ void ViewportWindow::render() { frame_count_++; if (accumulated_time_ >= 1.0f) { last_fps_ = static_cast(frame_count_) / accumulated_time_; + const uint32_t frames_in_window = static_cast(frame_count_); frame_count_ = 0; accumulated_time_ = 0.0f; @@ -1475,14 +1496,24 @@ void ViewportWindow::render() { stats.indirect_sub_draws = indirect_sub_draws_; emit frameStatsUpdated(stats); + const double inv_frames = frames_in_window > 0 + ? 1.0 / static_cast(frames_in_window) : 0.0; + const double clr_ms = cull_clear_ns_ * 1e-6 * inv_frames; + const double trv_ms = cull_traverse_ns_ * 1e-6 * inv_frames; + const double emt_ms = cull_emit_ns_ * 1e-6 * inv_frames; + const double upl_ms = cull_upload_ns_ * 1e-6 * inv_frames; + cull_clear_ns_ = cull_traverse_ns_ = cull_emit_ns_ = cull_upload_ns_ = 0; + qDebug("[frame] %.1f fps %.2f ms obj %u/%u tri %u/%u " "meshes %u gl_draws %u sub_draws %u hiz_rej %u " + "cull[clr %.2f trv %.2f emt %.2f upl %.2f]ms " "vram %.1f MB (vbo %.1f + ebo %.1f + ssbo %.1f) models %zu (%zu hidden)", last_fps_, 1000.0f / last_fps_, visible_objects_, total_obj, visible_triangles_, total_tri, total_meshes, gl_draw_calls_, indirect_sub_draws_, hiz_reject_count_, + clr_ms, trv_ms, emt_ms, upl_ms, (total_vbo + total_ebo + total_ssbo) / (1024.0*1024.0), total_vbo / (1024.0*1024.0), total_ebo / (1024.0*1024.0), diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index fd584bb4cb8..5d22f892884 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -259,6 +259,16 @@ class ViewportWindow : public QWindow { bool hiz_vp_valid_ = false; uint32_t 
hiz_reject_count_ = 0; // per-frame stat + // Cull-phase timers. Accumulated across all frames in the current + // 1-second stats window; divided by frame_count_ at print time to + // give per-frame average ms. Reset each window. Lets us see where + // CPU time actually goes: bucket clears vs BVH traversal vs emit vs + // GPU upload. + uint64_t cull_clear_ns_ = 0; + uint64_t cull_traverse_ns_ = 0; + uint64_t cull_emit_ns_ = 0; + uint64_t cull_upload_ns_ = 0; + // Per-frame stats uint32_t visible_triangles_ = 0; uint32_t visible_objects_ = 0; From a17399212969e7e23249cf3736a60ea92b892c39 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Tue, 14 Apr 2026 20:50:58 +1000 Subject: [PATCH 29/37] Cull: skip cullAndUploadVisible + HiZ on still frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit render() was re-running the full cull every 16 ms timer tick even when nothing had changed — the camera matrices, scene state, and therefore visible set were all identical to the previous frame's. The GPU was still happy to redraw from the cached indirect buffer, but the CPU was burning 21 ms/frame rebuilding the same visible list. Detect the no-op case by comparing view/proj against last_cull_view_ / last_cull_proj_ and checking a scene-dirty flag (have_cached_cull_) that every mutator on models_gpu_ invalidates — finalizeModel, applyCachedModel, applyLodExtension, hide/show/remove/reset, and uploadInstanceChunk. When the check passes we skip both cullAndUploadVisible and buildHizPyramid (the depth buffer is bit-identical, so re-reading it produces the same pyramid). Per-model visible_objects / visible_triangles stats now live on ModelGpuData so the stats line reports correct numbers on skipped frames instead of reading from a stale indirect_scratch_. 
Measured on a 569k-object overview: still frames go 22 fps → 62 fps; orbiting goes 23 fps → ~30-50 fps depending on how hard you move the mouse (the cull only pays its full cost on the ~25 % of frames where the camera actually moved). The stats line gains a "skipped N/M" field so you can see the ratio live. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 72 +++++++++++++++++++++++++++----- src/ifcviewer/ViewportWindow.h | 16 +++++++ 2 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index a85b33bae40..2070467362f 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -612,6 +612,7 @@ void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { if (chunk.local_mesh_id < m.meshes.size()) { m.total_triangles += m.meshes[chunk.local_mesh_id].index_count / 3; } + have_cached_cull_ = false; } void ViewportWindow::finalizeModel(uint32_t model_id) { @@ -635,6 +636,7 @@ void ViewportWindow::finalizeModel(uint32_t model_id) { buildBvhForModel(m, model_id); m.finalized = true; + have_cached_cull_ = false; const size_t ssbo_bytes = m.ssbo_instance_count * sizeof(InstanceGpu); qDebug("Model %u finalized: %zu verts, %zu meshes, %zu instances, %.1f MB vram " @@ -743,6 +745,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { m.finalized = true; models_gpu_.emplace(model_id, std::move(m)); + have_cached_cull_ = false; qDebug("Sidecar apply: model %u %zu verts, %zu meshes, %zu instances " "%.1f MB vram (vbo %.1f + ebo %.1f + ssbo %.1f)", @@ -766,6 +769,7 @@ void ViewportWindow::applyLodExtension(uint32_t model_id, const SidecarData& sd) // buildLods didn't add anything; just refresh the meshes vector in // case lod1_* fields were touched. 
m.meshes = sd.meshes; + have_cached_cull_ = false; return; } @@ -781,6 +785,7 @@ void ViewportWindow::applyLodExtension(uint32_t model_id, const SidecarData& sd) // Replace mesh metadata so cullAndUploadVisible sees the new lod1_ fields. m.meshes = sd.meshes; + have_cached_cull_ = false; } void ViewportWindow::resetScene() { @@ -796,16 +801,23 @@ void ViewportWindow::resetScene() { } models_gpu_.clear(); selected_object_id_ = 0; + have_cached_cull_ = false; } void ViewportWindow::hideModel(uint32_t model_id) { auto it = models_gpu_.find(model_id); - if (it != models_gpu_.end()) it->second.hidden = true; + if (it != models_gpu_.end()) { + it->second.hidden = true; + have_cached_cull_ = false; + } } void ViewportWindow::showModel(uint32_t model_id) { auto it = models_gpu_.find(model_id); - if (it != models_gpu_.end()) it->second.hidden = false; + if (it != models_gpu_.end()) { + it->second.hidden = false; + have_cached_cull_ = false; + } } void ViewportWindow::removeModel(uint32_t model_id) { @@ -820,6 +832,7 @@ void ViewportWindow::removeModel(uint32_t model_id) { if (it->second.visible_ssbo) gl_->glDeleteBuffers(1, &it->second.visible_ssbo); if (it->second.indirect_buffer) gl_->glDeleteBuffers(1, &it->second.indirect_buffer); models_gpu_.erase(it); + have_cached_cull_ = false; } } @@ -1288,6 +1301,17 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] emit_slice(visible_by_mesh_rev_lod0_, 0); emit_slice(visible_by_mesh_rev_lod1_, 1); m.indirect_command_count = static_cast(indirect_scratch_.size()); + + // Per-model stats snapshot — summed into the frame counters regardless + // of whether this frame ran a full cull or reused the cached one. 
+ uint32_t model_vis_obj = 0, model_vis_tri = 0; + for (const auto& cmd : indirect_scratch_) { + model_vis_tri += (cmd.count / 3) * cmd.instanceCount; + model_vis_obj += cmd.instanceCount; + } + m.cached_visible_objects = model_vis_obj; + m.cached_visible_triangles = model_vis_tri; + cull_emit_ns_ += phase_timer.nsecsElapsed(); phase_timer.restart(); @@ -1381,7 +1405,23 @@ void ViewportWindow::render() { visible_objects_ = 0; gl_draw_calls_ = 0; indirect_sub_draws_ = 0; - hiz_reject_count_ = 0; + // Only reset hiz_reject_count_ on frames where we actually re-cull; + // otherwise we'd wipe the previous cull's number and print 0 every + // still frame. See the cull_this_frame branch below. + + // Decide whether this frame's view+scene is identical to the last + // successful cull. If so the per-model indirect buffers / visible + // SSBOs are still valid — we just re-issue the draws from them and + // skip the expensive cull traversal entirely. + const bool camera_unchanged = have_cached_cull_ + && last_cull_view_ == view_matrix_ + && last_cull_proj_ == proj_matrix_; + const bool cull_this_frame = !camera_unchanged; + if (cull_this_frame) { + hiz_reject_count_ = 0; + } else { + ++cull_skipped_frames_; + } // Start each frame with CCW-is-front; the two-pass draw below flips // back and forth. Harmless when culling is off. 
@@ -1390,7 +1430,9 @@ void ViewportWindow::render() { for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; - cullAndUploadVisible(m, planes, focal_px, min_pixel_radius); + if (cull_this_frame) { + cullAndUploadVisible(m, planes, focal_px, min_pixel_radius); + } if (m.indirect_command_count == 0) continue; gl_->glBindVertexArray(m.vao); @@ -1442,20 +1484,25 @@ void ViewportWindow::render() { gl_->glFrontFace(GL_CCW); } - for (const auto& cmd : indirect_scratch_) { - visible_triangles_ += (cmd.count / 3) * cmd.instanceCount; - visible_objects_ += cmd.instanceCount; - } + visible_triangles_ += m.cached_visible_triangles; + visible_objects_ += m.cached_visible_objects; indirect_sub_draws_ += m.indirect_command_count; } + if (cull_this_frame) { + last_cull_view_ = view_matrix_; + last_cull_proj_ = proj_matrix_; + have_cached_cull_ = true; + } gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); renderAxisGizmo(); // Build HiZ from this frame's resolved depth for next frame's cull. // Synchronous glReadPixels inside — cost ~0.5 ms at 256x128 on a - // mid-range dGPU. Skippable via IFC_NO_HIZ=1. - if (hizEnabled()) { + // mid-range dGPU. Skippable via IFC_NO_HIZ=1. Also skipped on + // still frames: if we didn't re-cull, the depth buffer is + // bit-identical to the one we already turned into a pyramid. 
+ if (hizEnabled() && cull_this_frame) { buildHizPyramid(); } @@ -1503,10 +1550,12 @@ void ViewportWindow::render() { const double emt_ms = cull_emit_ns_ * 1e-6 * inv_frames; const double upl_ms = cull_upload_ns_ * 1e-6 * inv_frames; cull_clear_ns_ = cull_traverse_ns_ = cull_emit_ns_ = cull_upload_ns_ = 0; + const uint32_t skipped = cull_skipped_frames_; + cull_skipped_frames_ = 0; qDebug("[frame] %.1f fps %.2f ms obj %u/%u tri %u/%u " "meshes %u gl_draws %u sub_draws %u hiz_rej %u " - "cull[clr %.2f trv %.2f emt %.2f upl %.2f]ms " + "cull[clr %.2f trv %.2f emt %.2f upl %.2f]ms skipped %u/%u " "vram %.1f MB (vbo %.1f + ebo %.1f + ssbo %.1f) models %zu (%zu hidden)", last_fps_, 1000.0f / last_fps_, visible_objects_, total_obj, @@ -1514,6 +1563,7 @@ void ViewportWindow::render() { total_meshes, gl_draw_calls_, indirect_sub_draws_, hiz_reject_count_, clr_ms, trv_ms, emt_ms, upl_ms, + skipped, frames_in_window, (total_vbo + total_ebo + total_ssbo) / (1024.0*1024.0), total_vbo / (1024.0*1024.0), total_ebo / (1024.0*1024.0), diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 5d22f892884..26c6d20b588 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -79,6 +79,13 @@ struct ModelGpuData { std::vector instance_reflected; uint32_t ssbo_instance_count = 0; + // Stats snapshot from the last cullAndUploadVisible call. Cached so we + // can report the same numbers on skipped-cull frames (see + // have_cached_cull_ on ViewportWindow) without iterating the per-model + // scratch array again. + uint32_t cached_visible_objects = 0; + uint32_t cached_visible_triangles = 0; + // Per-instance world AABB + BVH (built at finalize). The BVH is the // same ordering as `instances`; bvh_items[i] corresponds to instances[i]. 
std::vector bvh_items; @@ -268,6 +275,15 @@ class ViewportWindow : public QWindow { uint64_t cull_traverse_ns_ = 0; uint64_t cull_emit_ns_ = 0; uint64_t cull_upload_ns_ = 0; + uint32_t cull_skipped_frames_ = 0; + + // Skip cullAndUploadVisible + buildHizPyramid when the camera and scene + // haven't changed since the last cull. The existing per-model + // indirect_buffer / visible_ssbo are still correct and just get + // redrawn. Invalidated by any function that mutates models_gpu_. + QMatrix4x4 last_cull_view_; + QMatrix4x4 last_cull_proj_; + bool have_cached_cull_ = false; // Per-frame stats uint32_t visible_triangles_ = 0; From bbe644e92c283c945ba6840c4128fd98ff8ce551 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Tue, 14 Apr 2026 21:05:27 +1000 Subject: [PATCH 30/37] ifcviewer: event-driven rendering, idle scenes cost zero CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced the 16ms QTimer with QEvent::UpdateRequest delivered via requestUpdate(), posted from every state mutator (mouse/wheel, model lifecycle, selection, visibility, resize). A static BIM scene — the common case for a viewer — now does no work at all between user actions. FPS is now measured as time spent inside render() rather than wall-clock gap between frames, so idle gaps don't pollute the 1-second window and the headline number reflects real render throughput. Headline fps still caps at vsync; sub-vsync profiling lives in the cull[...] phase timers. 
Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 53 +++++++++++++++++++++++++------- src/ifcviewer/ViewportWindow.h | 8 +++-- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 2070467362f..a48ef7f6d42 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -317,10 +317,12 @@ ViewportWindow::ViewportWindow(QWindow* parent) fmt.setSamples(4); setFormat(fmt); - connect(&render_timer_, &QTimer::timeout, this, [this]() { - if (isExposed()) render(); - }); - render_timer_.setInterval(16); + // Redraw is driven by QEvent::UpdateRequest. We post one via + // requestUpdate() from every function that mutates visible state + // (mouse/wheel, model lifecycle, selection, resize). When nothing + // changes — the common case for a static BIM model — we don't burn + // CPU/GPU redrawing the same frame. Qt coalesces multiple + // requestUpdate() calls inside a single vblank. 
} ViewportWindow::~ViewportWindow() { @@ -381,11 +383,11 @@ void ViewportWindow::initGL() { context_->makeCurrent(this); if (on) gl_->glEnable(GL_CULL_FACE); else gl_->glDisable(GL_CULL_FACE); + requestUpdate(); }); gl_initialized_ = true; - frame_clock_.start(); - render_timer_.start(); + requestUpdate(); emit initialized(); } @@ -613,6 +615,7 @@ void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { m.total_triangles += m.meshes[chunk.local_mesh_id].index_count / 3; } have_cached_cull_ = false; + requestUpdate(); } void ViewportWindow::finalizeModel(uint32_t model_id) { @@ -637,6 +640,7 @@ void ViewportWindow::finalizeModel(uint32_t model_id) { m.finalized = true; have_cached_cull_ = false; + requestUpdate(); const size_t ssbo_bytes = m.ssbo_instance_count * sizeof(InstanceGpu); qDebug("Model %u finalized: %zu verts, %zu meshes, %zu instances, %.1f MB vram " @@ -746,6 +750,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { m.finalized = true; models_gpu_.emplace(model_id, std::move(m)); have_cached_cull_ = false; + requestUpdate(); qDebug("Sidecar apply: model %u %zu verts, %zu meshes, %zu instances " "%.1f MB vram (vbo %.1f + ebo %.1f + ssbo %.1f)", @@ -770,6 +775,7 @@ void ViewportWindow::applyLodExtension(uint32_t model_id, const SidecarData& sd) // case lod1_* fields were touched. m.meshes = sd.meshes; have_cached_cull_ = false; + requestUpdate(); return; } @@ -786,6 +792,7 @@ void ViewportWindow::applyLodExtension(uint32_t model_id, const SidecarData& sd) // Replace mesh metadata so cullAndUploadVisible sees the new lod1_ fields. 
m.meshes = sd.meshes; have_cached_cull_ = false; + requestUpdate(); } void ViewportWindow::resetScene() { @@ -802,6 +809,7 @@ void ViewportWindow::resetScene() { models_gpu_.clear(); selected_object_id_ = 0; have_cached_cull_ = false; + requestUpdate(); } void ViewportWindow::hideModel(uint32_t model_id) { @@ -809,6 +817,7 @@ void ViewportWindow::hideModel(uint32_t model_id) { if (it != models_gpu_.end()) { it->second.hidden = true; have_cached_cull_ = false; + requestUpdate(); } } @@ -817,6 +826,7 @@ void ViewportWindow::showModel(uint32_t model_id) { if (it != models_gpu_.end()) { it->second.hidden = false; have_cached_cull_ = false; + requestUpdate(); } } @@ -833,10 +843,14 @@ void ViewportWindow::removeModel(uint32_t model_id) { if (it->second.indirect_buffer) gl_->glDeleteBuffers(1, &it->second.indirect_buffer); models_gpu_.erase(it); have_cached_cull_ = false; + requestUpdate(); } } -void ViewportWindow::setSelectedObjectId(uint32_t id) { selected_object_id_ = id; } +void ViewportWindow::setSelectedObjectId(uint32_t id) { + selected_object_id_ = id; + requestUpdate(); +} // --- HiZ occlusion culling (Phase 3C) ----------------------------------- @@ -1367,6 +1381,9 @@ void ViewportWindow::updateCamera() { void ViewportWindow::render() { if (!gl_initialized_ || !isExposed()) return; + QElapsedTimer frame_cost_clock; + frame_cost_clock.start(); + context_->makeCurrent(this); updateCamera(); @@ -1508,8 +1525,13 @@ void ViewportWindow::render() { context_->swapBuffers(this); - float dt = frame_clock_.restart() / 1000.0f; - accumulated_time_ += dt; + // Measure frame *cost* (time spent inside render()) rather than the + // wall-clock gap between frames. With event-driven rendering, idle gaps + // between requestUpdate() calls would otherwise pollute the FPS window. + // Reported fps = "if I rendered continuously, this is the rate I'd hit", + // which is what profiling actually wants. 
+ const float frame_cost_s = frame_cost_clock.nsecsElapsed() * 1e-9f; + accumulated_time_ += frame_cost_s; frame_count_++; if (accumulated_time_ >= 1.0f) { last_fps_ = static_cast(frame_count_) / accumulated_time_; @@ -1649,13 +1671,19 @@ void ViewportWindow::renderAxisGizmo() { } void ViewportWindow::exposeEvent(QExposeEvent*) { - if (isExposed() && !gl_initialized_) initGL(); + if (isExposed()) { + if (!gl_initialized_) initGL(); + else requestUpdate(); + } } void ViewportWindow::resizeEvent(QResizeEvent*) { - if (gl_initialized_) render(); + if (gl_initialized_) requestUpdate(); } bool ViewportWindow::event(QEvent* e) { switch (e->type()) { + case QEvent::UpdateRequest: + if (isExposed() && gl_initialized_) render(); + return true; case QEvent::MouseButtonPress: handleMousePress(static_cast(e)); return true; case QEvent::MouseButtonRelease: handleMouseRelease(static_cast(e)); return true; case QEvent::MouseMove: handleMouseMove(static_cast(e)); return true; @@ -1673,6 +1701,7 @@ void ViewportWindow::handleMouseRelease(QMouseEvent* e) { uint32_t id = pickObjectAt(e->pos().x(), e->pos().y()); selected_object_id_ = id; emit objectPicked(id); + requestUpdate(); // selection highlight changed } active_button_ = Qt::NoButton; } @@ -1695,10 +1724,12 @@ void ViewportWindow::handleMouseMove(QMouseEvent* e) { camera_pitch_ += delta.y() * 0.3f; camera_pitch_ = qBound(-89.0f, camera_pitch_, 89.0f); } + requestUpdate(); } } void ViewportWindow::handleWheel(QWheelEvent* e) { float factor = e->angleDelta().y() > 0 ? 
0.9f : 1.1f; camera_distance_ *= factor; camera_distance_ = qMax(0.1f, camera_distance_); + requestUpdate(); } diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 26c6d20b588..0a95ede0775 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -107,6 +106,11 @@ struct ModelGpuData { bool hidden = false; }; +// Rendering is event-driven: render() runs only when QEvent::UpdateRequest +// is delivered, posted via requestUpdate(). An idle scene costs zero CPU. +// INVARIANT: every public mutator that changes what should be on screen +// (camera, selection, model lifecycle, visibility) MUST call requestUpdate() +// before returning, or the viewport will go silently stale. class ViewportWindow : public QWindow { Q_OBJECT public: @@ -217,8 +221,6 @@ class ViewportWindow : public QWindow { QOpenGLContext* context_ = nullptr; QOpenGLFunctions_4_5_Core* gl_ = nullptr; - QTimer render_timer_; - QElapsedTimer frame_clock_; bool gl_initialized_ = false; // Shaders From 91198c99cf3660911dda839bfd281ab0a1cfe61c Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Tue, 14 Apr 2026 21:54:15 +1000 Subject: [PATCH 31/37] ifcviewer: quantize VBO to 16 B/vertex (sidecar v6) Position now u16x3 normalized against each mesh's local AABB; normal oct-encoded to i16x2; RGBA8 colour unchanged. Per-mesh dequant basis lives in a new MeshGpu SSBO at binding 2; both main and pick shaders mix() against it before applying the instance transform. Drops VBO and sidecar size by ~43 % (28 -> 16 B/vert), which matters mostly for warm-load downloads of precomputed sidecars and steady-state VRAM. LodBuilder dequantizes positions into a scratch buffer before calling meshopt, since meshoptimizer needs float positions. Also fixes a streaming-time crash in cullAndUploadVisible: bvh_items was only populated at finalize, but the linear fallback indexes it during streaming. 
Mirror BvhItem appends in uploadInstanceChunk so the hot path stays valid before the BVH is built. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/InstancedGeometry.h | 39 ++++- src/ifcviewer/LodBuilder.cpp | 36 ++++- src/ifcviewer/LodBuilder.h | 7 +- src/ifcviewer/MainWindow.cpp | 2 +- src/ifcviewer/README.md | 20 ++- src/ifcviewer/SidecarCache.cpp | 11 +- src/ifcviewer/SidecarCache.h | 11 +- src/ifcviewer/ViewportWindow.cpp | 243 +++++++++++++++++++++++++----- src/ifcviewer/ViewportWindow.h | 2 + 9 files changed, 299 insertions(+), 72 deletions(-) diff --git a/src/ifcviewer/InstancedGeometry.h b/src/ifcviewer/InstancedGeometry.h index ef79751806a..729e4df1474 100644 --- a/src/ifcviewer/InstancedGeometry.h +++ b/src/ifcviewer/InstancedGeometry.h @@ -24,14 +24,36 @@ #include #include -// Per-vertex layout for instanced meshes, stored in local coordinates. -// 28 bytes per vertex: -// pos(3 float) -- 12 B -// normal(3 float) -- 12 B -// color(4 bytes RGBA8, read as GL_UNSIGNED_BYTE*4 normalized) -- 4 B -static constexpr int INSTANCED_VERTEX_STRIDE_BYTES = 28; +// Per-vertex layout for instanced meshes, stored in local coordinates, +// quantized against each mesh's local AABB. 16 bytes per vertex: +// offset 0 pos 3 x uint16 normalized -> [0,1]; dequant to +// mix(mesh.aabb_min, mesh.aabb_max, t) +// offset 6 _pad 2 bytes +// offset 8 normal 2 x int16 normalized -> [-1,1]; octahedral-decoded +// offset 12 color 4 x uint8 normalized -> [0,1] +// +// Quantization basis is per mesh, stored in the MeshGpu SSBO bound at +// binding=2. The vertex shader looks up its basis via the instance's mesh_id. +static constexpr int INSTANCED_VERTEX_STRIDE_BYTES = 16; + +// Streamer-side intermediate format: 7 floats per vertex (pos3 + normal3 + +// color-as-float). GeometryStreamer writes this into MeshChunk.vertices; +// ViewportWindow::uploadMeshChunk quantizes it down to STRIDE_BYTES on the +// way to the VBO. Not the GPU layout — purely a transfer convention. 
static constexpr int INSTANCED_VERTEX_STRIDE_FLOATS = 7; +static constexpr int INSTANCED_VERTEX_POS_OFFSET = 0; +static constexpr int INSTANCED_VERTEX_NORMAL_OFFSET = 8; +static constexpr int INSTANCED_VERTEX_COLOR_OFFSET = 12; + +// Per-mesh quantization basis, uploaded to a std430 SSBO. Two vec4s so +// std430 layout is trivial (no alignment surprises). w components unused. +struct alignas(16) MeshGpu { + float aabb_min[4]; // xyz = local AABB min; w = 0 + float aabb_max[4]; // xyz = local AABB max; w = 0 +}; +static_assert(sizeof(MeshGpu) == 32, "MeshGpu must be 32 bytes"); + // Per-mesh metadata on the CPU side. Meshes own a slice of the model's // VBO (shared across LODs) and one or more slices of the EBO, one per LOD. // @@ -61,12 +83,13 @@ static_assert(sizeof(MeshInfo) == 56, "MeshInfo must be 56 bytes"); // mat4 transform (64 B column-major) // uint object_id // uint color_override_rgba8 -- 0 = use baked vertex color, else override -// uint _pad0, _pad1 -- align to 16 for std430 +// uint mesh_id -- index into per-model MeshGpu[] +// uint _pad1 -- align to 16 for std430 struct alignas(16) InstanceGpu { float transform[16]; uint32_t object_id = 0; uint32_t color_override_rgba8 = 0; - uint32_t _pad0 = 0; + uint32_t mesh_id = 0; // index into per-model MeshGpu[] uint32_t _pad1 = 0; }; static_assert(sizeof(InstanceGpu) == 80, "InstanceGpu must be 80 bytes"); diff --git a/src/ifcviewer/LodBuilder.cpp b/src/ifcviewer/LodBuilder.cpp index 88b8c9f0468..35b97df44a6 100644 --- a/src/ifcviewer/LodBuilder.cpp +++ b/src/ifcviewer/LodBuilder.cpp @@ -33,9 +33,8 @@ void buildLods(SidecarData& sd, float target_error) { if (sd.meshes.empty() || sd.vertices.empty() || sd.indices.empty()) return; - const size_t vtx_stride_bytes = INSTANCED_VERTEX_STRIDE_BYTES; - const size_t vtx_stride_floats = INSTANCED_VERTEX_STRIDE_FLOATS; - const size_t total_vertex_count = sd.vertices.size() / vtx_stride_floats; + const size_t vtx_stride_bytes = INSTANCED_VERTEX_STRIDE_BYTES; + const 
size_t total_vertex_count = sd.vertices.size() / vtx_stride_bytes; // Env var knobs so we can tune without rebuilding. // IFC_LOD_LOCK_BORDER=1 re-enable LockBorder (off by default: BIM @@ -73,8 +72,10 @@ void buildLods(SidecarData& sd, // Scratch buffers reused across meshes so we only allocate once. std::vector simplified; std::vector shadow; + std::vector dequant_pos; // 3 floats/vertex, dequantized simplified.reserve(1024); shadow.reserve(1024); + dequant_pos.reserve(1024 * 3); int dbg_printed = 0; int dbg_rejected_savings = 0; @@ -101,8 +102,27 @@ void buildLods(SidecarData& sd, const uint32_t first_index = mesh.ebo_byte_offset / sizeof(uint32_t); if (first_index + mesh.index_count > sd.indices.size()) continue; - const float* positions = - sd.vertices.data() + base_vertex * vtx_stride_floats; + // Dequantize positions for this mesh into a temp float array. + // meshopt needs contiguous float3 positions with a known stride; + // quantized bytes aren't directly usable. + const uint8_t* quant_base = + sd.vertices.data() + base_vertex * vtx_stride_bytes; + dequant_pos.resize(static_cast(mesh.vertex_count) * 3); + const float extent[3] = { + mesh.local_aabb_max[0] - mesh.local_aabb_min[0], + mesh.local_aabb_max[1] - mesh.local_aabb_min[1], + mesh.local_aabb_max[2] - mesh.local_aabb_min[2], + }; + for (uint32_t v = 0; v < mesh.vertex_count; ++v) { + const uint16_t* p = reinterpret_cast( + quant_base + v * vtx_stride_bytes); + for (int a = 0; a < 3; ++a) { + float t = p[a] / 65535.0f; + dequant_pos[v * 3 + a] = mesh.local_aabb_min[a] + t * extent[a]; + } + } + const float* positions = dequant_pos.data(); + const size_t local_pos_stride = sizeof(float) * 3; const uint32_t* indices = sd.indices.data() + first_index; const size_t target_index_count = std::max( @@ -121,7 +141,7 @@ void buildLods(SidecarData& sd, indices, mesh.index_count, positions, mesh.vertex_count, sizeof(float) * 3, // compare only xyz - vtx_stride_bytes); + local_pos_stride); 
simplified.resize(mesh.index_count); float result_error = 0.0f; @@ -135,7 +155,7 @@ void buildLods(SidecarData& sd, new_index_count = meshopt_simplifySloppy( simplified.data(), indices, mesh.index_count, - positions, mesh.vertex_count, vtx_stride_bytes, + positions, mesh.vertex_count, local_pos_stride, target_index_count, target_error, &result_error); } else { @@ -144,7 +164,7 @@ void buildLods(SidecarData& sd, new_index_count = meshopt_simplify( simplified.data(), shadow.data(), mesh.index_count, - positions, mesh.vertex_count, vtx_stride_bytes, + positions, mesh.vertex_count, local_pos_stride, target_index_count, target_error, options, &result_error); } diff --git a/src/ifcviewer/LodBuilder.h b/src/ifcviewer/LodBuilder.h index a937ae49870..0147ba82f9b 100644 --- a/src/ifcviewer/LodBuilder.h +++ b/src/ifcviewer/LodBuilder.h @@ -35,9 +35,10 @@ // target_ratio = 0.25 — aim for 25% of original tris // target_error = 0.05 — stop if relative error exceeds 5% // -// `sd.vertices` is read (position is the first 3 floats of each -// INSTANCED_VERTEX_STRIDE_FLOATS-wide vertex) but not modified — LOD1 -// reuses the same vertex buffer, just with a different index list. +// `sd.vertices` is raw bytes at the quantized layout; positions are +// dequantized per-mesh (using MeshInfo.local_aabb_min/max) into a temp +// float array before feeding meshoptimizer. Vertices are not modified — +// LOD1 reuses the same VBO, just with a different index list. 
void buildLods(SidecarData& sd, int min_triangles = 500, float target_ratio = 0.25f, diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 7dc5454700b..0e8162f0436 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -239,7 +239,7 @@ void MainWindow::applySidecarData(ModelId mid, SidecarData data) { qDebug("Sidecar hit: %s (%zu verts, %zu indices, %zu meshes, %zu instances, %zu elements)", model.file_path.toStdString().c_str(), - data.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS, + data.vertices.size() / INSTANCED_VERTEX_STRIDE_BYTES, data.indices.size(), data.meshes.size(), data.instances.size(), diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index afa20426103..be4a69ec42b 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -46,9 +46,12 @@ engine with a Qt6 interface and OpenGL 4.5 rendering. - **Per-model GPU buffers**: each loaded model gets its own VAO/VBO/EBO/instance-SSBO/visible-SSBO/indirect-buffer. No cross-model growth copies. Removing a model frees its GPU memory immediately. -- **Local-coordinate vertex format (28 B):** position (3 floats) + normal - (3 floats) + packed RGBA8 colour (1 uint). The per-instance transform is - applied in the vertex shader via an SSBO lookup. No world-baked vertex data. +- **Quantized local-coordinate vertex format (16 B):** position as + `u16x3` normalised against each mesh's local AABB, octahedral-encoded + normal as `i16x2`, packed RGBA8 colour. Dequantisation basis is per + mesh, uploaded once in a `MeshGpu` SSBO at binding 2. The per-instance + transform is applied in the vertex shader. No world-baked vertex data. + ~43 % smaller VBO and sidecar than the previous 28 B float layout. - **Multi-draw indirect:** every frame the CPU builds a flat list of visible instance indices and one `DrawElementsIndirectCommand` per non-empty mesh, then issues a single `glMultiDrawElementsIndirect` per model. 
50k visible @@ -93,7 +96,7 @@ engine with a Qt6 interface and OpenGL 4.5 rendering. | `InstancedGeometry.h` | Shared structs: `MeshInfo`, `InstanceCpu`, `InstanceGpu`, chunk records | | `BvhAccel.h/cpp` | Median-split BVH builder; operates on instance world-AABBs | | `LodBuilder.h/cpp` | Post-stream decimation of unique meshes via meshoptimizer (`simplifySloppy`) | -| `SidecarCache.h/cpp` | Raw binary `.ifcview` (v5) sidecar read/write | +| `SidecarCache.h/cpp` | Raw binary `.ifcview` (v6) sidecar read/write | | `AppSettings.h/cpp` | Persisted preferences (geometry library, stats overlay, backface culling) | | `SettingsWindow.h/cpp` | Settings dialog | | `CMakeLists.txt` | Build configuration | @@ -270,7 +273,7 @@ while stack not empty: Depth 64 is enough for billions of items on any balanced tree. The stack is on the C++ stack, zero per-frame allocation. -#### Sidecar format (`.ifcview`, v5) +#### Sidecar format (`.ifcview`, v6) Raw memory dump, Blender-`.blend`-style — no serialisation, no parsing. Stores everything needed to skip the `IfcGeom::Iterator` pass: @@ -278,7 +281,7 @@ Stores everything needed to skip the `IfcGeom::Iterator` pass: ``` SidecarHeader (magic "IFVW", version, endian, ...) uint64_t source_file_size -uint32_t + float[] vertex data (7 floats × N_verts, local coords) +uint32_t + uint8_t[] vertex data (16 B/vert quantized; per-mesh basis in MeshInfo) uint32_t + uint32_t[] index data (mesh-local) uint32_t + MeshInfo[] per-unique-mesh metadata (56 B each, incl. LOD1 slice) uint32_t + InstanceCpu[] per-placement records (transform + AABB + ids) @@ -298,7 +301,8 @@ Per-model state on the GPU: | Buffer | Contents | Lifetime | |--------|----------|----------| -| `VBO` | Interleaved local-coord vertex data (28 B/vert). One range per unique representation. | Grow-on-demand during streaming; static after finalize. | +| `VBO` | Quantized local-coord vertex data (16 B/vert: u16x3 pos, oct i16x2 normal, RGBA8). One range per unique representation. 
| Grow-on-demand during streaming; static after finalize. | +| `MeshGpu SSBO` (binding 2) | Per-mesh dequant basis (`vec4 aabb_min`, `vec4 aabb_max`). | Grow-on-demand; static after finalize. | | `EBO` | Mesh-local uint32 indices. One range per unique representation. | Same. | | `SSBO` (binding 0) | `InstanceGpu[]` (80 B each: mat4 transform, object_id, color_override, pad). | Appended during streaming, static after finalize. | | `visible SSBO` (binding 1) | `uint32[]` — flat list of visible instance indices, ordered by mesh, uploaded each frame. | Rewritten every frame. | @@ -311,7 +315,7 @@ struct DrawElementsIndirectCommand { uint32_t count; // mesh.index_count uint32_t instanceCount; // visible-list length for this mesh uint32_t firstIndex; // mesh.ebo_byte_offset / 4 - uint32_t baseVertex; // mesh.vbo_byte_offset / 28 + uint32_t baseVertex; // mesh.vbo_byte_offset / 16 uint32_t baseInstance; // offset into the flat visible-index array }; ``` diff --git a/src/ifcviewer/SidecarCache.cpp b/src/ifcviewer/SidecarCache.cpp index da3943988d2..171bf4bda65 100644 --- a/src/ifcviewer/SidecarCache.cpp +++ b/src/ifcviewer/SidecarCache.cpp @@ -17,17 +17,16 @@ * * ********************************************************************************/ -// v5 layout (all multi-byte fields native-endian; endianness marker in header). -// Same sequence as v4; the only change is that MeshInfo grew two uint32_ts -// (lod1_ebo_byte_offset + lod1_index_count) and `indices` may contain extra -// appended LOD1 slices pointed at by those offsets. +// v6 layout (all multi-byte fields native-endian; endianness marker in header). +// Same sequence as v5; the only change is that vertex data is now raw bytes +// at the 16 B/vertex quantized layout (see InstancedGeometry.h). 
// // // SidecarHeader (16 bytes) // uint64_t source_file_size // -// uint32_t num_vertices_floats -// float[] vertex data (28 B/vertex: pos3 + normal3 + color1_packed) +// uint32_t num_vertex_bytes +// uint8_t[] vertex data (16 B/vertex: pos u16x3 + pad2 + oct-normal i16x2 + rgba8) // uint32_t num_indices // uint32_t[] index data (mesh-local indices; base_vertex applied at draw time) // diff --git a/src/ifcviewer/SidecarCache.h b/src/ifcviewer/SidecarCache.h index 332abdc8029..e2e34373abe 100644 --- a/src/ifcviewer/SidecarCache.h +++ b/src/ifcviewer/SidecarCache.h @@ -37,7 +37,9 @@ static constexpr uint32_t SIDECAR_MAGIC = 0x49465657; // "IFVW" // v5 = MeshInfo extended with lod1_ebo_byte_offset + lod1_index_count (56 B). // sd.indices may contain an appended LOD1 index slice for each mesh // where meshoptimizer decimation produced useful output. -static constexpr uint32_t SIDECAR_VERSION = 5; +// v6 = VBO vertices quantized to 16 B/vertex (pos u16x3 + normal oct i16x2 + +// color u8x4). Dequant basis is per-mesh MeshInfo.local_aabb_min/max. +static constexpr uint32_t SIDECAR_VERSION = 6; static constexpr uint32_t SIDECAR_ENDIAN = 0x01020304; // Fixed-size element record. Strings are stored as (offset, length) pairs @@ -56,10 +58,11 @@ struct PackedElementInfo { }; // Everything needed to display an already-tessellated model without -// re-running the iterator. v4 schema: instanced geometry. +// re-running the iterator. v6 schema: instanced + quantized geometry. struct SidecarData { - // Per-model GPU geometry (local coords). 28 bytes/vertex. - std::vector vertices; + // Per-model GPU geometry (local coords). Raw VBO bytes at the + // INSTANCED_VERTEX_STRIDE_BYTES layout (16 B/vertex as of v6). + std::vector vertices; std::vector indices; // Mesh dictionary and per-instance data. 
diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index a48ef7f6d42..4731a431867 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -44,16 +44,17 @@ static_assert(sizeof(DrawElementsIndirectCommand) == 20, "indirect cmd must be 2 // Shaders // ----------------------------------------------------------------------------- // -// Vertex layout (GL side, 28 bytes): -// location 0: vec3 a_position (local coords) -// location 1: vec3 a_normal (local) -// location 2: vec4 a_color (GL_UNSIGNED_BYTE * 4 normalized) +// Vertex layout (GL side, 16 bytes — quantized; see InstancedGeometry.h): +// location 0: vec3 a_position_q (u16x3 normalized, per-mesh AABB basis) +// location 1: vec2 a_normal_oct (i16x2 normalized, octahedral) +// location 2: vec4 a_color (u8x4 normalized) // // Per-instance record in SSBO std430 (80 bytes): // mat4 transform // uint object_id // uint color_override_rgba8 -- 0 => use baked a_color -// uint _pad0, _pad1 +// uint mesh_id -- index into per-model MeshGpu[] +// uint _pad1 // // The draw calls pass `u_instance_offset = mesh.first_instance`; the shader // reads `instances[u_instance_offset + gl_InstanceID]`. @@ -61,15 +62,16 @@ static_assert(sizeof(DrawElementsIndirectCommand) == 20, "indirect cmd must be 2 static const char* MAIN_VERTEX_SHADER = R"( #version 450 core #extension GL_ARB_shader_draw_parameters : require -layout(location = 0) in vec3 a_position; -layout(location = 1) in vec3 a_normal; +// Quantized vertex inputs — see InstancedGeometry.h for layout. 
+layout(location = 0) in vec3 a_position_q; // u16x3 normalized -> [0,1] +layout(location = 1) in vec2 a_normal_oct; // i16x2 normalized -> [-1,1] layout(location = 2) in vec4 a_color; struct InstanceRecord { mat4 transform; uint object_id; uint color_override; - uint _pad0; + uint mesh_id; uint _pad1; }; layout(std430, binding = 0) readonly buffer Instances { @@ -78,6 +80,10 @@ layout(std430, binding = 0) readonly buffer Instances { layout(std430, binding = 1) readonly buffer VisibleIndices { uint visible[]; }; +struct MeshQuant { vec4 aabb_min; vec4 aabb_max; }; +layout(std430, binding = 2) readonly buffer Meshes { + MeshQuant meshes[]; +}; uniform mat4 u_view_projection; uniform uint u_selected_id; @@ -87,11 +93,24 @@ out vec4 v_color; flat out uint v_object_id; flat out uint v_selected; +// Meyer et al. octahedral normal decode. Input is in [-1,1]^2. +vec3 octDecode(vec2 e) { + vec3 n = vec3(e.xy, 1.0 - abs(e.x) - abs(e.y)); + if (n.z < 0.0) n.xy = (1.0 - abs(n.yx)) * vec2(n.x >= 0.0 ? 1.0 : -1.0, + n.y >= 0.0 ? 1.0 : -1.0); + return normalize(n); +} + void main() { uint slot = uint(gl_BaseInstanceARB) + uint(gl_InstanceID); uint iid = visible[slot]; InstanceRecord inst = instances[iid]; - vec4 world = inst.transform * vec4(a_position, 1.0); + MeshQuant mq = meshes[inst.mesh_id]; + + // Dequantize local position against this mesh's AABB. + vec3 pos_local = mix(mq.aabb_min.xyz, mq.aabb_max.xyz, a_position_q); + + vec4 world = inst.transform * vec4(pos_local, 1.0); gl_Position = u_view_projection * world; // Rotate the normal by the upper-3x3 of the transform. BIM placements @@ -101,8 +120,9 @@ void main() { // otherwise mirrored instances shade as if inside-out. The same // determinant sign is what GL_CULL_FACE uses to decide winding, so // keeping them in agreement means backface culling is safe to enable. 
+ vec3 n_local = octDecode(a_normal_oct); mat3 rot = mat3(inst.transform); - vec3 n = rot * a_normal; + vec3 n = rot * n_local; if (determinant(rot) < 0.0) n = -n; v_normal = normalize(n); @@ -152,13 +172,13 @@ void main() { static const char* PICK_VERTEX_SHADER = R"( #version 450 core #extension GL_ARB_shader_draw_parameters : require -layout(location = 0) in vec3 a_position; +layout(location = 0) in vec3 a_position_q; struct InstanceRecord { mat4 transform; uint object_id; uint color_override; - uint _pad0; + uint mesh_id; uint _pad1; }; layout(std430, binding = 0) readonly buffer Instances { @@ -167,6 +187,10 @@ layout(std430, binding = 0) readonly buffer Instances { layout(std430, binding = 1) readonly buffer VisibleIndices { uint visible[]; }; +struct MeshQuant { vec4 aabb_min; vec4 aabb_max; }; +layout(std430, binding = 2) readonly buffer Meshes { + MeshQuant meshes[]; +}; uniform mat4 u_view_projection; @@ -176,7 +200,9 @@ void main() { uint slot = uint(gl_BaseInstanceARB) + uint(gl_InstanceID); uint iid = visible[slot]; InstanceRecord inst = instances[iid]; - gl_Position = u_view_projection * inst.transform * vec4(a_position, 1.0); + MeshQuant mq = meshes[inst.mesh_id]; + vec3 pos_local = mix(mq.aabb_min.xyz, mq.aabb_max.xyz, a_position_q); + gl_Position = u_view_projection * inst.transform * vec4(pos_local, 1.0); v_object_id = inst.object_id; } )"; @@ -240,6 +266,51 @@ static GLuint linkProgram(QOpenGLFunctions_4_5_Core* gl, GLuint vert, GLuint fra // ----------------------------------------------------------------------------- +// Meyer et al. octahedral normal encode. Input unit vector -> [-1,1]^2. +static void octEncode(const float n[3], float out[2]) { + float ax = std::fabs(n[0]), ay = std::fabs(n[1]), az = std::fabs(n[2]); + float denom = ax + ay + az; + if (denom < 1e-12f) { out[0] = 0.0f; out[1] = 0.0f; return; } + float px = n[0] / denom; + float py = n[1] / denom; + if (n[2] < 0.0f) { + float sx = px >= 0.0f ? 
1.0f : -1.0f; + float sy = py >= 0.0f ? 1.0f : -1.0f; + float nx = (1.0f - std::fabs(py)) * sx; + float ny = (1.0f - std::fabs(px)) * sy; + px = nx; py = ny; + } + out[0] = px; + out[1] = py; +} + +// Quantize a streamer-format vertex (pos3 + normal3 + color-as-float) into +// the 16 B VBO record, given the mesh's tight local AABB. `extent_recip` +// is 1/(max-min) per axis, or 0 for degenerate axes (quantum becomes 0). +static void quantizeVertex(const float src[7], + const float aabb_min[3], + const float extent_recip[3], + uint8_t dst[INSTANCED_VERTEX_STRIDE_BYTES]) { + // Position -> u16 normalized. + uint16_t* p = reinterpret_cast(dst + INSTANCED_VERTEX_POS_OFFSET); + for (int a = 0; a < 3; ++a) { + float t = (src[a] - aabb_min[a]) * extent_recip[a]; + if (t < 0.0f) t = 0.0f; else if (t > 1.0f) t = 1.0f; + p[a] = static_cast(t * 65535.0f + 0.5f); + } + // Normal -> oct i16x2. + float oct[2]; + octEncode(src + 3, oct); + int16_t* n = reinterpret_cast(dst + INSTANCED_VERTEX_NORMAL_OFFSET); + for (int a = 0; a < 2; ++a) { + float v = oct[a]; + if (v < -1.0f) v = -1.0f; else if (v > 1.0f) v = 1.0f; + n[a] = static_cast(std::lrintf(v * 32767.0f)); + } + // Color passes through — streamer packs 4 bytes into the 7th float slot. + std::memcpy(dst + INSTANCED_VERTEX_COLOR_OFFSET, src + 6, 4); +} + // Determinant of the upper-left 3x3 of a column-major mat4 stored as 16 floats. // Sign tells us whether the transform contains a reflection, which is what // decides which glFrontFace winding to draw the instance with. 
@@ -334,6 +405,7 @@ ViewportWindow::~ViewportWindow() { if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); + if (m.mesh_info_ssbo) gl_->glDeleteBuffers(1, &m.mesh_info_ssbo); if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); if (m.indirect_buffer) gl_->glDeleteBuffers(1, &m.indirect_buffer); } @@ -396,19 +468,22 @@ void ViewportWindow::setupVaoLayout(GLuint vao, GLuint vbo, GLuint ebo) { gl_->glVertexArrayVertexBuffer(vao, 0, vbo, 0, INSTANCED_VERTEX_STRIDE_BYTES); gl_->glVertexArrayElementBuffer(vao, ebo); - // position (3 float @ 0) + // position (3 x u16 normalized @ 0) gl_->glEnableVertexArrayAttrib(vao, 0); - gl_->glVertexArrayAttribFormat(vao, 0, 3, GL_FLOAT, GL_FALSE, 0); + gl_->glVertexArrayAttribFormat(vao, 0, 3, GL_UNSIGNED_SHORT, GL_TRUE, + INSTANCED_VERTEX_POS_OFFSET); gl_->glVertexArrayAttribBinding(vao, 0, 0); - // normal (3 float @ 12) + // normal oct-encoded (2 x i16 normalized @ 8) gl_->glEnableVertexArrayAttrib(vao, 1); - gl_->glVertexArrayAttribFormat(vao, 1, 3, GL_FLOAT, GL_FALSE, 12); + gl_->glVertexArrayAttribFormat(vao, 1, 2, GL_SHORT, GL_TRUE, + INSTANCED_VERTEX_NORMAL_OFFSET); gl_->glVertexArrayAttribBinding(vao, 1, 0); - // color (4 ubyte @ 24, normalized) + // color (4 x u8 normalized @ 12) gl_->glEnableVertexArrayAttrib(vao, 2); - gl_->glVertexArrayAttribFormat(vao, 2, 4, GL_UNSIGNED_BYTE, GL_TRUE, 24); + gl_->glVertexArrayAttribFormat(vao, 2, 4, GL_UNSIGNED_BYTE, GL_TRUE, + INSTANCED_VERTEX_COLOR_OFFSET); gl_->glVertexArrayAttribBinding(vao, 2, 0); } @@ -544,8 +619,44 @@ void ViewportWindow::uploadMeshChunk(const MeshChunk& chunk) { ModelGpuData& m = getOrCreateModel(chunk.model_id); - const size_t vb_size = chunk.vertices.size() * sizeof(float); - const size_t ib_size = chunk.indices.size() * sizeof(uint32_t); + // Streamer format: 7 floats/vertex (pos3 + normal3 + color-as-float). 
+ const size_t src_stride_floats = 7; + const size_t n_verts = chunk.vertices.size() / src_stride_floats; + + // Recompute a tight local AABB from the actual vertex positions — the + // chunk-provided AABB can be slightly loose, which wastes quantization + // precision. Also derives the dequant basis we'll ship to the GPU. + float bmin[3] = { std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity() }; + float bmax[3] = { -std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity() }; + for (size_t i = 0; i < n_verts; ++i) { + const float* v = chunk.vertices.data() + i * src_stride_floats; + for (int a = 0; a < 3; ++a) { + if (v[a] < bmin[a]) bmin[a] = v[a]; + if (v[a] > bmax[a]) bmax[a] = v[a]; + } + } + // Degenerate / zero-extent axis: collapse to a single quantum. The + // dequant shader will output bmin[a] for every vertex, which is correct. + float extent_recip[3]; + for (int a = 0; a < 3; ++a) { + float ext = bmax[a] - bmin[a]; + extent_recip[a] = ext > 0.0f ? 1.0f / ext : 0.0f; + } + + // Quantize into a scratch buffer sized to the destination layout. 
+ std::vector quant(n_verts * INSTANCED_VERTEX_STRIDE_BYTES); + for (size_t i = 0; i < n_verts; ++i) { + quantizeVertex(chunk.vertices.data() + i * src_stride_floats, + bmin, extent_recip, + quant.data() + i * INSTANCED_VERTEX_STRIDE_BYTES); + } + + const size_t vb_size = quant.size(); + const size_t ib_size = chunk.indices.size() * sizeof(uint32_t); if (m.vbo_used + vb_size > m.vbo_capacity) { if (!growModelVbo(m, m.vbo_used + vb_size)) return; @@ -556,18 +667,17 @@ void ViewportWindow::uploadMeshChunk(const MeshChunk& chunk) { MeshInfo info; info.vbo_byte_offset = static_cast(m.vbo_used); - info.vertex_count = static_cast( - chunk.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS); + info.vertex_count = static_cast(n_verts); info.ebo_byte_offset = static_cast(m.ebo_used); info.index_count = static_cast(chunk.indices.size()); for (int a = 0; a < 3; ++a) { - info.local_aabb_min[a] = chunk.local_aabb_min[a]; - info.local_aabb_max[a] = chunk.local_aabb_max[a]; + info.local_aabb_min[a] = bmin[a]; + info.local_aabb_max[a] = bmax[a]; } info.first_instance = 0; info.instance_count = 0; - gl_->glNamedBufferSubData(m.vbo, m.vbo_used, vb_size, chunk.vertices.data()); + gl_->glNamedBufferSubData(m.vbo, m.vbo_used, vb_size, quant.data()); gl_->glNamedBufferSubData(m.ebo, m.ebo_used, ib_size, chunk.indices.data()); m.vbo_used += vb_size; m.ebo_used += ib_size; @@ -575,6 +685,33 @@ void ViewportWindow::uploadMeshChunk(const MeshChunk& chunk) { if (m.meshes.size() <= chunk.local_mesh_id) m.meshes.resize(chunk.local_mesh_id + 1); m.meshes[chunk.local_mesh_id] = info; + + // Write the matching dequant basis into the MeshGpu SSBO. Grow on + // demand; geometrically doubling keeps this amortized O(1) over streaming. 
+ MeshGpu mg{}; + for (int a = 0; a < 3; ++a) { + mg.aabb_min[a] = bmin[a]; + mg.aabb_max[a] = bmax[a]; + } + mg.aabb_min[3] = 0.0f; + mg.aabb_max[3] = 0.0f; + + const size_t mg_offset = chunk.local_mesh_id * sizeof(MeshGpu); + if (mg_offset + sizeof(MeshGpu) > m.mesh_info_capacity) { + size_t new_cap = m.mesh_info_capacity ? m.mesh_info_capacity : 32 * sizeof(MeshGpu); + while (new_cap < mg_offset + sizeof(MeshGpu)) new_cap *= 2; + GLuint new_ssbo = 0; + gl_->glCreateBuffers(1, &new_ssbo); + gl_->glNamedBufferStorage(new_ssbo, new_cap, nullptr, GL_DYNAMIC_STORAGE_BIT); + if (m.mesh_info_ssbo && m.mesh_info_capacity > 0) { + gl_->glCopyNamedBufferSubData(m.mesh_info_ssbo, new_ssbo, 0, 0, + m.mesh_info_capacity); + gl_->glDeleteBuffers(1, &m.mesh_info_ssbo); + } + m.mesh_info_ssbo = new_ssbo; + m.mesh_info_capacity = new_cap; + } + gl_->glNamedBufferSubData(m.mesh_info_ssbo, mg_offset, sizeof(MeshGpu), &mg); } void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { @@ -594,6 +731,15 @@ void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { m.instances.push_back(inst); m.instance_reflected.push_back(transformIsReflected(inst.transform) ? 1 : 0); + // Mirror into bvh_items so the hot cull path (which reads AABBs out of + // bvh_items even when no BVH has been built yet) stays correct during + // streaming. finalizeModel rebuilds the real BVH over these items. + BvhItem bi; + std::memcpy(bi.aabb_min, inst.world_aabb_min, sizeof(bi.aabb_min)); + std::memcpy(bi.aabb_max, inst.world_aabb_max, sizeof(bi.aabb_max)); + bi.model_id = inst.model_id; + m.bvh_items.push_back(bi); + // Append the GPU record to the instance SSBO so the model is drawable // immediately, without waiting for finalizeModel. The visible-list // architecture means SSBO order is irrelevant to correctness. 
@@ -601,7 +747,7 @@ void ViewportWindow::uploadInstanceChunk(const InstanceChunk& chunk) { std::memcpy(gpu.transform, inst.transform, sizeof(gpu.transform)); gpu.object_id = inst.object_id; gpu.color_override_rgba8 = inst.color_override_rgba8; - gpu._pad0 = 0; + gpu.mesh_id = inst.mesh_id; gpu._pad1 = 0; const size_t offset = m.ssbo_instance_count * sizeof(InstanceGpu); @@ -659,9 +805,10 @@ bool ViewportWindow::snapshotModel(uint32_t model_id, SidecarData& out) const { const auto& m = it->second; if (!m.finalized) return false; - // GPU readback of the packed VBO/EBO ranges actually in use. + // GPU readback of the packed VBO/EBO ranges actually in use. VBO is + // raw bytes at the quantized layout. if (m.vbo_used > 0) { - out.vertices.resize(m.vbo_used / sizeof(float)); + out.vertices.resize(m.vbo_used); gl_->glGetNamedBufferSubData(m.vbo, 0, m.vbo_used, out.vertices.data()); } if (m.ebo_used > 0) { @@ -685,6 +832,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { if (existing->second.vbo) gl_->glDeleteBuffers(1, &existing->second.vbo); if (existing->second.ebo) gl_->glDeleteBuffers(1, &existing->second.ebo); if (existing->second.ssbo) gl_->glDeleteBuffers(1, &existing->second.ssbo); + if (existing->second.mesh_info_ssbo) gl_->glDeleteBuffers(1, &existing->second.mesh_info_ssbo); if (existing->second.visible_ssbo) gl_->glDeleteBuffers(1, &existing->second.visible_ssbo); if (existing->second.indirect_buffer) gl_->glDeleteBuffers(1, &existing->second.indirect_buffer); models_gpu_.erase(existing); @@ -695,7 +843,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { gl_->glCreateBuffers(1, &m.vbo); gl_->glCreateBuffers(1, &m.ebo); - const size_t vb_bytes = data.vertices.size() * sizeof(float); + const size_t vb_bytes = data.vertices.size(); const size_t ib_bytes = data.indices.size() * sizeof(uint32_t); m.vbo_capacity = std::max(vb_bytes, 1); m.ebo_capacity = std::max(ib_bytes, 1); @@ -709,8 +857,7 @@ void 
ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { m.vbo_used = vb_bytes; m.ebo_used = ib_bytes; - m.vertex_count = static_cast( - data.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS); + m.vertex_count = static_cast(vb_bytes / INSTANCED_VERTEX_STRIDE_BYTES); m.meshes = std::move(data.meshes); m.instances = std::move(data.instances); @@ -728,7 +875,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { std::memcpy(dst.transform, src.transform, sizeof(dst.transform)); dst.object_id = src.object_id; dst.color_override_rgba8 = src.color_override_rgba8; - dst._pad0 = 0; + dst.mesh_id = src.mesh_id; dst._pad1 = 0; } gl_->glCreateBuffers(1, &m.ssbo); @@ -738,6 +885,30 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { } m.ssbo_instance_count = static_cast(gpu.size()); + // Build and upload the per-mesh quantization SSBO from cached meshes. + { + std::vector mesh_gpu(m.meshes.size()); + for (size_t i = 0; i < m.meshes.size(); ++i) { + for (int a = 0; a < 3; ++a) { + mesh_gpu[i].aabb_min[a] = m.meshes[i].local_aabb_min[a]; + mesh_gpu[i].aabb_max[a] = m.meshes[i].local_aabb_max[a]; + } + mesh_gpu[i].aabb_min[3] = 0.0f; + mesh_gpu[i].aabb_max[3] = 0.0f; + } + const size_t mg_bytes = mesh_gpu.size() * sizeof(MeshGpu); + gl_->glCreateBuffers(1, &m.mesh_info_ssbo); + if (mg_bytes > 0) { + gl_->glNamedBufferStorage(m.mesh_info_ssbo, mg_bytes, + mesh_gpu.data(), GL_DYNAMIC_STORAGE_BIT); + m.mesh_info_capacity = mg_bytes; + } else { + gl_->glNamedBufferStorage(m.mesh_info_ssbo, sizeof(MeshGpu), + nullptr, GL_DYNAMIC_STORAGE_BIT); + m.mesh_info_capacity = sizeof(MeshGpu); + } + } + // Recompute the reflection flag from each instance's transform — the // sidecar only caches InstanceCpu, not the parallel reflection flags. 
m.instance_reflected.resize(m.instances.size()); @@ -754,7 +925,7 @@ void ViewportWindow::applyCachedModel(uint32_t model_id, SidecarData data) { qDebug("Sidecar apply: model %u %zu verts, %zu meshes, %zu instances " "%.1f MB vram (vbo %.1f + ebo %.1f + ssbo %.1f)", - model_id, data.vertices.size() / INSTANCED_VERTEX_STRIDE_FLOATS, + model_id, vb_bytes / INSTANCED_VERTEX_STRIDE_BYTES, models_gpu_[model_id].meshes.size(), models_gpu_[model_id].instances.size(), (vb_bytes + ib_bytes + ssbo_bytes) / (1024.0*1024.0), @@ -803,6 +974,7 @@ void ViewportWindow::resetScene() { if (m.vbo) gl_->glDeleteBuffers(1, &m.vbo); if (m.ebo) gl_->glDeleteBuffers(1, &m.ebo); if (m.ssbo) gl_->glDeleteBuffers(1, &m.ssbo); + if (m.mesh_info_ssbo) gl_->glDeleteBuffers(1, &m.mesh_info_ssbo); if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); if (m.indirect_buffer) gl_->glDeleteBuffers(1, &m.indirect_buffer); } @@ -839,6 +1011,7 @@ void ViewportWindow::removeModel(uint32_t model_id) { if (it->second.vbo) gl_->glDeleteBuffers(1, &it->second.vbo); if (it->second.ebo) gl_->glDeleteBuffers(1, &it->second.ebo); if (it->second.ssbo) gl_->glDeleteBuffers(1, &it->second.ssbo); + if (it->second.mesh_info_ssbo) gl_->glDeleteBuffers(1, &it->second.mesh_info_ssbo); if (it->second.visible_ssbo) gl_->glDeleteBuffers(1, &it->second.visible_ssbo); if (it->second.indirect_buffer) gl_->glDeleteBuffers(1, &it->second.indirect_buffer); models_gpu_.erase(it); @@ -1455,6 +1628,7 @@ void ViewportWindow::render() { gl_->glBindVertexArray(m.vao); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); + gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m.mesh_info_ssbo); gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m.indirect_buffer); uint32_t fwd = m.indirect_forward_count; @@ -1622,6 +1796,7 @@ void ViewportWindow::renderPickPass() { gl_->glBindVertexArray(m.vao); gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m.ssbo); 
gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m.visible_ssbo); + gl_->glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m.mesh_info_ssbo); gl_->glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m.indirect_buffer); const uint32_t fwd = m.indirect_forward_count; diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index 0a95ede0775..ed6668cc116 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -60,6 +60,8 @@ struct ModelGpuData { GLuint vbo = 0; GLuint ebo = 0; GLuint ssbo = 0; + GLuint mesh_info_ssbo = 0; // MeshGpu[] — per-mesh quantization basis + size_t mesh_info_capacity = 0; // bytes size_t vbo_capacity = 0; size_t ebo_capacity = 0; From 036864c7197b2097f09b78e447d6f6e5411bfdc6 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Tue, 14 Apr 2026 21:55:14 +1000 Subject: [PATCH 32/37] =?UTF-8?q?ifcviewer:=20README=20=E2=80=94=20documen?= =?UTF-8?q?t=20event-driven=20rendering=20and=20VBO=20quantization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the event-driven rendering bullet (zero idle cost, in-render frame timing) and roadmap entries for VBO quantization and event-driven rendering. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index be4a69ec42b..01ceef7ecfd 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -78,6 +78,14 @@ engine with a Qt6 interface and OpenGL 4.5 rendering. through load. - **Non-blocking sidecar loading**: sidecars are read on a background thread; only the final GPU upload touches the main thread. +- **Event-driven rendering:** no continuous render timer. Frames are + scheduled via `QWindow::requestUpdate()` only when something changes + (camera move, streaming chunk, hover, settings). 
When the camera and + scene are idle the cull pass and HiZ readback are skipped entirely + and the main thread blocks in the Qt event loop — the viewer costs + zero CPU/GPU on a static scene. FPS is still reported accurately + because frame cost is measured *inside* `render()`, not as wall-clock + between frames. - **GPU object picking**: a second render pass writes object IDs into an R32UI framebuffer. Click reads back one pixel. No CPU-side raycasting. - **Multi-model support**: multiple IFCs can be loaded simultaneously. @@ -728,6 +736,8 @@ multi-million + occluders redundant rasterisation Phase 3C HiZ (done, CPU - [x] Phase 3A — screen-space contribution culling - [x] Phase 3B — distance / contribution LOD (meshoptimizer `simplifySloppy`) - [x] Phase 3C — Hierarchical-Z occlusion culling (v1, CPU-side readback) +- [x] Quantized VBO (16 B/vert, sidecar v6) +- [x] Event-driven rendering (zero idle CPU/GPU, cull skipped on still frames) - [ ] **Phase 3D — GPU-side compute-shader culling** (next; replaces the readback) - [ ] Vulkan/MoltenVK backend for macOS - [ ] Embedded Python scripting console From f243f804da9aaedc54ef13ac100e3c58f6219c5b Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Wed, 15 Apr 2026 15:18:51 +1000 Subject: [PATCH 33/37] ifcviewer: parallel per-model CPU cull Split cullAndUploadVisible into cullModelCpu (CPU-only, thread-safe) and uploadCullResults (GL-only, main thread). render() fans the per-model culls out via std::async and joins before the serial upload pass. The cull scratch (vis_fwd/rev_lod0/1, visible_flat, indirect_scratch) moved onto ModelGpuData so each worker owns its output buffers. Phase timers and hiz_reject_count_ are atomic since workers fetch_add into them. A new wall-clock timer around the dispatch block reports the actual frame-time contribution; the existing clr/trv/emt counters are now documented as per-thread sums. 
Measured on the 18-model / 569k-instance test scene: wall-clock cull dropped from ~25 ms to ~5 ms while the aggregate CPU work (trv) stayed ~30 ms. Frame time 34 ms -> 19 ms. IFC_CULL_THREADS=0 forces the single-threaded fallback. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 135 +++++++++++++++++++++---------- src/ifcviewer/ViewportWindow.h | 54 ++++++++----- 2 files changed, 127 insertions(+), 62 deletions(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 4731a431867..66525e28cf5 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -1292,6 +1292,12 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6][4], float focal_px, float min_pixel_radius) { + cullModelCpu(m, planes, focal_px, min_pixel_radius); + uploadCullResults(m); +} + +void ViewportWindow::cullModelCpu(ModelGpuData& m, const float planes[6][4], + float focal_px, float min_pixel_radius) { // Per-mesh scratch, split by winding × LOD. Winding split lets the draw // pass toggle glFrontFace once between two MDI calls so GL_CULL_FACE does // the right thing for both. 
LOD split means instances that want the @@ -1303,15 +1309,15 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] auto resize_if = [&](std::vector>& v) { if (v.size() < m.meshes.size()) v.resize(m.meshes.size()); }; - resize_if(visible_by_mesh_fwd_lod0_); - resize_if(visible_by_mesh_fwd_lod1_); - resize_if(visible_by_mesh_rev_lod0_); - resize_if(visible_by_mesh_rev_lod1_); + resize_if(m.vis_fwd_lod0); + resize_if(m.vis_fwd_lod1); + resize_if(m.vis_rev_lod0); + resize_if(m.vis_rev_lod1); for (size_t i = 0; i < m.meshes.size(); ++i) { - visible_by_mesh_fwd_lod0_[i].clear(); - visible_by_mesh_fwd_lod1_[i].clear(); - visible_by_mesh_rev_lod0_[i].clear(); - visible_by_mesh_rev_lod1_[i].clear(); + m.vis_fwd_lod0[i].clear(); + m.vis_fwd_lod1[i].clear(); + m.vis_rev_lod0[i].clear(); + m.vis_rev_lod1[i].clear(); } cull_clear_ns_ += phase_timer.nsecsElapsed(); phase_timer.restart(); @@ -1394,7 +1400,7 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] if (!aabbInFrustum(item.aabb_min, item.aabb_max, planes)) return; if (!contributionPasses(item.aabb_min, item.aabb_max)) return; if (hiz_on && aabbOccludedByHiz(item.aabb_min, item.aabb_max)) { - ++hiz_reject_count_; + hiz_reject_count_.fetch_add(1, std::memory_order_relaxed); return; } // Survivor — now pay the wide-struct fetch for mesh_id. @@ -1407,10 +1413,10 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] const bool reflected = inst_idx < m.instance_reflected.size() && m.instance_reflected[inst_idx] != 0; auto& bucket = - reflected ? (want_lod1 ? visible_by_mesh_rev_lod1_ - : visible_by_mesh_rev_lod0_) - : (want_lod1 ? visible_by_mesh_fwd_lod1_ - : visible_by_mesh_fwd_lod0_); + reflected ? (want_lod1 ? m.vis_rev_lod1 + : m.vis_rev_lod0) + : (want_lod1 ? 
m.vis_fwd_lod1 + : m.vis_fwd_lod0); bucket[inst.mesh_id].push_back(inst_idx); }; @@ -1456,8 +1462,8 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] // rev fills [indirect_forward_count, end). LOD0/LOD1 within a winding // slice are contiguous — winding is what requires glFrontFace to flip // between MDI calls, LOD is not. - visible_flat_.clear(); - indirect_scratch_.clear(); + m.visible_flat.clear(); + m.indirect_scratch.clear(); auto emit_slice = [&](std::vector>& by_mesh, int lod) { for (size_t mi = 0; mi < m.meshes.size(); ++mi) { @@ -1474,25 +1480,25 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] cmd.instanceCount = vis_count; cmd.firstIndex = ebo_off / sizeof(uint32_t); cmd.baseVertex = mesh.vbo_byte_offset / INSTANCED_VERTEX_STRIDE_BYTES; - cmd.baseInstance = static_cast(visible_flat_.size()); - indirect_scratch_.push_back(cmd); + cmd.baseInstance = static_cast(m.visible_flat.size()); + m.indirect_scratch.push_back(cmd); - visible_flat_.insert(visible_flat_.end(), - by_mesh[mi].begin(), by_mesh[mi].end()); + m.visible_flat.insert(m.visible_flat.end(), + by_mesh[mi].begin(), by_mesh[mi].end()); } }; - emit_slice(visible_by_mesh_fwd_lod0_, 0); - emit_slice(visible_by_mesh_fwd_lod1_, 1); - m.indirect_forward_count = static_cast(indirect_scratch_.size()); - emit_slice(visible_by_mesh_rev_lod0_, 0); - emit_slice(visible_by_mesh_rev_lod1_, 1); - m.indirect_command_count = static_cast(indirect_scratch_.size()); + emit_slice(m.vis_fwd_lod0, 0); + emit_slice(m.vis_fwd_lod1, 1); + m.indirect_forward_count = static_cast(m.indirect_scratch.size()); + emit_slice(m.vis_rev_lod0, 0); + emit_slice(m.vis_rev_lod1, 1); + m.indirect_command_count = static_cast(m.indirect_scratch.size()); // Per-model stats snapshot — summed into the frame counters regardless // of whether this frame ran a full cull or reused the cached one. 
uint32_t model_vis_obj = 0, model_vis_tri = 0; - for (const auto& cmd : indirect_scratch_) { + for (const auto& cmd : m.indirect_scratch) { model_vis_tri += (cmd.count / 3) * cmd.instanceCount; model_vis_obj += cmd.instanceCount; } @@ -1500,10 +1506,14 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] m.cached_visible_triangles = model_vis_tri; cull_emit_ns_ += phase_timer.nsecsElapsed(); - phase_timer.restart(); +} + +void ViewportWindow::uploadCullResults(ModelGpuData& m) { + QElapsedTimer phase_timer; + phase_timer.start(); // Upload visible list (keep binding alive even when empty). - size_t vis_bytes = std::max(visible_flat_.size() * sizeof(uint32_t), + size_t vis_bytes = std::max(m.visible_flat.size() * sizeof(uint32_t), sizeof(uint32_t)); if (m.visible_ssbo == 0 || m.visible_ssbo_capacity < vis_bytes) { if (m.visible_ssbo) gl_->glDeleteBuffers(1, &m.visible_ssbo); @@ -1513,13 +1523,13 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] gl_->glNamedBufferStorage(m.visible_ssbo, new_cap, nullptr, GL_DYNAMIC_STORAGE_BIT); m.visible_ssbo_capacity = new_cap; } - if (!visible_flat_.empty()) { + if (!m.visible_flat.empty()) { gl_->glNamedBufferSubData(m.visible_ssbo, 0, - visible_flat_.size() * sizeof(uint32_t), visible_flat_.data()); + m.visible_flat.size() * sizeof(uint32_t), m.visible_flat.data()); } // Upload indirect command buffer. 
- size_t ind_bytes = indirect_scratch_.size() * sizeof(DrawElementsIndirectCommand); + size_t ind_bytes = m.indirect_scratch.size() * sizeof(DrawElementsIndirectCommand); if (ind_bytes == 0) { cull_upload_ns_ += phase_timer.nsecsElapsed(); return; @@ -1532,7 +1542,7 @@ void ViewportWindow::cullAndUploadVisible(ModelGpuData& m, const float planes[6] gl_->glNamedBufferStorage(m.indirect_buffer, new_cap, nullptr, GL_DYNAMIC_STORAGE_BIT); m.indirect_capacity = new_cap; } - gl_->glNamedBufferSubData(m.indirect_buffer, 0, ind_bytes, indirect_scratch_.data()); + gl_->glNamedBufferSubData(m.indirect_buffer, 0, ind_bytes, m.indirect_scratch.data()); cull_upload_ns_ += phase_timer.nsecsElapsed(); } @@ -1608,7 +1618,7 @@ void ViewportWindow::render() { && last_cull_proj_ == proj_matrix_; const bool cull_this_frame = !camera_unchanged; if (cull_this_frame) { - hiz_reject_count_ = 0; + hiz_reject_count_.store(0, std::memory_order_relaxed); } else { ++cull_skipped_frames_; } @@ -1617,11 +1627,47 @@ void ViewportWindow::render() { // back and forth. Harmless when culling is off. gl_->glFrontFace(GL_CCW); + // Parallel cull: each model's CPU cull is independent (no shared mutable + // state other than the atomic timing counters), so we fan them out to + // std::async and join before the (serial, GL-touching) upload pass. + // IFC_CULL_THREADS=0 forces the single-threaded fallback. 
+ static const bool mt_cull_enabled = []{ + const char* e = std::getenv("IFC_CULL_THREADS"); + return !(e && e[0] == '0'); + }(); + QElapsedTimer cull_wall_timer; + if (cull_this_frame) { + cull_wall_timer.start(); + std::vector cull_targets; + cull_targets.reserve(models_gpu_.size()); + for (auto& [mid, m] : models_gpu_) { + if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; + cull_targets.push_back(&m); + } + if (mt_cull_enabled && cull_targets.size() > 1) { + std::vector> futs; + futs.reserve(cull_targets.size()); + for (ModelGpuData* mp : cull_targets) { + const float mpr = min_pixel_radius; + futs.emplace_back(std::async(std::launch::async, + [this, mp, &planes, focal_px, mpr]() { + cullModelCpu(*mp, planes, focal_px, mpr); + })); + } + for (auto& f : futs) f.get(); + } else { + for (ModelGpuData* mp : cull_targets) { + cullModelCpu(*mp, planes, focal_px, min_pixel_radius); + } + } + cull_wall_ns_ += cull_wall_timer.nsecsElapsed(); + } + for (auto& [model_id, m] : models_gpu_) { if (m.hidden || !m.ssbo || m.ssbo_instance_count == 0) continue; if (cull_this_frame) { - cullAndUploadVisible(m, planes, focal_px, min_pixel_radius); + uploadCullResults(m); } if (m.indirect_command_count == 0) continue; @@ -1741,24 +1787,29 @@ void ViewportWindow::render() { const double inv_frames = frames_in_window > 0 ? 
1.0 / static_cast(frames_in_window) : 0.0; - const double clr_ms = cull_clear_ns_ * 1e-6 * inv_frames; - const double trv_ms = cull_traverse_ns_ * 1e-6 * inv_frames; - const double emt_ms = cull_emit_ns_ * 1e-6 * inv_frames; - const double upl_ms = cull_upload_ns_ * 1e-6 * inv_frames; - cull_clear_ns_ = cull_traverse_ns_ = cull_emit_ns_ = cull_upload_ns_ = 0; + const double clr_ms = cull_clear_ns_.load() * 1e-6 * inv_frames; + const double trv_ms = cull_traverse_ns_.load() * 1e-6 * inv_frames; + const double emt_ms = cull_emit_ns_.load() * 1e-6 * inv_frames; + const double upl_ms = cull_upload_ns_.load() * 1e-6 * inv_frames; + const double wall_ms = cull_wall_ns_ * 1e-6 * inv_frames; + cull_clear_ns_.store(0); + cull_traverse_ns_.store(0); + cull_emit_ns_.store(0); + cull_upload_ns_.store(0); + cull_wall_ns_ = 0; const uint32_t skipped = cull_skipped_frames_; cull_skipped_frames_ = 0; qDebug("[frame] %.1f fps %.2f ms obj %u/%u tri %u/%u " "meshes %u gl_draws %u sub_draws %u hiz_rej %u " - "cull[clr %.2f trv %.2f emt %.2f upl %.2f]ms skipped %u/%u " + "cull[wall %.2f | work: clr %.2f trv %.2f emt %.2f upl %.2f]ms skipped %u/%u " "vram %.1f MB (vbo %.1f + ebo %.1f + ssbo %.1f) models %zu (%zu hidden)", last_fps_, 1000.0f / last_fps_, visible_objects_, total_obj, visible_triangles_, total_tri, total_meshes, gl_draw_calls_, indirect_sub_draws_, - hiz_reject_count_, - clr_ms, trv_ms, emt_ms, upl_ms, + hiz_reject_count_.load(), + wall_ms, clr_ms, trv_ms, emt_ms, upl_ms, skipped, frames_in_window, (total_vbo + total_ebo + total_ssbo) / (1024.0*1024.0), total_vbo / (1024.0*1024.0), diff --git a/src/ifcviewer/ViewportWindow.h b/src/ifcviewer/ViewportWindow.h index ed6668cc116..30b9e8cfa19 100644 --- a/src/ifcviewer/ViewportWindow.h +++ b/src/ifcviewer/ViewportWindow.h @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include "BvhAccel.h" #include "InstancedGeometry.h" @@ -104,6 +106,15 @@ struct ModelGpuData { uint32_t indirect_command_count = 0; // total 
valid commands this frame uint32_t indirect_forward_count = 0; // first N are CCW-winding draws + // Per-model cull scratch — owned by the model so each cull job runs + // without sharing mutable state. Four buckets = {fwd, rev} × {LOD0, LOD1}. + std::vector> vis_fwd_lod0; + std::vector> vis_fwd_lod1; + std::vector> vis_rev_lod0; + std::vector> vis_rev_lod1; + std::vector visible_flat; + std::vector indirect_scratch; + bool finalized = false; bool hidden = false; }; @@ -215,6 +226,18 @@ class ViewportWindow : public QWindow { void cullAndUploadVisible(ModelGpuData& m, const float planes[6][4], float focal_px, float min_pixel_radius); + // Thread-safe: CPU-only cull (frustum + contribution + HiZ + bucketing + + // emit). Writes survivors into m.vis_* / m.visible_flat / m.indirect_scratch + // and sets m.indirect_forward_count / m.indirect_command_count / + // m.cached_visible_*. Touches no GL state and no ViewportWindow mutable + // state other than the atomic counters below — safe to run on a worker. + void cullModelCpu(ModelGpuData& m, const float planes[6][4], + float focal_px, float min_pixel_radius); + + // Main-thread only: uploads m.visible_flat / m.indirect_scratch into the + // model's SSBO + indirect buffer, growing them if needed. + void uploadCullResults(ModelGpuData& m); + // Mouse interaction void handleMousePress(QMouseEvent* event); void handleMouseRelease(QMouseEvent* event); @@ -268,17 +291,23 @@ class ViewportWindow : public QWindow { std::vector hiz_mip_h_; QMatrix4x4 hiz_vp_; bool hiz_vp_valid_ = false; - uint32_t hiz_reject_count_ = 0; // per-frame stat + std::atomic hiz_reject_count_{0}; // per-frame stat // Cull-phase timers. Accumulated across all frames in the current // 1-second stats window; divided by frame_count_ at print time to // give per-frame average ms. Reset each window. Lets us see where // CPU time actually goes: bucket clears vs BVH traversal vs emit vs // GPU upload. 
- uint64_t cull_clear_ns_ = 0; - uint64_t cull_traverse_ns_ = 0; - uint64_t cull_emit_ns_ = 0; - uint64_t cull_upload_ns_ = 0; + // Atomic so parallel cull workers can fetch_add into them without + // contending on a lock. clr/trv/emt are SUMS across all worker threads + // for the frame — they describe total CPU work, not wall-clock. The + // wall counter is measured once around the dispatch block in render() + // and is what actually determines frame time. + std::atomic cull_clear_ns_{0}; + std::atomic cull_traverse_ns_{0}; + std::atomic cull_emit_ns_{0}; + std::atomic cull_upload_ns_{0}; + uint64_t cull_wall_ns_ = 0; // main-thread only uint32_t cull_skipped_frames_ = 0; // Skip cullAndUploadVisible + buildHizPyramid when the camera and scene @@ -295,21 +324,6 @@ class ViewportWindow : public QWindow { uint32_t gl_draw_calls_ = 0; uint32_t indirect_sub_draws_ = 0; - // Reused scratch: visible-instance index lists per mesh, flattened into - // `visible_flat_` for upload. Both live in the parent object to avoid - // per-frame allocation. indirect_scratch_ is the matching array of - // DrawElementsIndirectCommand records — forward-declared as bytes so - // the header doesn't need the struct definition. - // Four buckets = {fwd, rev} × {LOD0, LOD1}. LOD1 buckets are only - // populated when the mesh has lod1_index_count > 0 and the projected - // pixel radius is below the LOD switch threshold. 
- std::vector> visible_by_mesh_fwd_lod0_; - std::vector> visible_by_mesh_fwd_lod1_; - std::vector> visible_by_mesh_rev_lod0_; - std::vector> visible_by_mesh_rev_lod1_; - std::vector visible_flat_; - std::vector indirect_scratch_; - // Camera QVector3D camera_target_{0, 0, 0}; QVector3D camera_eye_{0, 0, 0}; // world-space eye, set in updateCamera From c0e99a19fd2498612aafc7c3688942273f9b963d Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Wed, 15 Apr 2026 15:29:21 +1000 Subject: [PATCH 34/37] =?UTF-8?q?ifcviewer:=20README=20=E2=80=94=20documen?= =?UTF-8?q?t=20parallel=20per-model=20cull=20(Phase=203D)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the parallel cull bullet to the feature list, a Phase 3D section explaining the fan-out / scratch-ownership design + measured 4x speedup, and renumber the planned GPU compute cull to Phase 3E so it can cite 3D as the CPU algorithm being ported. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/README.md | 44 +++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 01ceef7ecfd..8c09ed661f7 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -60,6 +60,13 @@ engine with a Qt6 interface and OpenGL 4.5 rendering. - **BVH frustum culling over instances**: per-model BVH trees cull whole subtrees of placements with one frustum test. Falls back to a linear scan during progressive upload and for very small models (< 32 instances). +- **Parallel per-model cull:** each model's CPU cull (frustum + contribution + + HiZ + bucketing + indirect-command emit) is independent, so `render()` + fans them out via `std::async` and joins before the serial GL-upload + pass. On an 18-model scene this took wall-clock cull from ~25 ms to + ~5 ms. 
The cull scratch buffers live on `ModelGpuData` so each worker + owns its output storage; phase-timer counters are atomic for the same + reason. `IFC_CULL_THREADS=0` forces single-threaded fallback. - **Reflection-aware two-pass draw:** IFC placements can have negative- determinant transforms (mirrored families). These flip the screen-space winding of their triangles, which would make them vanish under @@ -692,13 +699,35 @@ thousands and the frame time drops accordingly. - **Transparent geometry would need special handling**, but the current renderer doesn't have any, so no-op for now. -#### 3D. GPU-side culling via compute (longer-term) +#### 3D. Parallel per-model cull (CPU, done) + +A cheaper intermediate step before going full-GPU: each model's cull is +independent (no shared mutable state beyond atomic timing counters), so +`render()` fans the per-model culls out to a `std::async` pool and joins +before the serial GL-upload pass. On the 18-model / 569 k-instance test +scene this took the cull from ~25 ms wall-clock to ~5 ms — roughly a 4× +speedup on an 8-core machine, tracking `std::thread::hardware_concurrency()` +up to the model count. Load balancing is static (one job per model); a +single massive model still bottlenecks to single-threaded speed and would +need intra-model partitioning, but in practice BIM projects are +multi-discipline so the coarse partition lands well. + +The stats line now reports `cull[wall X | work: clr Y trv Z emt W upl U]`: +`wall` is frame-time impact, the `work` numbers are per-thread sums showing +where CPU cycles went. `IFC_CULL_THREADS=0` forces single-threaded mode +for comparison. + +#### 3E. GPU-side culling via compute (longer-term) Push the cull loop to a compute shader reading the per-instance SSBO + frustum planes + HiZ pyramid, emitting the visible list and indirect -commands with atomic counters. Eliminates all CPU→GPU per-frame bytes -and lets 3C scale to millions of instances. 
Worth doing once 3A–3C -have stabilised the CPU-side algorithm we'd be porting. +commands with atomic counters. Three compute dispatches per model: (1) +count survivors per `(mesh, winding, LOD)` bucket, (2) prefix-sum the +counts into `baseInstance` offsets and write the indirect command buffer, +(3) re-test and compact survivors into the dense visible list. HiZ moves +to a GPU depth texture sampled directly in the shader, eliminating the +Phase 3C readback. Lets culling scale to millions of instances and +single-model scenes where Phase 3D can't parallelise. ### Planned follow-ups (post-Phase-3) @@ -716,6 +745,8 @@ Scene size Bottleneck Fix 500k+ tris / overview shot GPU vertex + raster Phase 3A contribution cull + Phase 3B LOD (done) multi-million + occluders redundant rasterisation Phase 3C HiZ (done, CPU readback) +many models, serial cull single-thread BVH trv Phase 3D parallel cull (done) +single giant model / <18 cores CPU BVH trv Phase 3E GPU cull (planned) ``` ## Roadmap @@ -732,12 +763,13 @@ multi-million + occluders redundant rasterisation Phase 3C HiZ (done, CPU - [x] Reflection-aware two-pass draw for mirrored placements - [x] Backface culling (user-toggleable, default on) - [x] `reorient-shells` enabled in iterator -- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`, `IFC_LOD1_PX`, `IFC_NO_HIZ`, `IFC_HIZ_SIZE`) +- [x] Perf diagnostic env vars (`IFC_SKIP_MDI`, `IFC_MAX_SUBDRAWS`, `IFC_MIN_PX`, `IFC_LOD1_PX`, `IFC_NO_HIZ`, `IFC_HIZ_SIZE`, `IFC_CULL_THREADS`) - [x] Phase 3A — screen-space contribution culling - [x] Phase 3B — distance / contribution LOD (meshoptimizer `simplifySloppy`) - [x] Phase 3C — Hierarchical-Z occlusion culling (v1, CPU-side readback) +- [x] Phase 3D — Parallel per-model CPU cull (`std::async` fan-out) - [x] Quantized VBO (16 B/vert, sidecar v6) - [x] Event-driven rendering (zero idle CPU/GPU, cull skipped on still frames) -- [ ] **Phase 3D — GPU-side compute-shader culling** (next; replaces the 
readback) +- [ ] **Phase 3E — GPU-side compute-shader culling** (next; replaces the HiZ readback) - [ ] Vulkan/MoltenVK backend for macOS - [ ] Embedded Python scripting console From 6b496d802dc7102c4a1abc16ee959c42cf0ff854 Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Wed, 15 Apr 2026 17:46:47 +1000 Subject: [PATCH 35/37] ifcviewer: disable HiZ cull when camera has moved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HiZ from last frame encodes depth from last frame's viewpoint. When the camera moves, projecting a current-frame AABB through the stored VP answers 'was this occluded last frame?' rather than 'is it occluded now?' — a self-reinforcing feedback loop where objects culled in prior frames never appear in any depth buffer and stay permanently hidden at certain camera angles. Fix: require hiz_vp_ == current VP for the HiZ test to apply. HiZ still helps static views (kicks in one frame after camera stops) but no longer produces false occlusions during orbit. The correct fix for orbit coverage is a depth pre-pass feeding fresh HiZ — planned as part of Phase 3E GPU compute cull. Co-Authored-By: Claude Opus 4.6 --- src/ifcviewer/ViewportWindow.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index 66525e28cf5..e96b45f9094 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -1388,7 +1388,21 @@ void ViewportWindow::cullModelCpu(ModelGpuData& m, const float planes[6][4], // HiZ occlusion is skipped entirely when the pick pass runs // (min_pixel_radius == 0 on that path), when the user disables it via // env var, or before the first pyramid has been built. 
- const bool hiz_on = hizEnabled() && min_pixel_radius > 0.0f && hiz_vp_valid_; + // + // Crucially, HiZ is also skipped when the stored VP (hiz_vp_, captured at + // the end of the previous frame) differs from this frame's VP — i.e. + // whenever the camera has moved. The stored depth buffer encodes what + // was visible from hiz_vp_'s viewpoint; projecting a current-frame AABB + // through that VP answers "was this occluded LAST frame?", which is only + // a correct proxy for "is this occluded NOW?" when the camera is static. + // Orbiting past a wall would otherwise leave objects persistently culled + // because prior frames' depth buffers only ever contained the wall (the + // objects behind it were themselves HiZ-culled, never drawn, so never in + // the buffer — a self-reinforcing feedback loop). On static views HiZ + // kicks in after a single frame of lag. + const QMatrix4x4 current_vp = proj_matrix_ * view_matrix_; + const bool hiz_vp_matches = hiz_vp_valid_ && hiz_vp_ == current_vp; + const bool hiz_on = hizEnabled() && min_pixel_radius > 0.0f && hiz_vp_matches; // Hot path: read the AABB from the compact bvh_items array (28 B stride) // rather than the wide InstanceCpu (104 B stride). Most instances fail From 1caf4496113b727bf0caa4d3dd3e990c3eccebcb Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Wed, 15 Apr 2026 18:30:03 +1000 Subject: [PATCH 36/37] ifcviewer: fix pick-pass cull corruption and cached-model ID collisions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two stability bugs: 1. Clicking an object left the scene with wrong shading until the camera moved. The pick pass re-culls every model with its own parameters (min_pixel_radius=0, no HiZ) and overwrites each model's visible_ssbo and indirect buffer. The next render() saw an unchanged camera, skipped the cull via the have_cached_cull_ shortcut, and drew the stale pick-pass buffers. Fix: invalidate have_cached_cull_ at the end of pickObjectAt(). 2. 
Loading two sidecar-cached models made the second model's picked properties resolve to the first model's elements. Sidecars store raw object_id / model_id values from the session that wrote them, and both files start at object_id=1, so element_map_ entries collided. Fix: on load, rebase every PackedElementInfo and InstanceCpu by (next_object_id_ - min_id_in_sidecar) and overwrite model_id with the freshly-assigned handle before the elements hit element_map_. Also document both in the README — the pick-pass note under 3A contribution culling, the sidecar rebase under the sidecar format section. --- src/ifcviewer/MainWindow.cpp | 27 ++++++++++++++++++++++----- src/ifcviewer/README.md | 15 +++++++++++++++ src/ifcviewer/ViewportWindow.cpp | 7 +++++++ 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/ifcviewer/MainWindow.cpp b/src/ifcviewer/MainWindow.cpp index 0e8162f0436..e75f7cf0dd1 100644 --- a/src/ifcviewer/MainWindow.cpp +++ b/src/ifcviewer/MainWindow.cpp @@ -248,11 +248,28 @@ void MainWindow::applySidecarData(ModelId mid, SidecarData data) { QElapsedTimer t; t.start(); - // Update next_object_id_ past all objects in this model before the - // extracted `elements` is moved out of `data`. - for (const auto& elem : data.elements) { - if (elem.object_id >= next_object_id_) - next_object_id_ = elem.object_id + 1; + // Sidecars store raw object_ids and model_ids from the session that wrote + // them. On load we must rebase both onto the current session's ID space, + // or two cached models collide (both starting at object_id=1, both + // claiming the original model_id). Offset by (next_object_id_ - min_id) + // so the first cached object takes the next free slot. 
+ uint32_t min_oid = UINT32_MAX; + for (const auto& pe : data.elements) { + if (pe.object_id < min_oid) min_oid = pe.object_id; + } + uint32_t oid_offset = 0; + if (!data.elements.empty() && min_oid < UINT32_MAX) { + oid_offset = next_object_id_ - min_oid; + } + for (auto& pe : data.elements) { + pe.object_id += oid_offset; + pe.model_id = mid; + if (pe.object_id >= next_object_id_) + next_object_id_ = pe.object_id + 1; + } + for (auto& inst : data.instances) { + inst.object_id += oid_offset; + inst.model_id = mid; } // Hand off geometry to GPU in a single call. diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 8c09ed661f7..70540abfb19 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -307,6 +307,14 @@ uint32_t + char[] string table Staleness check: `source_file_size` vs actual file size. Mismatched → reject and rebuild. Endianness marker rejects cross-arch caches. +Sidecars store the raw `object_id` / `model_id` values from the session +that wrote them. On load they are rebased onto the current session's ID +space (`object_id += next_object_id_ - min_id_in_sidecar`, `model_id` +overwritten with the freshly-assigned handle) before the elements hit +`element_map_` or the viewport. Without this, two cached models loaded +back-to-back collide — both start at `object_id=1` and the second model's +property lookups return the first model's data. + ### GPU Instancing pipeline (the central pillar) Everything above plugs into a single data-flow, worth documenting on its @@ -451,6 +459,13 @@ and per-instance level. Short-circuits when the camera is inside the AABB so nothing-you're-standing-next-to is ever lost. Pick pass uses threshold 0 so sub-pixel objects remain clickable. +Because the pick pass re-runs the cull with its own parameters (no +contribution cull, no HiZ) and writes into each model's shared +`visible_ssbo` / indirect buffer, `pickObjectAt()` must invalidate +`have_cached_cull_` on exit. 
Otherwise the next `render()` sees an +unchanged camera, skips the cull, and draws the pick-pass buffers — +the user sees obviously-wrong shading until they nudge the camera. + Sphere-based (centre = AABB midpoint, radius = half-diagonal, r_px = focal_px · radius / distance). Loses a little precision on very elongated bounds vs. 8-corner projection, but costs ~5× less per diff --git a/src/ifcviewer/ViewportWindow.cpp b/src/ifcviewer/ViewportWindow.cpp index e96b45f9094..a97714950a2 100644 --- a/src/ifcviewer/ViewportWindow.cpp +++ b/src/ifcviewer/ViewportWindow.cpp @@ -1282,6 +1282,13 @@ uint32_t ViewportWindow::pickObjectAt(int x, int y) { renderPickPass(); + // The pick pass overwrote each model's visible_ssbo / indirect_buffer with + // pick-specific cull params (no contribution cull, no HiZ). Invalidate + // the cached cull so the next render() rebuilds them with main-render + // params; otherwise the viewport draws with stale pick-pass buffers and + // shading looks wrong until the camera moves. + have_cached_cull_ = false; + int px = x * devicePixelRatio(); int py = (height() - y) * devicePixelRatio(); uint32_t pixel = 0; From 196f98440de93be1bb8a67ecf6164ca96377d5ee Mon Sep 17 00:00:00 2001 From: Dion Moult Date: Wed, 15 Apr 2026 18:31:17 +1000 Subject: [PATCH 37/37] =?UTF-8?q?ifcviewer:=20README=20=E2=80=94=20documen?= =?UTF-8?q?t=20HiZ=20disabled=20during=20camera=20motion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'Known caveats' bullet still described the old 1-frame-stale behavior. Since 6b496d802 the cull compares hiz_vp_ to the current VP and drops HiZ rejection whenever they differ, so HiZ only helps on still frames — orbiting gets no benefit. Call out the tradeoff and the planned same-frame-depth-pre-pass fix slated for Phase 3E. 
--- src/ifcviewer/README.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/ifcviewer/README.md b/src/ifcviewer/README.md index 70540abfb19..77ffefc40e7 100644 --- a/src/ifcviewer/README.md +++ b/src/ifcviewer/README.md @@ -694,11 +694,19 @@ thousands and the frame time drops accordingly. ##### Known caveats -- **1 frame stale.** The pyramid is aligned to last frame's view, so - when you whip the camera across the scene we may draw one frame of - stuff that the new view would have occluded. Invisible in practice - at 60 fps. We tried a 3-deep PBO ring for async readback (2-frame - stale) and it produced visible flicker on fast orbits — reverted. +- **Disabled while the camera moves.** The pyramid is aligned to the + VP matrix of the frame that produced it. On a moving camera the + stored VP no longer matches the current one, and reusing it would + pop objects in and out as the stale depth falsely claims they're + occluded. The cull now compares `hiz_vp_ == current_vp` and drops + HiZ rejection entirely when they differ, so HiZ only contributes on + still frames. The honest cost: orbiting — the exact motion where + the frame rate tends to dip — gets no HiZ help. A proper fix needs + a same-frame depth pre-pass (draw cheap depth, build HiZ from *that* + frame's VP, then issue the colour pass against it); deferred to the + GPU-compute cull rewrite in Phase 3E where we're touching this code + anyway. We also tried a 3-deep PBO ring for async readback (2-frame + stale) which produced visible flicker on fast orbits — reverted. - **Readback syncs the GPU.** `glGetTextureImage` is blocking. Measured cost is well under a millisecond at 256×128; not a bottleneck on the machines tested. Phase 3D's compute-shader cull