Skip to content

Commit e82ca0e

Browse files
committed
Less back and forths by returning semi count in page
1 parent 5910b39 commit e82ca0e

3 files changed

Lines changed: 33 additions & 3 deletions

File tree

src/ifcopenshell-python/ifcopenshell/geom/stats.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ class StatsCollector:
138138

139139
counters: list
140140

141+
num_semis: int = 0
142+
141143
def __init__(self):
142144
self.streamer = ifcopenshell_wrapper.InstanceStreamer()
143145
self.needs_data = True
@@ -151,17 +153,19 @@ def feedFromFile(self, f: Optional[IO[str]] = None):
151153
def feed(self, data: str):
152154
self.streamer.pushPage(data)
153155
self.needs_data = False
156+
self.num_semis = self.streamer.semicolonCount()
154157

155158
@staticmethod
156-
def fromFilePath(fn, page_size: int = 4096):
159+
def fromFilePath(fn, page_size: int = 102400):
157160
collector = StatsCollector()
158161
collector.page_size = page_size
159162
collector.feedFromFile(open(str(fn), encoding="ascii"))
160163
return collector
161164

162165
def next(self):
163-
if self.streamer.hasSemicolon():
166+
if self.num_semis > 0:
164167
if inst := self.streamer.readInstancePy(True):
168+
self.num_semis -= 1
165169
return inst["type"], dict(list(inst.items())[2:])
166170
else:
167171
self.finalized = True

src/ifcparse/IfcFile.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,9 @@ class IFC_PARSE_API InstanceStreamer {
139139

140140
bool hasSemicolon() const;
141141

142-
void pushPage(const std::string& page);
142+
size_t semicolonCount() const;
143+
144+
void pushPage(const std::string& page);
143145

144146
InstanceStreamer();
145147

src/ifcparse/IfcParse.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,6 +1371,30 @@ bool IfcParse::InstanceStreamer::hasSemicolon() const {
13711371
return false;
13721372
}
13731373

1374+
size_t IfcParse::InstanceStreamer::semicolonCount() const {
1375+
auto local_stream = stream_->clone();
1376+
auto local_lexer = IfcSpfLexer(&local_stream);
1377+
Token t;
1378+
size_t count = 0;
1379+
try {
1380+
t = local_lexer.Next();
1381+
} catch (const std::out_of_range&) {
1382+
return false;
1383+
}
1384+
while (t.type != Token_NONE) {
1385+
if (TokenFunc::isOperator(t, ';')) {
1386+
count++;
1387+
}
1388+
try {
1389+
t = local_lexer.Next();
1390+
} catch (const std::out_of_range&) {
1391+
// This most likely happens when a page boundary is contained within a string
1392+
break;
1393+
}
1394+
}
1395+
return count;
1396+
}
1397+
13741398
void IfcParse::InstanceStreamer::pushPage(const std::string& page)
13751399
{
13761400
stream_->pushNextPage(page);

0 commit comments

Comments
 (0)