Skip to content

Commit f7abadc

Browse files
committed
Ensure correct parsing and decoding of strings [#3395016]
1 parent 767f7b4 commit f7abadc

File tree

13 files changed

+438
-49
lines changed

13 files changed

+438
-49
lines changed

cmake/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,14 @@ ELSE()
3636
MESSAGE(FATAL_ERROR "Unable to find library files, aborting")
3737
ENDIF()
3838

39+
# Look for the ICU common library (icuuc) in the usual system library
# directories; the result is stored in the variable `icu`.
FIND_LIBRARY(icu "icuuc" /usr/lib /usr/lib64 /usr/local/lib /usr/local/lib64)
40+
41+
# ICU is mandatory: configuration is aborted when the library was not found.
IF(icu)
42+
MESSAGE(STATUS "ICU libraries found")
43+
ELSE()
44+
MESSAGE(FATAL_ERROR "Unable to find ICU library files, aborting")
45+
ENDIF()
46+
3947
INCLUDE(CheckIncludeFileCXX)
4048

4149
MACRO(CHECK_ADD_OCE_OCC_DEF INCLUDE)
@@ -68,6 +76,7 @@ ADD_LIBRARY(IfcParse STATIC
6876
../src/ifcparse/Ifc2x3.cpp
6977
../src/ifcparse/IfcUtil.cpp
7078
../src/ifcparse/IfcParse.cpp
79+
../src/ifcparse/IfcCharacterDecoder.cpp
7180
)
7281

7382
ADD_LIBRARY(IfcGeom STATIC
@@ -81,6 +90,7 @@ ADD_LIBRARY(IfcGeom STATIC
8190
../src/ifcgeom/IfcRegister.cpp
8291
)
8392

93+
# Link IfcParse against the ICU library file that FIND_LIBRARY located (stored
# in `icu`) instead of the bare name "icuuc", so the link step uses exactly the
# library whose presence was checked and does not depend on the linker's own
# search path.
TARGET_LINK_LIBRARIES(IfcParse ${icu})
8494
TARGET_LINK_LIBRARIES(IfcGeom IfcParse)
8595

8696
LINK_DIRECTORIES (${IfcOpenShell_BINARY_DIR} /usr/lib /usr/lib64 /usr/local/lib /usr/local/lib64)

src/ifcgeom/IfcGeomObjects.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <Poly_Array1OfTriangle.hxx>
3434
#include <StdFail_NotDone.hxx>
3535

36+
#include "../ifcparse/IfcException.h"
3637
#include "../ifcgeom/IfcGeomObjects.h"
3738
#include "../ifcgeom/IfcGeom.h"
3839

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
/********************************************************************************
2+
* *
3+
* This file is part of IfcOpenShell. *
4+
* *
5+
* IfcOpenShell is free software: you can redistribute it and/or modify *
6+
* it under the terms of the Lesser GNU General Public License as published by *
7+
* the Free Software Foundation, either version 3.0 of the License, or *
8+
* (at your option) any later version. *
9+
* *
10+
* IfcOpenShell is distributed in the hope that it will be useful, *
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13+
* Lesser GNU General Public License for more details. *
14+
* *
15+
* You should have received a copy of the Lesser GNU General Public License *
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
17+
* *
18+
********************************************************************************/
19+
20+
/********************************************************************************
21+
* *
22+
* Implementation of character decoding as described in ISO 10303-21 table 2 and *
23+
* table 4 *
24+
* *
25+
********************************************************************************/
26+
27+
#include <string>
28+
#include <sstream>
29+
#include <iomanip>
30+
31+
#include "../ifcparse/IfcCharacterDecoder.h"
32+
#include "../ifcparse/IfcException.h"
33+
#include "../ifcparse/IfcFile.h"
34+
35+
// Bit flags that together form the `parse_state` value used by
// IfcCharacterDecoder::operator std::string() and IfcCharacterDecoder::dryRun().
// Each flag records that a particular element of a control directive (or an
// apostrophe) has been read.
#define FIRST_SOLIDUS (1 << 1)
36+
#define PAGE (1 << 2)
37+
#define ALPHABET (1 << 3)
38+
#define SECOND_SOLIDUS (1 << 4)
39+
#define ALPHABET_DEFINITION (1 << 5)
40+
#define APOSTROPHE (1 << 6)
41+
#define ARBITRARY (1 << 7)
42+
#define EXTENDED2 (1 << 8)
43+
#define EXTENDED4 (1 << 9)
44+
// Flag for the N-th hexadecimal digit read; HEX(1)..HEX(8) occupy bits 10..17,
// i.e. the range between EXTENDED4 (bit 9) and THIRD_SOLIDUS (bit 18).
#define HEX(N) (1 << (9+N))
45+
#define THIRD_SOLIDUS (1 << 18)
46+
#define ENDEXTENDED_X (1 << 19)
47+
#define ENDEXTENDED_0 (1 << 20)
48+
// NOTE(review): FOURTH_SOLIDUS is not referenced in the visible part of this file.
#define FOURTH_SOLIDUS (1 << 21)
49+
#define IGNORED_DIRECTIVE (1 << 22)
50+
51+
// Predicates on the parse state: each one tells whether a given input element
// is acceptable in state S. The argument is substituted textually and is not
// parenthesised, so only pass a plain variable.
// FIXME: These probably need to be less forgiving in terms of wrongly defined sequences
52+
#define EXPECTS_ALPHABET(S) (S & FIRST_SOLIDUS)
53+
#define EXPECTS_PAGE(S) (S & FIRST_SOLIDUS)
54+
#define EXPECTS_ARBITRARY(S) (S & FIRST_SOLIDUS)
55+
#define EXPECTS_N_OR_F(S) (S & FIRST_SOLIDUS && !(S & ARBITRARY) )
56+
#define EXPECTS_ARBITRARY2(S) (S & ARBITRARY && !(S & SECOND_SOLIDUS))
57+
#define EXPECTS_ALPHABET_DEFINITION(S) (S & FIRST_SOLIDUS && S & ALPHABET)
58+
// NOTE(review): the last two terms are already covered by the plain
// `S & EXTENDED4` and `S & EXTENDED2` terms earlier in the expression.
#define EXPECTS_SOLIDUS(S) (S & ALPHABET_DEFINITION || S & PAGE || S & ARBITRARY || S & EXTENDED2 || S & EXTENDED4 || S & ENDEXTENDED_0 || S & IGNORED_DIRECTIVE || (S & EXTENDED4 && S & HEX(8)) || (S & EXTENDED2 && S & HEX(4)))
59+
#define EXPECTS_CHARACTER(S) (S & PAGE && S & SECOND_SOLIDUS)
60+
#define EXPECTS_HEX(S) (S & HEX(1) || S & HEX(3) || S & HEX(5) || S & HEX(6) || S & HEX(7) || (S & ARBITRARY && S & SECOND_SOLIDUS) || (S & EXTENDED2 && S & HEX(2)) || (S & EXTENDED4 && S & HEX(4)) )
61+
#define EXPECTS_ENDEXTENDED_X(S) (S & THIRD_SOLIDUS)
62+
#define EXPECTS_ENDEXTENDED_0(S) (S & ENDEXTENDED_X)
63+
64+
// Character classification helpers operating on raw character codes.
// Accepts the codes 0x40..0x4A ('@'..'J'); the decoder derives the code page
// number from the accepted character as C - 0x40.
#define IS_VALID_ALPHABET_DEFINITION(C) (C >= 0x40 && C <= 0x4A)
65+
// True for '0'..'9' and upper-case 'A'..'F' only; lower-case digits are rejected.
#define IS_HEXADECIMAL(C) ((C >= 0x30 && C <= 0x39 ) || (C >= 0x41 && C <= 0x46 ))
66+
// Numeric value of a character for which IS_HEXADECIMAL holds.
#define HEX_TO_INT(C) ((C >= 0x30 && C <= 0x39 ) ? C - 0x30 : (C+10) - 0x41)
67+
// NOTE(review): HEX(1)..HEX(8) are distinct single bits, so AND-ing them
// yields 0 and the mask ~0 leaves C unchanged; an OR of the flags was
// presumably intended. Confirm against the state machine before changing it:
// the solidus handling tests HEX(3) after this macro has been applied.
#define CLEAR_HEX(C) (C &= ~(HEX(1)&HEX(2)&HEX(3)&HEX(4)&HEX(5)&HEX(6)&HEX(7)&HEX(8)))
68+
69+
using namespace IfcParse;
70+
71+
void IfcCharacterDecoder::addChar(std::stringstream& s,const UChar32& ch) {
72+
if ( destination ) {
73+
char* extraction_buffer = new char[4];
74+
UnicodeString(ch).extract(extraction_buffer,4,destination,status);
75+
s << extraction_buffer;
76+
delete extraction_buffer;
77+
} else {
78+
std::stringstream s2;
79+
s2 << "\\u" << std::hex << std::setw(4) << std::setfill('0') << (int) ch;
80+
s << s2.str();
81+
}
82+
}
83+
/// Creates a decoder reading from `f`.
///
/// The shared output converter is opened on first use when the conversion
/// mode is UTF8 or LATIN; for every other mode it is left untouched.
IfcCharacterDecoder::IfcCharacterDecoder(IfcParse::File* f)
	: file(f)
{
	// An already opened output converter is reused as is.
	if ( destination ) return;

	switch ( mode ) {
		case UTF8:
			destination = ucnv_open("utf-8", &status);
			break;
		case LATIN:
			destination = ucnv_open("iso-8859-1", &status);
			break;
		default:
			break;
	}
}
91+
/// Closes the shared ICU converters.
///
/// `destination` and `converter` are static members shared by all decoder
/// instances, so they are reset to null after closing. Otherwise a later
/// instance would see a non-null but already closed converter, use it and
/// close it a second time. The cached code page is reset as well so that the
/// input converter is reopened when it is next needed.
IfcCharacterDecoder::~IfcCharacterDecoder() {
	if ( destination ) {
		ucnv_close(destination);
		destination = 0;
	}
	if ( converter ) {
		ucnv_close(converter);
		converter = 0;
	}
	previous_codepage = -1;
}
95+
/// Decodes the string literal at the current position of the file.
///
/// Characters are read with File::Peek()/File::Inc() until an apostrophe is
/// followed by a character other than an apostrophe, or until Peek() returns
/// 0. The file is left positioned on the character that follows the closing
/// apostrophe.
/// NOTE(review): presumably the opening apostrophe has already been consumed
/// by the caller - confirm at the call sites.
///
/// Control directives (\S\, \P.\, \X\, \X2\...\X0\, \X4\...\X0\) are decoded
/// and passed to addChar(); a doubled apostrophe or doubled solidus yields a
/// single character.
///
/// @return the decoded string, enclosed in apostrophes
/// @throws IfcException when a character is not valid in the current state
IfcCharacterDecoder::operator std::string() {
	unsigned int parse_state = 0;
	std::stringstream s;
	s.put('\'');
	char current_char;
	int codepage = 1;
	unsigned int hex = 0;
	unsigned int hex_count = 0;
	while ( (current_char = file->Peek()) != 0 ) {
		if ( EXPECTS_CHARACTER(parse_state) ) {
			// (Re)open the ISO 8859 input converter only when the code page
			// changed or no converter is available, and remember which code
			// page it was opened for.
			if ( previous_codepage != codepage || ! converter ) {
				if ( converter ) ucnv_close(converter);
				// Build the name from the number so that two-digit code
				// pages produce a valid converter name as well.
				std::stringstream encoder;
				encoder << "iso-8859-" << codepage;
				converter = ucnv_open(encoder.str().c_str(), &status);
				previous_codepage = codepage;
			}
			// \S\ designates the character in the upper half of the code page.
			const char characters[2] = { static_cast<char>(current_char + 0x80), 0 };
			const char* char_array = &characters[0];
			UChar32 ch = ucnv_getNextUChar(converter,&char_array,char_array+1,&status);
			addChar(s,ch);
			parse_state = 0;
		} else if ( current_char == '\'' && ! parse_state ) {
			parse_state = APOSTROPHE;
		} else if ( current_char == '\\' && ! parse_state ) {
			parse_state = FIRST_SOLIDUS;
		} else if ( current_char == '\\' && EXPECTS_SOLIDUS(parse_state) ) {
			if ( parse_state & ALPHABET_DEFINITION ||
				parse_state & IGNORED_DIRECTIVE ||
				parse_state & ENDEXTENDED_0 ) {
				// The directive is complete: return to the initial state.
				parse_state = 0;
				hex = 0;
				hex_count = 0;
			}
			else if ( parse_state & HEX(3) ) parse_state += THIRD_SOLIDUS;
			else parse_state += SECOND_SOLIDUS;
		} else if ( current_char == 'X' && EXPECTS_ENDEXTENDED_X(parse_state) ) {
			parse_state += ENDEXTENDED_X;
		} else if ( current_char == '0' && EXPECTS_ENDEXTENDED_0(parse_state) ) {
			parse_state += ENDEXTENDED_0;
		} else if ( current_char == 'X' && EXPECTS_ARBITRARY(parse_state) ) {
			parse_state += ARBITRARY;
		} else if ( current_char == '2' && EXPECTS_ARBITRARY2(parse_state) ) {
			parse_state += EXTENDED2;
		} else if ( current_char == '4' && EXPECTS_ARBITRARY2(parse_state) ) {
			parse_state += EXTENDED2 + EXTENDED4;
		} else if ( current_char == 'P' && EXPECTS_ALPHABET(parse_state) ) {
			parse_state += ALPHABET;
		} else if ( (current_char == 'N' || current_char == 'F') && EXPECTS_N_OR_F(parse_state) ) {
			parse_state += IGNORED_DIRECTIVE;
		} else if ( IS_VALID_ALPHABET_DEFINITION(current_char) && EXPECTS_ALPHABET_DEFINITION(parse_state) ) {
			codepage = current_char - 0x40;
			parse_state += ALPHABET_DEFINITION;
		} else if ( current_char == 'S' && EXPECTS_PAGE(parse_state) ) {
			parse_state += PAGE;
		} else if ( IS_HEXADECIMAL(current_char) && EXPECTS_HEX(parse_state) ) {
			// Accumulate the digit; a character is emitted after 2 digits
			// (\X\), 4 digits (\X2\) or 8 digits (\X4\).
			hex <<= 4;
			parse_state += HEX((++hex_count));
			hex += HEX_TO_INT(current_char);
			if ( (hex_count == 2 && !(parse_state & EXTENDED2)) ||
				(hex_count == 4 && !(parse_state & EXTENDED4)) ||
				(hex_count == 8) ) {
				addChar(s,static_cast<UChar32>(hex));
				if ( hex_count == 2 ) parse_state = 0;
				else CLEAR_HEX(parse_state);
				hex = 0;
				hex_count = 0;
			}
		} else if ( parse_state && !(
			(current_char == '\\' && parse_state == FIRST_SOLIDUS) ||
			(current_char == '\'' && parse_state == APOSTROPHE)
			) ) {
			// A single apostrophe followed by anything else ends the string.
			if ( parse_state == APOSTROPHE && current_char != '\'' ) break;
			throw IfcException("Invalid character encountered");
		} else {
			// Ordinary character, or the second half of a doubled apostrophe
			// or solidus: copy it to the output.
			parse_state = 0;
			hex = 0;
			hex_count = 0;
			s.put(current_char);
		}
		file->Inc();
	}
	s.put('\'');
	return s.str();
}
171+
172+
void IfcCharacterDecoder::dryRun() {
173+
unsigned int parse_state = 0;
174+
char current_char;
175+
unsigned int hex_count = 0;
176+
while ( current_char = file->Peek() ) {
177+
if ( EXPECTS_CHARACTER(parse_state) ) {
178+
parse_state = 0;
179+
} else if ( current_char == '\'' && ! parse_state ) {
180+
parse_state = APOSTROPHE;
181+
} else if ( current_char == '\\' && ! parse_state ) {
182+
parse_state = FIRST_SOLIDUS;
183+
} else if ( current_char == '\\' && EXPECTS_SOLIDUS(parse_state) ) {
184+
if ( parse_state & ALPHABET_DEFINITION ||
185+
parse_state & IGNORED_DIRECTIVE ||
186+
parse_state & ENDEXTENDED_0 ) parse_state = hex_count = 0;
187+
else if ( parse_state & HEX(3) ) parse_state += THIRD_SOLIDUS;
188+
else parse_state += SECOND_SOLIDUS;
189+
} else if ( current_char == 'X' && EXPECTS_ENDEXTENDED_X(parse_state) ) {
190+
parse_state += ENDEXTENDED_X;
191+
} else if ( current_char == '0' && EXPECTS_ENDEXTENDED_0(parse_state) ) {
192+
parse_state += ENDEXTENDED_0;
193+
} else if ( current_char == 'X' && EXPECTS_ARBITRARY(parse_state) ) {
194+
parse_state += ARBITRARY;
195+
} else if ( current_char == '2' && EXPECTS_ARBITRARY2(parse_state) ) {
196+
parse_state += EXTENDED2;
197+
} else if ( current_char == '4' && EXPECTS_ARBITRARY2(parse_state) ) {
198+
parse_state += EXTENDED2 + EXTENDED4;
199+
} else if ( current_char == 'P' && EXPECTS_ALPHABET(parse_state) ) {
200+
parse_state += ALPHABET;
201+
} else if ( (current_char == 'N' || current_char == 'F') && EXPECTS_N_OR_F(parse_state) ) {
202+
parse_state += IGNORED_DIRECTIVE;
203+
} else if ( IS_VALID_ALPHABET_DEFINITION(current_char) && EXPECTS_ALPHABET_DEFINITION(parse_state) ) {
204+
parse_state += ALPHABET_DEFINITION;
205+
} else if ( current_char == 'S' && EXPECTS_PAGE(parse_state) ) {
206+
parse_state += PAGE;
207+
} else if ( IS_HEXADECIMAL(current_char) && EXPECTS_HEX(parse_state) ) {
208+
parse_state += HEX((++hex_count));
209+
if ( (hex_count == 2 && !(parse_state & EXTENDED2)) ||
210+
(hex_count == 4 && !(parse_state & EXTENDED4)) ||
211+
(hex_count == 8) ) {
212+
if ( hex_count == 2 ) parse_state = 0;
213+
else CLEAR_HEX(parse_state);
214+
hex_count = 0;
215+
}
216+
} else if ( parse_state && !(
217+
(current_char == '\\' && parse_state == FIRST_SOLIDUS) ||
218+
(current_char == '\'' && parse_state == APOSTROPHE)
219+
) ) {
220+
if ( parse_state == APOSTROPHE && current_char != '\'' ) break;
221+
throw IfcException("Invalid character encountered");
222+
} else {
223+
parse_state = hex_count = 0;
224+
}
225+
file->Inc();
226+
}
227+
}
228+
229+
// Definitions of the static members of IfcCharacterDecoder; they are shared
// by all decoder instances.

// Output converter; null until a constructor opens it for mode UTF8 or LATIN.
UConverter* IfcCharacterDecoder::destination = 0;
230+
// Input converter used for characters designated by the \S\ directive.
UConverter* IfcCharacterDecoder::converter = 0;
231+
// Code page compared against the current one to decide whether the input
// converter has to be reopened; -1 initially.
int IfcCharacterDecoder::previous_codepage = -1;
232+
// Error code passed to every ICU call made by the decoder.
UErrorCode IfcCharacterDecoder::status = U_ZERO_ERROR;
233+
// Conversion mode; with the default JSON no output converter is opened and
// decoded characters are written as \u escapes by addChar().
IfcCharacterDecoder::ConversionMode IfcCharacterDecoder::mode = IfcCharacterDecoder::JSON;
234+

src/ifcparse/IfcCharacterDecoder.h

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/********************************************************************************
2+
* *
3+
* This file is part of IfcOpenShell. *
4+
* *
5+
* IfcOpenShell is free software: you can redistribute it and/or modify *
6+
* it under the terms of the Lesser GNU General Public License as published by *
7+
* the Free Software Foundation, either version 3.0 of the License, or *
8+
* (at your option) any later version. *
9+
* *
10+
* IfcOpenShell is distributed in the hope that it will be useful, *
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13+
* Lesser GNU General Public License for more details. *
14+
* *
15+
* You should have received a copy of the Lesser GNU General Public License *
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
17+
* *
18+
********************************************************************************/
19+
20+
/********************************************************************************
21+
* *
22+
* Implementation of character decoding as described in ISO 10303-21 table 2 and *
23+
* table 4 *
24+
* *
25+
********************************************************************************/
26+
27+
#ifndef IFCCHARACTERDECODER_H
28+
#define IFCCHARACTERDECODER_H
29+
30+
#include <string>
31+
#include <sstream>
32+
33+
#include "unicode/ucnv.h"
34+
35+
#include "../ifcparse/IfcFile.h"
36+
37+
namespace IfcParse {
38+
39+
/// Decoder for string literals read from an IfcParse::File, covering the
/// character encodings described in ISO 10303-21 table 2 and table 4.
class IfcCharacterDecoder {
40+
private:
41+
/// File the characters are read from.
IfcParse::File* file;
42+
/// ICU converter for the output encoding, shared by all instances.
static UConverter* destination;
43+
/// ICU converter for the input code page, shared by all instances.
static UConverter* converter;
44+
/// Code page used to decide whether `converter` must be reopened.
static int previous_codepage;
45+
/// Error code passed to the ICU calls.
static UErrorCode status;
46+
/// Appends the code point `ch` to the stream `s`.
void addChar(std::stringstream& s,const UChar32& ch);
47+
public:
48+
/// Available output conversions.
enum ConversionMode {UTF8,LATIN,JSON,PYTHON};
49+
/// Output conversion in effect, shared by all instances.
static ConversionMode mode;
50+
/// Creates a decoder that reads from `file`.
IfcCharacterDecoder(IfcParse::File* file);
51+
~IfcCharacterDecoder();
52+
/// Advances the file past a string literal without producing output.
void dryRun();
53+
/// Decodes the string literal at the current file position.
operator std::string();
54+
};
55+
56+
}
57+
58+
#endif

src/ifcparse/IfcException.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/********************************************************************************
2+
* *
3+
* This file is part of IfcOpenShell. *
4+
* *
5+
* IfcOpenShell is free software: you can redistribute it and/or modify *
6+
* it under the terms of the Lesser GNU General Public License as published by *
7+
* the Free Software Foundation, either version 3.0 of the License, or *
8+
* (at your option) any later version. *
9+
* *
10+
* IfcOpenShell is distributed in the hope that it will be useful, *
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13+
* Lesser GNU General Public License for more details. *
14+
* *
15+
* You should have received a copy of the Lesser GNU General Public License *
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
17+
* *
18+
********************************************************************************/
19+
20+
#ifndef IFCEXCEPTION_H
21+
#define IFCEXCEPTION_H
22+
23+
#include <exception>
24+
#include <string>
25+
26+
namespace IfcParse {
27+
/// Exception type used by the IFC parser; derives from std::exception and
/// carries a message string.
class IfcException : public std::exception {
28+
private:
29+
/// Message text. NOTE(review): presumably the string passed to the
/// constructor - the definition is not part of this header.
std::string error;
30+
public:
31+
/// Constructs the exception from the message `e`.
IfcException(std::string e);
32+
~IfcException () throw ();
33+
/// Overrides std::exception::what(). NOTE(review): presumably returns the
/// stored message - confirm in the implementation file.
const char* what() const throw();
34+
};
35+
}
36+
37+
#endif

0 commit comments

Comments
 (0)