@@ -87,7 +87,6 @@ void IfcCharacterDecoder::addChar(std::stringstream& s,const UChar32& ch) {
8787 s.put (substitution_character);
8888#endif
8989}
90- #include < iostream>
9190IfcCharacterDecoder::IfcCharacterDecoder (IfcParse::File* f) {
9291 file = f;
9392#ifdef HAVE_ICU
@@ -103,6 +102,10 @@ IfcCharacterDecoder::IfcCharacterDecoder(IfcParse::File* f) {
103102 } else if (mode == LATIN) {
104103 destination = ucnv_open (" iso-8859-1" , &status);
105104 }
105+ if (compatibility_charset.empty ()) {
106+ compatibility_charset = ucnv_getDefaultName ();
107+ }
108+ compatibility_converter = ucnv_open (compatibility_charset.c_str (), &status);
106109#endif
107110}
108111IfcCharacterDecoder::~IfcCharacterDecoder () {
@@ -121,6 +124,8 @@ IfcCharacterDecoder::operator std::string() {
121124 int codepage = 1 ;
122125 unsigned int hex = 0 ;
123126 unsigned int hex_count = 0 ;
127+ unsigned int old_hex = 0 ; // for compatibility_mode
128+
124129 while ( current_char = file->Peek () ) {
125130 if ( EXPECTS_CHARACTER (parse_state) ) {
126131#ifdef HAVE_ICU
@@ -171,14 +176,27 @@ IfcCharacterDecoder::operator std::string() {
171176 hex <<= 4 ;
172177 parse_state += HEX ((++hex_count));
173178 hex += HEX_TO_INT (current_char);
174- if ( (hex_count == 2 && !(parse_state & EXTENDED2)) ||
175- (hex_count == 4 && !(parse_state & EXTENDED4)) ||
176- (hex_count == 8 ) ) {
177- addChar (s,(UChar32) hex);
178- if ( hex_count == 2 ) parse_state = 0 ;
179- else CLEAR_HEX (parse_state);
180- hex = hex_count = 0 ;
181- }
179+ if ( (hex_count == 2 && !(parse_state & EXTENDED2)) ||
180+ (hex_count == 4 && !(parse_state & EXTENDED4)) ||
181+ (hex_count == 8 ) ) {
182+ if (compatibility_mode) {
183+ if (old_hex == 0 ) {
184+ old_hex = hex;
185+ } else {
186+ char characters[3 ] = { old_hex, hex };
187+ const char * char_array = &characters[0 ];
188+ UChar32 ch = ucnv_getNextUChar (compatibility_converter,&char_array,char_array+2 ,&status);
189+ addChar (s,ch);
190+ old_hex = 0 ;
191+ }
192+ }
193+ else {
194+ addChar (s,(UChar32) hex);
195+ }
196+ if ( hex_count == 2 ) parse_state = 0 ;
197+ else CLEAR_HEX (parse_state);
198+ hex = hex_count = 0 ;
199+ }
182200 } else if ( parse_state && !(
183201 (current_char == ' \\ ' && parse_state == FIRST_SOLIDUS) ||
184202 (current_char == ' \' ' && parse_state == APOSTROPHE)
@@ -256,12 +274,18 @@ void IfcCharacterDecoder::dryRun() {
256274#ifdef HAVE_ICU
257275UConverter* IfcCharacterDecoder::destination = 0 ;
258276UConverter* IfcCharacterDecoder::converter = 0 ;
277+ UConverter* IfcCharacterDecoder::compatibility_converter = 0 ;
259278int IfcCharacterDecoder::previous_codepage = -1 ;
260279UErrorCode IfcCharacterDecoder::status = U_ZERO_ERROR;
261280#endif
262281
263282// #ifdef HAVE_ICU
264283IfcCharacterDecoder::ConversionMode IfcCharacterDecoder::mode = IfcCharacterDecoder::JSON;
284+
285+ // Many BIM applications (e.g. Revit, ArchiCAD, ...) behave incorrectly; the settings below enable a compatibility mode for them
286+ bool IfcCharacterDecoder::compatibility_mode = false ;
287+ std::string IfcCharacterDecoder::compatibility_charset = " " ;
288+
265289// #else
266290char IfcCharacterDecoder::substitution_character = ' _' ;
267291// #endif
0 commit comments