@@ -2407,3 +2407,97 @@ bool MCUnicodeWildcardMatch(const void *source_chars, uindex_t source_length, bo
24072407
24082408 return true ;
24092409}
2410+
2411+ // //////////////////////////////////////////////////////////////////////////////
2412+
2413+ // Implement rules GB 6 - 8 based on Hangul syllable type
2414+ static bool __MCUnicodeIsHangulClusterBoundary (int32_t p_left, int32_t p_right)
2415+ {
2416+ switch (p_left)
2417+ {
2418+ case U_GCB_L:
2419+ return p_right == U_GCB_T;
2420+ case U_GCB_LV:
2421+ case U_GCB_V:
2422+ return p_right != U_GCB_V && p_right != U_GCB_T;
2423+ case U_GCB_LVT:
2424+ case U_GCB_T:
2425+ return p_right != U_GCB_T;
2426+ default :
2427+ MCUnreachable ();
2428+ }
2429+ }
2430+
2431+ static bool __MCUnicodeIsControl (int32_t p_gcb)
2432+ {
2433+ return p_gcb == U_GCB_CR || p_gcb == U_GCB_LF || p_gcb == U_GCB_CONTROL;
2434+ }
2435+
2436+ static bool __MCUnicodeIsHangulSyllable (int32_t p_gcb)
2437+ {
2438+ switch (p_gcb)
2439+ {
2440+ case U_GCB_L:
2441+ case U_GCB_LV:
2442+ case U_GCB_LVT:
2443+ case U_GCB_T:
2444+ case U_GCB_V:
2445+ return true ;
2446+ default :
2447+ break ;
2448+ }
2449+
2450+ return false ;
2451+ }
2452+
2453+ bool MCUnicodeIsGraphemeClusterBoundary (codepoint_t p_left, codepoint_t p_right)
2454+ {
2455+ int32_t t_left_gcb;
2456+ t_left_gcb = MCUnicodeGetIntegerProperty (p_left, kMCUnicodePropertyGraphemeClusterBreak );
2457+
2458+ int32_t t_right_gcb;
2459+ t_right_gcb = MCUnicodeGetIntegerProperty (p_right, kMCUnicodePropertyGraphemeClusterBreak );
2460+
2461+ // We treat CR LF as 2 graphemes, contrary to GB 3
2462+ /*
2463+ if (t_left_gcb == U_GCB_CR && t_right_gcb == U_GCB_LF)
2464+ return false;
2465+ */
2466+
2467+ // GB 4: Break after controls
2468+ if (__MCUnicodeIsControl (t_left_gcb))
2469+ return true ;
2470+
2471+ // GB 5: Break before controls
2472+ if (__MCUnicodeIsControl (t_right_gcb))
2473+ return true ;
2474+
2475+ // GB 6 - 8: Do not break Hangul syllable sequences.
2476+ if (__MCUnicodeIsHangulSyllable (t_left_gcb) && __MCUnicodeIsHangulSyllable (t_right_gcb))
2477+ {
2478+ if (!__MCUnicodeIsHangulClusterBoundary (t_left_gcb, t_right_gcb))
2479+ return false ;
2480+ }
2481+
2482+ // GB 8a: Do not break between regional indicator symbols.
2483+ if (t_left_gcb == U_GCB_REGIONAL_INDICATOR && t_right_gcb == U_GCB_REGIONAL_INDICATOR)
2484+ return false ;
2485+
2486+ // GB 9: Do not break before extending characters.
2487+ if (t_right_gcb == U_GCB_EXTEND)
2488+ return false ;
2489+
2490+ // GB 9a: Do not break before SpacingMarks
2491+ if (t_right_gcb == U_GCB_SPACING_MARK)
2492+ return false ;
2493+
2494+ // GB 9b: Do not break after Prepend characters
2495+ if (t_left_gcb == U_GCB_PREPEND)
2496+ return false ;
2497+
2498+ // GB 10: Otherwise, break everywhere.
2499+ return true ;
2500+ }
2501+
2502+ // //////////////////////////////////////////////////////////////////////////////
2503+
0 commit comments