@@ -159,6 +159,7 @@ type FormInfo struct {
159159 combinesBackward bool // May combine with rune on the left
160160 isOneWay bool // Never appears in result
161161 inDecomp bool // Some decompositions result in this char.
162+ suffixDecomp bool // Appears after first rune of a decomposition
162163 decomp Decomposition
163164 expandedDecomp Decomposition
164165}
@@ -397,8 +398,11 @@ func completeCharFields(form int) {
397398 f .isOneWay = f .isOneWay || hasCompatDecomp (c .codePoint )
398399 }
399400
400- for _ , r := range f .decomp {
401+ for i , r := range f .decomp {
401402 chars [r ].forms [form ].inDecomp = true
403+ if i > 0 {
404+ chars [r ].forms [form ].suffixDecomp = true
405+ }
402406 }
403407 }
404408
@@ -420,6 +424,35 @@ func completeCharFields(form int) {
420424 if isHangulWithoutJamoT (rune (i )) {
421425 f .combinesForward = true
422426 }
427+ if (i & 0xffff00 ) == JamoLBase {
428+ if JamoLBase <= i && i < JamoLEnd {
429+ f .combinesForward = true
430+ }
431+ if JamoVBase <= i && i < JamoVEnd {
432+ f .combinesBackward = true
433+ f .combinesForward = true
434+ }
435+ if JamoTBase <= i && i < JamoTEnd {
436+ f .combinesBackward = true
437+ }
438+ }
439+ }
440+
441+ // Phase 2½: backward combining propagation.
442+ for i := range chars {
443+ c := & chars [i ]
444+ f := & c .forms [form ]
445+
446+ // If the first rune of f's decomposition combines backward,
447+ // then f itself must be considered to combine backward.
448+ // This handles the "MaybeNo" runes introduced in Unicode 16.
449+ // https://www.unicode.org/reports/tr15/tr15-56.html#Contexts_Care
450+ if ! f .isOneWay && len (f .decomp ) > 0 {
451+ f0 := & chars [f .decomp [0 ]].forms [form ]
452+ if f0 .combinesBackward {
453+ f .combinesBackward = true
454+ }
455+ }
423456 }
424457
425458 // Phase 3: quick check values.
@@ -438,20 +471,6 @@ func completeCharFields(form int) {
438471 switch {
439472 case f .isOneWay :
440473 f .quickCheck [MComposed ] = QCNo
441- case (i & 0xffff00 ) == JamoLBase :
442- f .quickCheck [MComposed ] = QCYes
443- if JamoLBase <= i && i < JamoLEnd {
444- f .combinesForward = true
445- }
446- if JamoVBase <= i && i < JamoVEnd {
447- f .quickCheck [MComposed ] = QCMaybe
448- f .combinesBackward = true
449- f .combinesForward = true
450- }
451- if JamoTBase <= i && i < JamoTEnd {
452- f .quickCheck [MComposed ] = QCMaybe
453- f .combinesBackward = true
454- }
455474 case ! f .combinesBackward :
456475 f .quickCheck [MComposed ] = QCYes
457476 default :
@@ -574,20 +593,17 @@ func (m *decompSet) insert(key int, s string) {
574593}
575594
576595func printCharInfoTables (w io.Writer ) int {
577- mkstr := func (r rune , f * FormInfo ) (int , string ) {
596+ mkstr := func (r rune , f * FormInfo , c * Char ) (int , string ) {
578597 d := f .expandedDecomp
579598 s := string ([]rune (d ))
580- if max := 1 << 6 ; len (s ) >= max {
581- const msg = "%U: too many bytes in decomposition: %d >= %d"
582- log .Fatalf (msg , r , len (s ), max )
583- }
584- head := uint8 (len (s ))
585- if f .quickCheck [MComposed ] != QCYes {
586- head |= 0x40
599+ slen := len (s )
600+ if slen == 31 || slen == 32 || slen > 33 { // 31 is rejected here because a length of 33 is stored as 31 just below
601+ log .Fatalf ("%U: too many bytes in decomposition: %d" , r , slen ) // r feeds %U, slen feeds %d
587602 }
588- if f . combinesForward {
589- head |= 0x80
603+ if slen == 33 {
604+ slen = 31
590605 }
606+ head := uint8 (slen ) | uint8 (makeEntry (f , c )>> 3 << 5 )
591607 s = string ([]byte {head }) + s
592608
593609 lccc := ccc (d [0 ])
@@ -609,7 +625,7 @@ func printCharInfoTables(w io.Writer) int {
609625 s += string ([]byte {tccc })
610626 index = endMulti
611627 for _ , r := range d [1 :] {
612- if ccc (r ) == 0 {
628+ if ccc (r ) == 0 && ! chars [ r ]. forms [ FCanonical ]. combinesBackward {
613629 index = firstCCC
614630 }
615631 }
@@ -643,10 +659,7 @@ func printCharInfoTables(w io.Writer) int {
643659 if len (f .expandedDecomp ) == 0 {
644660 continue
645661 }
646- if f .combinesBackward {
647- log .Fatalf ("%U: combinesBackward and decompose" , c .codePoint )
648- }
649- index , s := mkstr (c .codePoint , & f )
662+ index , s := mkstr (c .codePoint , & f , & c )
650663 decompSet .insert (index , s )
651664 }
652665 }
@@ -685,7 +698,7 @@ func printCharInfoTables(w io.Writer) int {
685698 f := c .forms [i ]
686699 d := f .expandedDecomp
687700 if len (d ) != 0 {
688- _ , key := mkstr (c .codePoint , & f )
701+ _ , key := mkstr (c .codePoint , & f , & c )
689702 trie .Insert (rune (r ), uint64 (positionMap [key ]))
690703 if c .ccc != ccc (d [0 ]) {
691704 // We assume the lead ccc of a decomposition !=0 in this case.
@@ -834,9 +847,6 @@ func verifyComputed() {
834847 if f .combinesBackward != isMaybe {
835848 log .Fatalf ("%U: NF*C QC must be Maybe if combinesBackward" , i )
836849 }
837- if len (f .decomp ) > 0 && f .combinesForward && isMaybe {
838- log .Fatalf ("%U: NF*C QC must be Yes or No if combinesForward and decomposes" , i )
839- }
840850
841851 if len (f .expandedDecomp ) != 0 {
842852 continue
0 commit comments