2121 v1.1, 1993. ISBN 0-201-57044-0.
2222"""
2323
24+ import base64
2425import binascii
2526import functools
27+ import itertools
2628import logging
2729import re
2830import string
3638_log = logging .getLogger (__name__ )
3739
3840
41+ def _make_tag (set ):
42+ """
43+ Hash set into a six-character tag made of uppercase letters
44+
45+ Useful for adding a tag into subsetted fonts while keeping the code
46+ reproducible. The function always returns the same value for the
47+ same set on the same exact Python version but is not guaranteed to
48+ not have collisions.
49+
50+ Parameters
51+ ----------
52+ set : iterable
53+ The set of glyphs present in a font subset
54+
55+ Returns
56+ -------
57+ str
58+ Six uppercase ASCII letters and a plus sign
59+ """
60+
61+ # freeze the set to make it hashable, interpret the hash as bytes
62+ array = struct .pack ("@q" , hash (frozenset (set )))
63+ # turn the bytes into characters with b32encode, which uses uppercase
64+ # letters and numbers from 2 to 7 - remap those arbitrarily
65+ trans = str .maketrans ('234567' , 'MTPLIB' , '=' )
66+ return (base64 .b32encode (array ).decode ('ascii' )
67+ .translate (trans )[:6 ] + '+' )
68+
69+
3970class _Token :
4071 """
4172 A token in a PostScript stream
@@ -627,8 +658,7 @@ def _parse_subrs(self, tokens, _data):
627658
628659 return array , next (tokens ).endpos ()
629660
630- @staticmethod
631- def _parse_charstrings (tokens , _data ):
661+ def _parse_charstrings (self , tokens , _data ):
632662 count_token = next (tokens )
633663 if not count_token .is_number ():
634664 raise RuntimeError (
@@ -650,7 +680,12 @@ def _parse_charstrings(tokens, _data):
650680 f"Token following /{ glyphname } in CharStrings definition "
651681 f"must be a number, was { nbytes_token } "
652682 )
653- next (tokens ) # usually RD or |-
683+ token = next (tokens )
684+ if not token .is_keyword (self ._abbr ['RD' ]):
685+ raise RuntimeError (
686+ "Token preceding charstring must be {self._abbr['RD']}, "
687+ f"was { token } "
688+ )
654689 binary_token = tokens .send (1 + nbytes_token .value ())
655690 charstrings [glyphname ] = binary_token .value ()
656691
@@ -681,16 +716,15 @@ def _parse_encoding(tokens, _data):
681716 continue
682717 encoding [index_token .value ()] = name_token .value ()
683718
684- @staticmethod
685- def _parse_othersubrs (tokens , data ):
719+ def _parse_othersubrs (self , tokens , data ):
686720 init_pos = None
687721 while True :
688722 token = next (tokens )
689723 if init_pos is None :
690724 init_pos = token .pos
691725 if token .is_delim ():
692726 _expression (token , tokens , data )
693- elif token .is_keyword ('def' , 'ND' , '|-' ):
727+ elif token .is_keyword ('def' , self . _abbr [ 'ND' ] ):
694728 return data [init_pos :token .endpos ()], token .endpos ()
695729
696730 def transform (self , effects ):
@@ -745,7 +779,7 @@ def transform(self, effects):
745779 fontmatrix = (
746780 '[%s]' % ' ' .join (_format_approx (x , 6 ) for x in array )
747781 )
748- replacements = (
782+ newparts = self . _replace (
749783 [(x , '/FontName/%s def' % fontname )
750784 for x in self ._pos ['FontName' ]]
751785 + [(x , '/ItalicAngle %a def' % italicangle )
@@ -755,11 +789,40 @@ def transform(self, effects):
755789 + [(x , '' ) for x in self ._pos .get ('UniqueID' , [])]
756790 )
757791
792+ return Type1Font ((
793+ newparts [0 ],
794+ self ._encrypt (newparts [1 ], 'eexec' ),
795+ self .parts [2 ]
796+ ))
797+
798+ def _replace (self , replacements ):
799+ """
800+ Change the font according to `replacements`
801+
802+ Parameters
803+ ----------
804+ replacements : list of ((int, int), str)
805+ Each element is ((pos0, pos1), replacement) where pos0 and
806+ pos1 are indices to the original font data (parts[0] and the
807+ decrypted part concatenated). The data in the interval
808+ pos0:pos1 will be replaced by the replacement text. To
809+ accommodate binary data, the replacement is taken to be in
810+ Latin-1 encoding.
811+
812+ The case where pos0 is inside parts[0] and pos1 inside
813+ the decrypted part is not supported.
814+
815+ Returns
816+ -------
817+ (bytes, bytes)
818+ The new parts[0] and decrypted part (which needs to be
819+ encrypted in the transformed font).
820+ """
758821 data = bytearray (self .parts [0 ])
759822 data .extend (self .decrypted )
760823 len0 = len (self .parts [0 ])
761824 for (pos0 , pos1 ), value in sorted (replacements , reverse = True ):
762- data [pos0 :pos1 ] = value .encode ('ascii' , 'replace ' )
825+ data [pos0 :pos1 ] = value .encode ('latin-1 ' )
763826 if pos0 < len (self .parts [0 ]):
764827 if pos1 >= len (self .parts [0 ]):
765828 raise RuntimeError (
@@ -769,12 +832,211 @@ def transform(self, effects):
769832 len0 += len (value ) - pos1 + pos0
770833
771834 data = bytes (data )
835+ return data [:len0 ], data [len0 :]
836+
837+ def subset (self , characters ):
838+ """
839+ Return a new font that only defines the given characters.
840+
841+ Parameters
842+ ----------
843+ characters : sequence of bytes
844+ The subset of characters to include
845+
846+ Returns
847+ -------
848+ `Type1Font`
849+ """
850+
851+ characters = set (characters )
852+ encoding = {code : glyph
853+ for code , glyph in self .prop ['Encoding' ].items ()
854+ if code in characters }
855+ encoding [0 ] = '.notdef'
856+ # todo and done include strings (glyph names)
857+ todo = set (encoding .values ())
858+ done = set ()
859+ seen_subrs = {0 , 1 , 2 , 3 }
860+ while todo - done :
861+ glyph = next (iter (todo - done ))
862+ called_glyphs , called_subrs , _ , _ = self ._simulate (glyph , [], [])
863+ todo .update (called_glyphs )
864+ seen_subrs .update (called_subrs )
865+ done .add (glyph )
866+
867+ fontname = _make_tag (todo ) + self .prop ['FontName' ]
868+ charstrings = self ._subset_charstrings (todo )
869+ subrs = self ._subset_subrs (seen_subrs )
870+ newparts = self ._replace (
871+ [(x , '/FontName/%s def' % fontname )
872+ for x in self ._pos ['FontName' ]]
873+ + [(self ._pos ['CharStrings' ][0 ], charstrings ),
874+ (self ._pos ['Subrs' ][0 ], subrs ),
875+ (self ._pos ['Encoding' ][0 ], self ._subset_encoding (encoding ))
876+ ] + [(x , '' ) for x in self ._pos .get ('UniqueID' , [])]
877+ )
772878 return Type1Font ((
773- data [: len0 ],
774- self ._encrypt (data [ len0 : ], 'eexec' ),
879+ newparts [ 0 ],
880+ self ._encrypt (newparts [ 1 ], 'eexec' ),
775881 self .parts [2 ]
776882 ))
777883
884+ @staticmethod
885+ def _charstring_tokens (data ):
886+ data = iter (data )
887+ for byte in data :
888+ if 32 <= byte <= 246 :
889+ yield byte - 139
890+ elif 247 <= byte <= 250 :
891+ byte2 = next (data )
892+ yield (byte - 247 ) * 256 + byte2 + 108
893+ elif 251 <= byte <= 254 :
894+ byte2 = next (data )
895+ yield - (byte - 251 )* 256 - byte2 - 108
896+ elif byte == 255 :
897+ bs = itertools .islice (data , 4 )
898+ yield struct .unpack ('>i' , bs )[0 ]
899+ elif byte == 12 :
900+ byte1 = next (data )
901+ yield {
902+ 0 : 'dotsection' ,
903+ 1 : 'vstem3' ,
904+ 2 : 'hstem3' ,
905+ 6 : 'seac' ,
906+ 7 : 'sbw' ,
907+ 12 : 'div' ,
908+ 16 : 'callothersubr' ,
909+ 17 : 'pop' ,
910+ 33 : 'setcurrentpoint'
911+ }[byte1 ]
912+ else :
913+ yield {
914+ 1 : 'hstem' ,
915+ 3 : 'vstem' ,
916+ 4 : 'vmoveto' ,
917+ 5 : 'rlineto' ,
918+ 6 : 'hlineto' ,
919+ 7 : 'vlineto' ,
920+ 8 : 'rrcurveto' ,
921+ 9 : 'closepath' ,
922+ 10 : 'callsubr' ,
923+ 11 : 'return' ,
924+ 13 : 'hsbw' ,
925+ 14 : 'endchar' ,
926+ 21 : 'rmoveto' ,
927+ 22 : 'hmoveto' ,
928+ 30 : 'vhcurveto' ,
929+ 31 : 'hvcurveto'
930+ }[byte ]
931+
932+ def _step (self , buildchar_stack , postscript_stack , opcode ):
933+ if isinstance (opcode , int ):
934+ return set (), set (), buildchar_stack + [opcode ], postscript_stack
935+ elif opcode in {
936+ 'hsbw' , 'sbw' , 'closepath' , 'hlineto' , 'hmoveto' , 'hcurveto' ,
937+ 'hvcurveto' , 'rlineto' , 'rmoveto' , 'rrcurveto' , 'vhcurveto' ,
938+ 'vlineto' , 'vmoveto' , 'dotsection' , 'hstem' , 'hstem3' , 'vstem' ,
939+ 'vstem3' , 'setcurrentpoint'
940+ }:
941+ return set (), set (), [], postscript_stack
942+ elif opcode == 'seac' :
943+ codes = buildchar_stack [3 :5 ]
944+ glyphs = [self .prop ['Encoding' ][x ] for x in codes ]
945+ return set (glyphs ), set (), [], postscript_stack
946+ elif opcode == 'div' :
947+ num1 , num2 = buildchar_stack [- 2 :]
948+ return (
949+ set (),
950+ set (),
951+ buildchar_stack [- 2 :] + [num1 / num2 ], postscript_stack
952+ )
953+ elif opcode == 'callothersubr' :
954+ othersubr = buildchar_stack [- 1 ]
955+ n = buildchar_stack [- 2 ]
956+ args = buildchar_stack [- 2 - n :- 2 ]
957+ if othersubr == 3 : # Section 8.1 in Type-1 spec
958+ postscript_stack .append (args [0 ])
959+ else :
960+ postscript_stack .extend (args [::- 1 ])
961+ return set (), set (), buildchar_stack [:- n - 2 ], postscript_stack
962+ elif opcode == 'callsubr' :
963+ subr = buildchar_stack [- 1 ]
964+ glyphs , subrs , new_bc_stack , new_ps_stack = \
965+ self ._simulate (subr , buildchar_stack [:- 1 ], postscript_stack )
966+ return set (), subrs | {subr }, new_bc_stack , new_ps_stack
967+ elif opcode == 'pop' :
968+ return (
969+ set (),
970+ set (),
971+ buildchar_stack + [postscript_stack [- 1 ]], postscript_stack [:- 1 ]
972+ )
973+ else :
974+ raise RuntimeError (f'opcode { opcode } ' )
975+
976+ def _simulate (self , glyph_or_subr , buildchar_stack , postscript_stack ):
977+ if isinstance (glyph_or_subr , str ):
978+ program = self .prop ['CharStrings' ][glyph_or_subr ]
979+ glyphs = {glyph_or_subr }
980+ subrs = set ()
981+ else :
982+ program = self .prop ['Subrs' ][glyph_or_subr ]
983+ glyphs = set ()
984+ subrs = {glyph_or_subr }
985+ for opcode in self ._charstring_tokens (program ):
986+ if opcode in ('return' , 'endchar' ):
987+ return glyphs , subrs , buildchar_stack , postscript_stack
988+ newglyphs , newsubrs , buildchar_stack , postscript_stack = \
989+ self ._step (buildchar_stack , postscript_stack , opcode )
990+ glyphs .update (newglyphs )
991+ subrs .update (newsubrs )
992+
993+ def _subset_encoding (self , encoding ):
994+ result = [
995+ '/Encoding 256 array\n 0 1 255 { 1 index exch /.notdef put } for'
996+ ]
997+ result .extend (
998+ f'dup { i } /{ glyph } put'
999+ for i , glyph in sorted (encoding .items ())
1000+ if glyph != '.notdef'
1001+ )
1002+ result .extend ('readonly def\n ' )
1003+ return '\n ' .join (result )
1004+
1005+ def _subset_charstrings (self , glyphs ):
1006+ result = [f'/CharStrings { len (glyphs )} dict dup begin' ]
1007+ encrypted = [self ._encrypt (self .prop ['CharStrings' ][glyph ],
1008+ 'charstring' ,
1009+ self .prop .get ('lenIV' , 4 )
1010+ ).decode ('latin-1' )
1011+ for glyph in glyphs ]
1012+ RD , ND = self ._abbr ['RD' ], self ._abbr ['ND' ]
1013+ result .extend (
1014+ f'/{ glyph } { len (enc )} { RD } { enc } { ND } '
1015+ for glyph , enc in zip (glyphs , encrypted )
1016+ )
1017+ result .append ('end\n ' )
1018+ return '\n ' .join (result )
1019+
1020+ def _subset_subrs (self , indices ):
1021+ # we can't remove subroutines, we just replace unused ones with a stub
1022+ n_subrs = len (self .prop ['Subrs' ])
1023+ result = [f'/Subrs { n_subrs } array' ]
1024+ lenIV = self .prop .get ('lenIV' , 4 )
1025+ stub = self ._encrypt (b'\x0b ' , 'charstring' , lenIV ).decode ('latin-1' )
1026+ encrypted = [
1027+ self ._encrypt (self .prop ['Subrs' ][i ], 'charstring' , lenIV
1028+ ).decode ('latin-1' )
1029+ if i in indices else stub
1030+ for i in range (n_subrs )
1031+ ]
1032+ RD , ND , NP = self ._abbr ['RD' ], self ._abbr ['ND' ], self ._abbr ['NP' ]
1033+ result .extend (
1034+ f'dup { i } { len (enc )} { RD } { enc } { NP } '
1035+ for i , enc in enumerate (encrypted )
1036+ )
1037+ result .extend ((ND , '' ))
1038+ return '\n ' .join (result )
1039+
7781040
7791041_StandardEncoding = {
7801042 ** {ord (letter ): letter for letter in string .ascii_letters },
0 commit comments