2121 v1.1, 1993. ISBN 0-201-57044-0.
2222"""
2323
24+ import base64
2425import binascii
2526import logging
2627import re
3536_log = logging .getLogger (__name__ )
3637
3738
39+ def _make_tag (set ):
40+ """
41+ Hash set into a six-character tag make of uppercase letters
42+
43+ Useful for adding a tag into subsetted fonts while keeping the code
44+ reproducible. The function always returns the same value for the
45+ same set on the same exact Python version but is not guaranteed to
46+ not have collisions.
47+
48+ Parameters
49+ ----------
50+ set : iterable
51+ The set of glyphs present in a font subset
52+
53+ Returns
54+ -------
55+ bytes
56+ Six uppercase ASCII letters
57+ """
58+
59+ # freeze the set to make it hashable, interpret the hash as bytes
60+ array = struct .pack ("@q" , hash (frozenset (set )))
61+ # turn the bytes into characters with b32encode, which uses uppercase
62+ # letters and numbers from 2 to 7 - remap those arbitrarily
63+ trans = bytes .maketrans (b'234567' , b'MTPLIB' )
64+ return base64 .b32encode (array ).translate (trans , delete = b'=' )[:6 ]
65+
66+
3867class _Token :
68+
3969 """
4070 A token in a PostScript stream
4171
@@ -485,6 +515,15 @@ def convert(x): return x.decode('ascii', 'replace')
485515 except StopIteration :
486516 break
487517
518+ # there are some standard abbreviations whose names vary
519+ # so detect them
520+ if value == b'{noaccess def}' :
521+ self ._abbr ['ND' ] = key .encode ('ascii' )
522+ elif value == b'{noaccess put}' :
523+ self ._abbr ['NP' ] = key .encode ('ascii' )
524+ elif value == b'{string currentfile exch readstring pop}' :
525+ self ._abbr ['RD' ] = key .encode ('ascii' )
526+
488527 # sometimes noaccess def and readonly def are abbreviated
489528 if kw .is_name (b'def' , self ._abbr ['ND' ], self ._abbr ['NP' ]):
490529 prop [key ] = value
@@ -556,13 +595,16 @@ def _parse_subrs(self, tokens, _data):
556595 "Token preceding subr must be RD or equivalent, "
557596 f"was { token } "
558597 )
598+ if not token .is_name (self ._abbr ['RD' ]):
599+ raise RuntimeError (
600+ f"Token preceding subr must be RD or equivalent, was { token } "
601+ )
559602 binary_token = tokens .send (1 + nbytes_token .numeric_value ())
560603 array [index_token .numeric_value ()] = binary_token .value [1 :]
561604
562605 return array , next (tokens ).endpos ()
563606
564- @staticmethod
565- def _parse_charstrings (tokens , _data ):
607+ def _parse_charstrings (self , tokens , _data ):
566608 count_token = next (tokens )
567609 if not count_token .is_number ():
568610 raise RuntimeError (
@@ -587,7 +629,11 @@ def _parse_charstrings(tokens, _data):
587629 f"Token following /{ glyphname } in CharStrings definition "
588630 f"must be a number, was { nbytes_token } "
589631 )
590- token = next (tokens ) # usually RD or |-
632+ token = next (tokens )
633+ if not token .is_name (self ._abbr ['RD' ]):
634+ raise RuntimeError (
635+ f"Token preceding charstring must be RD or equivalent, was { token } "
636+ )
591637 binary_token = tokens .send (1 + nbytes_token .numeric_value ())
592638 charstrings [glyphname ] = binary_token .value [1 :]
593639
@@ -620,16 +666,15 @@ def _parse_encoding(tokens, _data):
620666 encoding [index_token .numeric_value ()] = \
621667 name_token .value [1 :].decode ('ascii' , 'replace' )
622668
623- @staticmethod
624- def _parse_othersubrs (tokens , data ):
669+ def _parse_othersubrs (self , tokens , data ):
625670 init_pos = None
626671 while True :
627672 token = next (tokens )
628673 if init_pos is None :
629674 init_pos = token .pos
630675 if token .is_delim ():
631676 _expression (token , tokens , data )
632- elif token .value in (b'def' , b 'ND', b'|-' ):
677+ elif token .value in (b'def' , self . _abbr [ 'ND' ] ):
633678 return data [init_pos :token .endpos ()], token .endpos ()
634679
635680 def transform (self , effects ):
@@ -684,7 +729,7 @@ def transform(self, effects):
684729 fontmatrix = (
685730 '[%s]' % ' ' .join (_format_approx (x , 6 ) for x in array )
686731 ).encode ('ascii' )
687- replacements = (
732+ newparts = self . _replace (
688733 [(x , b'/FontName/%s def' % fontname )
689734 for x in self ._pos ['FontName' ]]
690735 + [(x , b'/ItalicAngle %a def' % italicangle )
@@ -694,6 +739,9 @@ def transform(self, effects):
694739 + [(x , b'' ) for x in self ._pos .get ('UniqueID' , [])]
695740 )
696741
742+ return Type1Font ((newparts [0 ], self ._encrypt (newparts [1 ], 'eexec' ), self .parts [2 ]))
743+
744+ def _replace (self , replacements ):
697745 data = bytearray (self .parts [0 ])
698746 data .extend (self .decrypted )
699747 len0 = len (self .parts [0 ])
@@ -708,11 +756,192 @@ def transform(self, effects):
708756 len0 += len (value ) - pos1 + pos0
709757
710758 data = bytes (data )
711- return Type1Font ((
712- data [:len0 ],
713- self ._encrypt (data [len0 :], 'eexec' ),
714- self .parts [2 ]
715- ))
759+ return data [:len0 ], data [len0 :]
760+
def subset(self, characters):
    """
    Return a new font that only defines the given characters.

    The requested codes are looked up in the font's Encoding (code 0 is
    always mapped to ``.notdef``), every resulting glyph is run through
    `_simulate` to collect the further glyphs and subroutines it uses,
    and the FontName, CharStrings, Subrs and Encoding definitions are
    then rewritten. Any UniqueID definitions are blanked out.

    Parameters
    ----------
    characters : sequence of bytes
        The subset of characters to include

    Returns
    -------
    `Type1Font`
    """
    wanted = set(characters)
    encoding = {}
    for code, glyph in self.prop['Encoding'].items():
        if code in wanted:
            encoding[code] = glyph
    encoding[0] = '.notdef'

    # todo holds every glyph name that must be kept; pending holds the
    # ones whose charstrings have not been simulated yet
    todo = set(encoding.values())
    pending = set(todo)
    # subroutines 0-3 are always kept
    seen_subrs = {0, 1, 2, 3}
    while pending:
        glyph = pending.pop()
        called_glyphs, called_subrs, _, _ = self._simulate(glyph, [], [])
        fresh = set(called_glyphs) - todo
        todo |= fresh
        pending |= fresh
        seen_subrs.update(called_subrs)

    fontname = (
        _make_tag(todo) + b'+' + self.prop['FontName'].encode('ascii')
    )
    charstrings = self._subset_charstrings(todo)
    subrs = self._subset_subrs(seen_subrs)

    replacements = [
        (pos, b'/FontName/%s def' % fontname)
        for pos in self._pos['FontName']
    ]
    replacements.append((self._pos['CharStrings'][0], charstrings))
    replacements.append((self._pos['Subrs'][0], subrs))
    replacements.append(
        (self._pos['Encoding'][0], self._subset_encoding(encoding))
    )
    replacements.extend((pos, b'') for pos in self._pos.get('UniqueID', []))

    newparts = self._replace(replacements)
    return Type1Font((
        newparts[0],
        self._encrypt(newparts[1], 'eexec'),
        self.parts[2],
    ))
802+
803+ @staticmethod
804+ def _charstring_tokens (data ):
805+ data = iter (data )
806+ for byte in data :
807+ if 32 <= byte <= 246 :
808+ yield byte - 139
809+ elif 247 <= byte <= 250 :
810+ byte2 = next (data )
811+ yield (byte - 247 ) * 256 + byte2 + 108
812+ elif 251 <= byte <= 254 :
813+ byte2 = next (data )
814+ yield - (byte - 251 )* 256 - byte2 - 108
815+ elif byte == 255 :
816+ bs = itertools .islice (data , 4 )
817+ yield struct .unpack ('>i' , bs )[0 ]
818+ elif byte == 12 :
819+ byte1 = next (data )
820+ yield {
821+ 0 : 'dotsection' ,
822+ 1 : 'vstem3' ,
823+ 2 : 'hstem3' ,
824+ 6 : 'seac' ,
825+ 7 : 'sbw' ,
826+ 12 : 'div' ,
827+ 16 : 'callothersubr' ,
828+ 17 : 'pop' ,
829+ 33 : 'setcurrentpoint'
830+ }[byte1 ]
831+ else :
832+ yield {
833+ 1 : 'hstem' ,
834+ 3 : 'vstem' ,
835+ 4 : 'vmoveto' ,
836+ 5 : 'rlineto' ,
837+ 6 : 'hlineto' ,
838+ 7 : 'vlineto' ,
839+ 8 : 'rrcurveto' ,
840+ 9 : 'closepath' ,
841+ 10 : 'callsubr' ,
842+ 11 : 'return' ,
843+ 13 : 'hsbw' ,
844+ 14 : 'endchar' ,
845+ 21 : 'rmoveto' ,
846+ 22 : 'hmoveto' ,
847+ 30 : 'vhcurveto' ,
848+ 31 : 'hvcurveto'
849+ }[byte ]
850+
851+ def _step (self , buildchar_stack , postscript_stack , opcode ):
852+ if isinstance (opcode , int ):
853+ return set (), set (), buildchar_stack + [opcode ], postscript_stack
854+ elif opcode in {'hsbw' , 'sbw' , 'closepath' , 'hlineto' , 'hmoveto' , 'hcurveto' , 'hvcurveto' ,
855+ 'rlineto' , 'rmoveto' , 'rrcurveto' , 'vhcurveto' , 'vlineto' , 'vmoveto' ,
856+ 'dotsection' , 'hstem' , 'hstem3' , 'vstem' , 'vstem3' , 'setcurrentpoint' }:
857+ return set (), set (), [], postscript_stack
858+ elif opcode == 'seac' :
859+ codes = buildchar_stack [3 :5 ]
860+ glyphs = [self .prop ['Encoding' ][x ] for x in codes ]
861+ return set (glyphs ), set (), [], postscript_stack
862+ elif opcode == 'div' :
863+ num1 , num2 = buildchar_stack [- 2 :]
864+ return set (), set (), buildchar_stack [- 2 :] + [num1 / num2 ], postscript_stack
865+ elif opcode == 'callothersubr' :
866+ othersubr = buildchar_stack [- 1 ]
867+ n = buildchar_stack [- 2 ]
868+ args = buildchar_stack [- 2 - n :- 2 ]
869+ if othersubr == 3 : # Section 8.1 in Type-1 spec
870+ postscript_stack .append (args [0 ])
871+ else :
872+ postscript_stack .extend (args [::- 1 ])
873+ return set (), set (), buildchar_stack [:- n - 2 ], postscript_stack
874+ elif opcode == 'callsubr' :
875+ subr = buildchar_stack [- 1 ]
876+ glyphs , subrs , new_bc_stack , new_ps_stack = \
877+ self ._simulate (subr , buildchar_stack [:- 1 ], postscript_stack )
878+ return set (), subrs | {subr }, new_bc_stack , new_ps_stack
879+ elif opcode == 'pop' :
880+ return set (), set (), buildchar_stack + [postscript_stack [- 1 ]], postscript_stack [:- 1 ]
881+ else :
882+ raise RuntimeError (f'opcode { opcode } ' )
883+
884+ def _simulate (self , glyph_or_subr , buildchar_stack , postscript_stack ):
885+ if isinstance (glyph_or_subr , str ):
886+ program = self .prop ['CharStrings' ][glyph_or_subr ]
887+ glyphs = {glyph_or_subr }
888+ subrs = set ()
889+ else :
890+ program = self .prop ['Subrs' ][glyph_or_subr ]
891+ glyphs = set ()
892+ subrs = {glyph_or_subr }
893+ for opcode in self ._charstring_tokens (program ):
894+ if opcode in ('return' , 'endchar' ):
895+ return glyphs , subrs , buildchar_stack , postscript_stack
896+ newglyphs , newsubrs , buildchar_stack , postscript_stack = \
897+ self ._step (buildchar_stack , postscript_stack , opcode )
898+ glyphs .update (newglyphs )
899+ subrs .update (newsubrs )
900+
901+ def _subset_encoding (self , encoding ):
902+ data = bytearray (b'/Encoding 256 array\n 0 1 255 { 1 index exch /.notdef put } for\n ' )
903+ for i , glyph in sorted (encoding .items ()):
904+ if glyph == '.notdef' :
905+ continue
906+ data .extend (f'dup { i } /{ glyph } put\n ' .encode ('ascii' ))
907+ data .extend (b'readonly def\n ' )
908+ return bytes (data )
909+
910+ def _subset_charstrings (self , glyphs ):
911+ data = bytearray (f'/CharStrings { len (glyphs )} dict dup begin\n ' .encode ('ascii' ))
912+ for glyph in glyphs :
913+ enc = self ._encrypt (self .prop ['CharStrings' ][glyph ], 'charstring' , self .prop .get ('lenIV' , 4 ))
914+ data .extend (f'/{ glyph } { len (enc )} ' .encode ('ascii' ))
915+ data .extend (self ._abbr ["RD" ])
916+ data .extend (b' ' )
917+ data .extend (enc )
918+ data .extend (b' ' )
919+ data .extend (self ._abbr ["ND" ])
920+ data .extend (b'\n ' )
921+ data .extend (b'end\n ' )
922+ return bytes (data )
923+
924+ def _subset_subrs (self , indices ):
925+ # we can't remove subroutines, we just replace unused ones with a stub
926+ n_subrs = len (self .prop ['Subrs' ])
927+ data = bytearray (f'/Subrs { n_subrs } array\n ' .encode ('ascii' ))
928+ for i in range (n_subrs ):
929+ if i in indices :
930+ sub = self .prop ['Subrs' ][i ]
931+ else :
932+ sub = bytes ([11 ])
933+ enc = self ._encrypt (sub , 'charstring' , self .prop .get ('lenIV' , 4 ))
934+ data .extend (f'dup { i } { len (enc )} ' .encode ('ascii' ))
935+ data .extend (self ._abbr ['RD' ])
936+ data .extend (b' ' )
937+ data .extend (enc )
938+ data .extend (b' ' )
939+ data .extend (self ._abbr ['NP' ])
940+ data .extend (b'\n ' )
941+
942+ data .extend (self ._abbr ['ND' ])
943+ data .extend (b'\n ' )
944+ return bytes (data )
716945
717946
718947StandardEncoding = {
0 commit comments