2121 v1.1, 1993. ISBN 0-201-57044-0.
2222"""
2323
24+ import base64
2425import binascii
2526import logging
2627import re
3435_log = logging .getLogger (__name__ )
3536
3637
38+ def _make_tag (set ):
39+ """
40+ Hash set into a six-character tag make of uppercase letters
41+
42+ Useful for adding a tag into subsetted fonts while keeping the code
43+ reproducible. The function always returns the same value for the
44+ same set on the same exact Python version but is not guaranteed to
45+ not have collisions.
46+
47+ Parameters
48+ ----------
49+ set : iterable
50+ The set of glyphs present in a font subset
51+
52+ Returns
53+ -------
54+ bytes
55+ Six uppercase ASCII letters
56+ """
57+
58+ # freeze the set to make it hashable, interpret the hash as bytes
59+ array = struct .pack ("@q" , hash (frozenset (set )))
60+ # turn the bytes into characters with b32encode, which uses uppercase
61+ # letters and numbers from 2 to 7 - remap those arbitrarily
62+ trans = bytes .maketrans (b'234567' , b'MTPLIB' )
63+ return base64 .b32encode (array ).translate (trans , delete = b'=' )[:6 ]
64+
65+
3766class _Token :
67+
3868 """
3969 A token in a PostScript stream
4070
@@ -489,6 +519,15 @@ def convert(x): return x.decode('ascii', 'replace')
489519 except StopIteration :
490520 break
491521
522+ # there are some standard abbreviations whose names vary
523+ # so detect them
524+ if value == b'{noaccess def}' :
525+ self ._abbr ['ND' ] = key .encode ('ascii' )
526+ elif value == b'{noaccess put}' :
527+ self ._abbr ['NP' ] = key .encode ('ascii' )
528+ elif value == b'{string currentfile exch readstring pop}' :
529+ self ._abbr ['RD' ] = key .encode ('ascii' )
530+
492531 # sometimes noaccess def and readonly def are abbreviated
493532 if kw .is_name (b'def' , self ._abbr ['ND' ], self ._abbr ['NP' ]):
494533 prop [key ] = value
@@ -560,13 +599,16 @@ def _parse_subrs(self, tokens, _data):
560599 "Token preceding subr must be RD or equivalent, "
561600 f"was { token } "
562601 )
602+ if not token .is_name (self ._abbr ['RD' ]):
603+ raise RuntimeError (
604+ f"Token preceding subr must be RD or equivalent, was { token } "
605+ )
563606 binary_token = tokens .send (1 + nbytes_token .numeric_value ())
564607 array [index_token .numeric_value ()] = binary_token .value [1 :]
565608
566609 return array , next (tokens ).endpos ()
567610
568- @staticmethod
569- def _parse_charstrings (tokens , _data ):
611+ def _parse_charstrings (self , tokens , _data ):
570612 count_token = next (tokens )
571613 if not count_token .is_number ():
572614 raise RuntimeError (
@@ -591,7 +633,11 @@ def _parse_charstrings(tokens, _data):
591633 f"Token following /{ glyphname } in CharStrings definition "
592634 f"must be a number, was { nbytes_token } "
593635 )
594- token = next (tokens ) # usually RD or |-
636+ token = next (tokens )
637+ if not token .is_name (self ._abbr ['RD' ]):
638+ raise RuntimeError (
639+ f"Token preceding charstring must be RD or equivalent, was { token } "
640+ )
595641 binary_token = tokens .send (1 + nbytes_token .numeric_value ())
596642 charstrings [glyphname ] = binary_token .value [1 :]
597643
@@ -624,16 +670,15 @@ def _parse_encoding(tokens, _data):
624670 encoding [index_token .numeric_value ()] = \
625671 name_token .value [1 :].decode ('ascii' , 'replace' )
626672
627- @staticmethod
628- def _parse_othersubrs (tokens , data ):
673+ def _parse_othersubrs (self , tokens , data ):
629674 init_pos = None
630675 while True :
631676 token = next (tokens )
632677 if init_pos is None :
633678 init_pos = token .pos
634679 if token .is_delim ():
635680 _expression (token , tokens , data )
636- elif token .value in (b'def' , b 'ND', b'|-' ):
681+ elif token .value in (b'def' , self . _abbr [ 'ND' ] ):
637682 return data [init_pos :token .endpos ()], token .endpos ()
638683
639684 def transform (self , effects ):
@@ -688,7 +733,7 @@ def transform(self, effects):
688733 fontmatrix = (
689734 '[%s]' % ' ' .join (_format_approx (x , 6 ) for x in array )
690735 ).encode ('ascii' )
691- replacements = (
736+ newparts = self . _replace (
692737 [(x , b'/FontName/%s def' % fontname )
693738 for x in self ._pos ['FontName' ]]
694739 + [(x , b'/ItalicAngle %a def' % italicangle )
@@ -698,6 +743,9 @@ def transform(self, effects):
698743 + [(x , b'' ) for x in self ._pos .get ('UniqueID' , [])]
699744 )
700745
746+ return Type1Font ((newparts [0 ], self ._encrypt (newparts [1 ], 'eexec' ), self .parts [2 ]))
747+
748+ def _replace (self , replacements ):
701749 data = bytearray (self .parts [0 ])
702750 data .extend (self .decrypted )
703751 len0 = len (self .parts [0 ])
@@ -712,11 +760,192 @@ def transform(self, effects):
712760 len0 += len (value ) - pos1 + pos0
713761
714762 data = bytes (data )
715- return Type1Font ((
716- data [:len0 ],
717- self ._encrypt (data [len0 :], 'eexec' ),
718- self .parts [2 ]
719- ))
763+ return data [:len0 ], data [len0 :]
764+
def subset(self, characters):
    """
    Return a new font that only defines the given characters.

    Starting from the glyphs that the requested character codes map to
    (plus ``.notdef``), every glyph program is simulated to collect the
    further glyphs and subroutines it refers to; the font is then rebuilt
    with only those glyphs, with a tagged font name and without UniqueID.

    Parameters
    ----------
    characters : sequence of bytes
        The subset of characters to include

    Returns
    -------
    `Type1Font`
    """
    wanted = set(characters)
    encoding = {}
    for code, glyph in self.prop['Encoding'].items():
        if code in wanted:
            encoding[code] = glyph
    encoding[0] = '.notdef'

    # todo and done include strings (glyph names)
    todo = set(encoding.values())
    done = set()
    # subroutines 0 to 3 are always kept
    # NOTE(review): presumably the conventional flex/hint-replacement
    # entries - confirm against the Type-1 spec
    seen_subrs = {0, 1, 2, 3}
    pending = todo - done
    while pending:
        glyph = next(iter(pending))
        called_glyphs, called_subrs, _, _ = self._simulate(glyph, [], [])
        todo.update(called_glyphs)
        seen_subrs.update(called_subrs)
        done.add(glyph)
        pending = todo - done

    fontname = _make_tag(todo) + b'+' + self.prop['FontName'].encode('ascii')
    charstrings = self._subset_charstrings(todo)
    subrs = self._subset_subrs(seen_subrs)

    # each entry pairs a position taken from self._pos with its new content
    replacements = [
        (pos, b'/FontName/%s def' % fontname) for pos in self._pos['FontName']
    ]
    replacements.append((self._pos['CharStrings'][0], charstrings))
    replacements.append((self._pos['Subrs'][0], subrs))
    replacements.append(
        (self._pos['Encoding'][0], self._subset_encoding(encoding))
    )
    replacements.extend((pos, b'') for pos in self._pos.get('UniqueID', []))

    cleartext, to_encrypt = self._replace(replacements)
    return Type1Font(
        (cleartext, self._encrypt(to_encrypt, 'eexec'), self.parts[2])
    )
806+
807+ @staticmethod
808+ def _charstring_tokens (data ):
809+ data = iter (data )
810+ for byte in data :
811+ if 32 <= byte <= 246 :
812+ yield byte - 139
813+ elif 247 <= byte <= 250 :
814+ byte2 = next (data )
815+ yield (byte - 247 ) * 256 + byte2 + 108
816+ elif 251 <= byte <= 254 :
817+ byte2 = next (data )
818+ yield - (byte - 251 )* 256 - byte2 - 108
819+ elif byte == 255 :
820+ bs = itertools .islice (data , 4 )
821+ yield struct .unpack ('>i' , bs )[0 ]
822+ elif byte == 12 :
823+ byte1 = next (data )
824+ yield {
825+ 0 : 'dotsection' ,
826+ 1 : 'vstem3' ,
827+ 2 : 'hstem3' ,
828+ 6 : 'seac' ,
829+ 7 : 'sbw' ,
830+ 12 : 'div' ,
831+ 16 : 'callothersubr' ,
832+ 17 : 'pop' ,
833+ 33 : 'setcurrentpoint'
834+ }[byte1 ]
835+ else :
836+ yield {
837+ 1 : 'hstem' ,
838+ 3 : 'vstem' ,
839+ 4 : 'vmoveto' ,
840+ 5 : 'rlineto' ,
841+ 6 : 'hlineto' ,
842+ 7 : 'vlineto' ,
843+ 8 : 'rrcurveto' ,
844+ 9 : 'closepath' ,
845+ 10 : 'callsubr' ,
846+ 11 : 'return' ,
847+ 13 : 'hsbw' ,
848+ 14 : 'endchar' ,
849+ 21 : 'rmoveto' ,
850+ 22 : 'hmoveto' ,
851+ 30 : 'vhcurveto' ,
852+ 31 : 'hvcurveto'
853+ }[byte ]
854+
855+ def _step (self , buildchar_stack , postscript_stack , opcode ):
856+ if isinstance (opcode , int ):
857+ return set (), set (), buildchar_stack + [opcode ], postscript_stack
858+ elif opcode in {'hsbw' , 'sbw' , 'closepath' , 'hlineto' , 'hmoveto' , 'hcurveto' , 'hvcurveto' ,
859+ 'rlineto' , 'rmoveto' , 'rrcurveto' , 'vhcurveto' , 'vlineto' , 'vmoveto' ,
860+ 'dotsection' , 'hstem' , 'hstem3' , 'vstem' , 'vstem3' , 'setcurrentpoint' }:
861+ return set (), set (), [], postscript_stack
862+ elif opcode == 'seac' :
863+ codes = buildchar_stack [3 :5 ]
864+ glyphs = [self .prop ['Encoding' ][x ] for x in codes ]
865+ return set (glyphs ), set (), [], postscript_stack
866+ elif opcode == 'div' :
867+ num1 , num2 = buildchar_stack [- 2 :]
868+ return set (), set (), buildchar_stack [- 2 :] + [num1 / num2 ], postscript_stack
869+ elif opcode == 'callothersubr' :
870+ othersubr = buildchar_stack [- 1 ]
871+ n = buildchar_stack [- 2 ]
872+ args = buildchar_stack [- 2 - n :- 2 ]
873+ if othersubr == 3 : # Section 8.1 in Type-1 spec
874+ postscript_stack .append (args [0 ])
875+ else :
876+ postscript_stack .extend (args [::- 1 ])
877+ return set (), set (), buildchar_stack [:- n - 2 ], postscript_stack
878+ elif opcode == 'callsubr' :
879+ subr = buildchar_stack [- 1 ]
880+ glyphs , subrs , new_bc_stack , new_ps_stack = \
881+ self ._simulate (subr , buildchar_stack [:- 1 ], postscript_stack )
882+ return set (), subrs | {subr }, new_bc_stack , new_ps_stack
883+ elif opcode == 'pop' :
884+ return set (), set (), buildchar_stack + [postscript_stack [- 1 ]], postscript_stack [:- 1 ]
885+ else :
886+ raise RuntimeError (f'opcode { opcode } ' )
887+
888+ def _simulate (self , glyph_or_subr , buildchar_stack , postscript_stack ):
889+ if isinstance (glyph_or_subr , str ):
890+ program = self .prop ['CharStrings' ][glyph_or_subr ]
891+ glyphs = {glyph_or_subr }
892+ subrs = set ()
893+ else :
894+ program = self .prop ['Subrs' ][glyph_or_subr ]
895+ glyphs = set ()
896+ subrs = {glyph_or_subr }
897+ for opcode in self ._charstring_tokens (program ):
898+ if opcode in ('return' , 'endchar' ):
899+ return glyphs , subrs , buildchar_stack , postscript_stack
900+ newglyphs , newsubrs , buildchar_stack , postscript_stack = \
901+ self ._step (buildchar_stack , postscript_stack , opcode )
902+ glyphs .update (newglyphs )
903+ subrs .update (newsubrs )
904+
905+ def _subset_encoding (self , encoding ):
906+ data = bytearray (b'/Encoding 256 array\n 0 1 255 { 1 index exch /.notdef put } for\n ' )
907+ for i , glyph in sorted (encoding .items ()):
908+ if glyph == '.notdef' :
909+ continue
910+ data .extend (f'dup { i } /{ glyph } put\n ' .encode ('ascii' ))
911+ data .extend (b'readonly def\n ' )
912+ return bytes (data )
913+
914+ def _subset_charstrings (self , glyphs ):
915+ data = bytearray (f'/CharStrings { len (glyphs )} dict dup begin\n ' .encode ('ascii' ))
916+ for glyph in glyphs :
917+ enc = self ._encrypt (self .prop ['CharStrings' ][glyph ], 'charstring' , self .prop .get ('lenIV' , 4 ))
918+ data .extend (f'/{ glyph } { len (enc )} ' .encode ('ascii' ))
919+ data .extend (self ._abbr ["RD" ])
920+ data .extend (b' ' )
921+ data .extend (enc )
922+ data .extend (b' ' )
923+ data .extend (self ._abbr ["ND" ])
924+ data .extend (b'\n ' )
925+ data .extend (b'end\n ' )
926+ return bytes (data )
927+
928+ def _subset_subrs (self , indices ):
929+ # we can't remove subroutines, we just replace unused ones with a stub
930+ n_subrs = len (self .prop ['Subrs' ])
931+ data = bytearray (f'/Subrs { n_subrs } array\n ' .encode ('ascii' ))
932+ for i in range (n_subrs ):
933+ if i in indices :
934+ sub = self .prop ['Subrs' ][i ]
935+ else :
936+ sub = bytes ([11 ])
937+ enc = self ._encrypt (sub , 'charstring' , self .prop .get ('lenIV' , 4 ))
938+ data .extend (f'dup { i } { len (enc )} ' .encode ('ascii' ))
939+ data .extend (self ._abbr ['RD' ])
940+ data .extend (b' ' )
941+ data .extend (enc )
942+ data .extend (b' ' )
943+ data .extend (self ._abbr ['NP' ])
944+ data .extend (b'\n ' )
945+
946+ data .extend (self ._abbr ['ND' ])
947+ data .extend (b'\n ' )
948+ return bytes (data )
720949
721950
722951StandardEncoding = {
0 commit comments