#!/usr/bin/env python # # Copyright 2010-2012, Google Inc. # Author: Mikhail Kashkin (mkashkin@gmail.com) # Author: Raph Levien (@gmail.com) # Author: Dave Crossland (dave@understandinglimited.com) # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Version 1.01 Released 2012-03-27 # # A script for subsetting a font, using FontForge. See README for details. # TODO 2013-04-08 ensure the menu files are as compact as possible by default, similar to subset.pl # TODO 2013-05-22 in Arimo, the latin subset doesn't include ; but the greek does. why on earth is this happening? 
from __future__ import print_function

import getopt
import os
import struct
import sys

try:
    import fontforge
except ImportError:
    # The FontForge bindings are only needed once a font is actually opened.
    # Deferring the failure keeps the pure helpers in this file (getsubset,
    # Sfnt) importable without them; _require_fontforge() reports the problem
    # at the point of use.
    fontforge = None


def _require_fontforge():
    """Raise ImportError if the FontForge Python bindings are unavailable."""
    if fontforge is None:
        raise ImportError(
            "the 'fontforge' Python module is required to open fonts")


def log_namelist(nam, unicode):
    """Write a "0xXXXX glyphname" line for a code point to the namelist file.

    Does nothing when `nam` is falsy or when `unicode` is not an int (for
    example a glyph name string).
    """
    if nam and isinstance(unicode, int):
        print("0x%0.4X" % unicode, fontforge.nameFromUnicode(unicode), file=nam)


def select_with_refs(font, unicode, newfont, pe=None, nam=None):
    """Add a glyph, and every glyph it references, to newfont's selection.

    font    -- font in which the glyph's references are looked up
    unicode -- code point (int) or glyph name (str) to select
    newfont -- font whose selection is extended
    pe      -- optional open script file that receives matching SelectMore()
               lines
    nam     -- optional open namelist file (see log_namelist)

    Failing to resolve references is reported on stdout and otherwise
    ignored, so a glyph missing from `font` does not abort the subsetting.
    """
    newfont.selection.select(('more', 'unicode'), unicode)
    log_namelist(nam, unicode)
    if pe:
        # Glyph names (strings) cannot go through %d; emit them quoted, the
        # same way referenced glyph names are written below.
        if isinstance(unicode, int):
            print("SelectMore(%d)" % unicode, file=pe)
        else:
            print('SelectMore("%s")' % unicode, file=pe)
    try:
        for ref in font[unicode].references:
            # ref[0] is the name of the referenced glyph.
            newfont.selection.select(('more',), ref[0])
            log_namelist(nam, ref[0])
            if pe:
                print('SelectMore("%s")' % ref[0], file=pe)
    except Exception:
        # Best effort on purpose, but do not swallow KeyboardInterrupt or
        # SystemExit the way a bare except would.
        if isinstance(unicode, int):
            print('Resolving references on u+%04x failed' % unicode)
        else:
            print('Resolving references on "%s" failed' % unicode)


def subset_font_raw(font_in, font_out, unicodes, opts):
    """Subset font_in down to `unicodes` and write the result to font_out.

    font_in  -- path of the source font
    font_out -- path of the font to generate
    unicodes -- iterable of code points (int) and/or glyph names (str) to keep
    opts     -- dict of command line options as built in main(); only the
                presence of a key is tested
    """
    _require_fontforge()

    if '--namelist' in opts:
        # 2010-12-06 DC To allow setting namelist filenames,
        # change getopt.gnu_getopt from namelist to namelist=
        # and invert comments on following 2 lines
        # nam_fn = opts['--namelist']
        nam_fn = font_out + '.nam'
        nam = open(nam_fn, 'w')
    else:
        nam = None
    if '--script' in opts:
        pe_fn = "/tmp/script.pe"
        pe = open(pe_fn, 'w')
    else:
        pe = None

    font = fontforge.open(font_in)
    if pe:
        print('Open("' + font_in + '")', file=pe)
        extract_vert_to_script(font_in, pe)
    for i in unicodes:
        select_with_refs(font, i, font, pe, nam)

    addl_glyphs = []
    if '--nmr' in opts:
        addl_glyphs.append('nonmarkingreturn')
    if '--null' in opts:
        addl_glyphs.append('.null')
    if '--nd' in opts:
        addl_glyphs.append('.notdef')
    for glyph in addl_glyphs:
        font.selection.select(('more',), glyph)
        if nam:
            print("0x%0.4X" % fontforge.unicodeFromName(glyph), glyph, file=nam)
        if pe:
            print('SelectMore("%s")' % glyph, file=pe)

    flags = ()

    if '--opentype-features' in opts:
        flags += ('opentype',)

    if '--simplify' in opts:
        font.simplify()
        font.round()
        flags += ('omit-instructions',)

    if '--strip_names' in opts:
        font.sfnt_names = ()

    if '--new' in opts:
        # Copy the selected glyphs into a brand new font.
        font.copy()
        new = fontforge.font()
        new.encoding = font.encoding
        new.em = font.em
        new.layers['Fore'].is_quadratic = font.layers['Fore'].is_quadratic
        for i in unicodes:
            select_with_refs(font, i, new, pe, nam)
        new.paste()
        # This is a hack - it should have been taken care of above.
        font.selection.select('space')
        font.copy()
        new.selection.select('space')
        new.paste()
        new.sfnt_names = font.sfnt_names
        font = new
    else:
        # Remove everything that is NOT selected from the opened font.
        font.selection.invert()
        if pe:
            # Without this guard print(file=None) writes the script line to
            # stdout whenever --script is not given.
            print("SelectInvert()", file=pe)
        font.cut()
        if pe:
            print("Clear()", file=pe)

    if '--move-display' in opts:
        print("Moving display glyphs into unicode ranges...")
        font.familyname += " Display"
        font.fullname += " Display"
        font.fontname += "Display"
        font.appendSFNTName('English (US)', 'Family', font.familyname)
        font.appendSFNTName('English (US)', 16, font.familyname)
        font.appendSFNTName('English (US)', 17, 'Display')
        font.appendSFNTName('English (US)', 'Fullname', font.fullname)
        for glname in unicodes:
            font.selection.none()
            if isinstance(glname, str):
                if glname.endswith('.display'):
                    # Paste the .display variant over its base glyph.
                    font.selection.select(glname)
                    font.copy()
                    font.selection.none()
                    newgl = glname.replace('.display', '')
                    font.selection.select(newgl)
                    font.paste()
                font.selection.select(glname)
                font.cut()

    if nam:
        print("Writing NameList", end="")
        nam.close()

    if pe:
        # Script mode: the font is generated by a separate fontforge process
        # that runs the script written above.
        print('Generate("' + font_out + '")', file=pe)
        pe.close()
        os.system("fontforge -script " + pe_fn)
    else:
        font.generate(font_out, flags=flags)
    font.close()

    if '--roundtrip' in opts:
        # FontForge apparently contains a bug where it incorrectly calculates
        # the advanceWidthMax in the hhea table, and a workaround is to open
        # and re-generate
        font2 = fontforge.open(font_out)
        font2.generate(font_out, flags=flags)
        font2.close()


def subset_font(font_in, font_out, unicodes, opts):
    """Subset font_in into font_out, whatever extension font_out has.

    The font is generated under a name ending in '.ttf' and renamed to
    font_out afterwards when the two names differ.
    """
    font_out_raw = font_out
    if not font_out_raw.endswith('.ttf'):
        font_out_raw += '.ttf'
    subset_font_raw(font_in, font_out_raw, unicodes, opts)
    if font_out != font_out_raw:
        os.rename(font_out_raw, font_out)
        # 2011-02-14 DC this needs to only happen with --namelist is used
        # os.rename(font_out_raw + '.nam', font_out + '.nam')


def getsubset(subset, font_in):
    """Return the list of code points / glyph names for a subset spec.

    subset  -- subset specification such as 'latin' or 'latin+greek'
    font_in -- path of the source font; only opened for the 'menu' and
               'dejavu-ext' subsets

    Subset names are matched as substrings of the raw spec, so 'latin-ext'
    also selects 'latin', 'greek-ext' selects 'greek' and 'cyrillic-ext'
    selects 'cyrillic'. The result may contain duplicates.
    """
    quotes = [0x2013]  # endash
    quotes += [0x2014]  # emdash
    quotes += [0x2018]  # quoteleft
    quotes += [0x2019]  # quoteright
    quotes += [0x201A]  # quotesinglbase
    quotes += [0x201C]  # quotedblleft
    quotes += [0x201D]  # quotedblright
    quotes += [0x201E]  # quotedblbase
    quotes += [0x2022]  # bullet
    quotes += [0x2039]  # guilsinglleft
    quotes += [0x203A]  # guilsinglright

    # list(...) because range() is not a list on Python 3 and cannot be
    # extended or concatenated there.
    latin = list(range(0x20, 0x7f))  # Basic Latin (A-Z, a-z, numbers)
    latin += list(range(0xa0, 0x100))  # Western European symbols and diacritics
    latin += [0x20ac]  # Euro
    latin += [0x0152]  # OE
    latin += [0x0153]  # oe
    latin += [0x003b]  # semicolon
    latin += [0x00b7]  # periodcentered
    latin += [0x0131]  # dotlessi
    latin += [0x02c6]  # circumflex
    latin += [0x02da]  # ring
    latin += [0x02dc]  # tilde
    latin += [0x2074]  # foursuperior
    latin += [0x2215]  # division slash
    latin += [0x2044]  # fraction slash
    latin += [0xe0ff]  # PUA: Font logo
    latin += [0xeffd]  # PUA: Font version number
    latin += [0xf000]  # PUA: font ppem size indicator: run `ftview -f 1255 10 Ubuntu-Regular.ttf` to see it in action!

    result = list(quotes)

    if 'menu' in subset:
        # The menu subset replaces the quotes collected so far with the
        # characters of the family name plus a space.
        _require_fontforge()
        font = fontforge.open(font_in)
        result = [ord(ch) for ch in font.familyname]
        result += [0x0020]

    if 'latin' in subset:
        result += latin
    if 'latin-ext' in subset:
        # These ranges include Extended A, B, C, D, and Additional with the
        # exception of Vietnamese, which is a separate range
        result += (list(range(0x100, 0x370)) +
                   list(range(0x1d00, 0x1ea0)) +
                   list(range(0x1ef2, 0x1f00)) +
                   list(range(0x2070, 0x20d0)) +
                   list(range(0x2c60, 0x2c80)) +
                   list(range(0xa700, 0xa800)))
    if 'vietnamese' in subset:
        # 2011-07-16 DC: Charset from http://vietunicode.sourceforge.net/charset/ + U+1ef9 from Fontaine
        result += [0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00C8, 0x00C9,
                   0x00CA, 0x00CC, 0x00CD, 0x00D2, 0x00D3, 0x00D4,
                   0x00D5, 0x00D9, 0x00DA, 0x00DD, 0x00E0, 0x00E1,
                   0x00E2, 0x00E3, 0x00E8, 0x00E9, 0x00EA, 0x00EC,
                   0x00ED, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F9,
                   0x00FA, 0x00FD, 0x0102, 0x0103, 0x0110, 0x0111,
                   0x0128, 0x0129, 0x0168, 0x0169, 0x01A0, 0x01A1,
                   0x01AF, 0x01B0, 0x20AB] + list(range(0x1EA0, 0x1EFA))
    if 'greek' in subset:
        # Could probably be more aggressive here and exclude archaic characters,
        # but lack data
        result += list(range(0x370, 0x400))
    if 'greek-ext' in subset:
        result += list(range(0x370, 0x400)) + list(range(0x1f00, 0x2000))
    if 'cyrillic' in subset:
        # Based on character frequency analysis
        result += list(range(0x400, 0x460)) + [0x490, 0x491, 0x4b0, 0x4b1, 0x2116]
    if 'cyrillic-ext' in subset:
        # 0x2116 is the russian No, a number abbreviation similar to the
        # latin #, suggested by Alexei Vanyashin
        result += (list(range(0x400, 0x530)) +
                   [0x20b4, 0x2116] +
                   list(range(0x2de0, 0x2e00)) +
                   list(range(0xa640, 0xa6a0)))
    if 'arabic' in subset:
        # Based on Droid Arabic Kufi 1.0
        result += [
            0x000D, 0x0020, 0x0621, 0x0627, 0x062D, 0x062F, 0x0631, 0x0633,
            0x0635, 0x0637, 0x0639, 0x0643, 0x0644, 0x0645, 0x0647, 0x0648,
            0x0649, 0x0640, 0x066E, 0x066F, 0x0660, 0x0661, 0x0662, 0x0663,
            0x0664, 0x0665, 0x0666, 0x0667, 0x0668, 0x0669, 0x06F4, 0x06F5,
            0x06F6, 0x06BE, 0x06D2, 0x06A9, 0x06AF, 0x06BA, 0x066A, 0x061F,
            0x060C, 0x061B, 0x066B, 0x066C, 0x066D, 0x064B, 0x064D, 0x064E,
            0x064F, 0x064C, 0x0650, 0x0651, 0x0652, 0x0653, 0x0654, 0x0655,
            0x0670, 0x0656, 0x0615, 0x0686, 0x0623, 0x0625, 0x0622, 0x0671,
            0x0628, 0x067E, 0x062A, 0x062B, 0x0679, 0x0629, 0x062C, 0x062E,
            0x0630, 0x0688, 0x0632, 0x0691, 0x0698, 0x0634, 0x0636, 0x0638,
            0x063A, 0x0641, 0x0642, 0x0646, 0x06D5, 0x06C0, 0x0624, 0x064A,
            0x06CC, 0x06D3, 0x0626, 0x06C2, 0x06C1, 0x06C3, 0x06F0, 0x06F1,
            0x06F2, 0x06F3, 0x06F9, 0x06F7, 0x06F8, 0xFC63, 0x0672, 0x0673,
            0x0675, 0x0676, 0x0677, 0x0678, 0x067A, 0x067B, 0x067C, 0x067D,
            0x067F, 0x0680, 0x0681, 0x0682, 0x0683, 0x0684, 0x0685, 0x0687,
            0x0689, 0x068A, 0x068B, 0x068C, 0x068D, 0x068E, 0x068F, 0x0690,
            0x0692, 0x0693, 0x0694, 0x0695, 0x0696, 0x0697, 0x0699, 0x069A,
            0x069B, 0x069C, 0x069D, 0x069E, 0x069F, 0x06A0, 0x06A1, 0x06A2,
            0x06A3, 0x06A5, 0x06A6, 0x06A7, 0x06A8, 0x06AA, 0x06AB, 0x06AC,
            0x06AD, 0x06AE, 0x06B0, 0x06B1, 0x06B2, 0x06B3, 0x06B4, 0x06B5,
            0x06B6, 0x06B7, 0x06B8, 0x06B9, 0x06BB, 0x06BC, 0x06BD, 0x06BF,
            0x06C4, 0x06C5, 0x06CD, 0x06D6, 0x06D7, 0x06D8, 0x06D9, 0x06DA,
            0x06DB, 0x06DC, 0x06DF, 0x06E1, 0x06E2, 0x06E3, 0x06E4, 0x06E5,
            0x06E6, 0x06E7, 0x06E8, 0x06EA, 0x06EB, 0x06ED, 0x06FB, 0x06FC,
            0x06FD, 0x06FE, 0x0600, 0x0601, 0x0602, 0x0603, 0x060E, 0x060F,
            0x0610, 0x0611, 0x0612, 0x0613, 0x0614, 0x0657, 0x0658, 0x06EE,
            0x06EF, 0x06FF, 0x060B, 0x061E, 0x0659, 0x065A, 0x065B, 0x065C,
            0x065D, 0x065E, 0x0750, 0x0751, 0x0752, 0x0753, 0x0754, 0x0755,
            0x0756, 0x0757, 0x0758, 0x0759, 0x075A, 0x075B, 0x075C, 0x075D,
            0x075E, 0x075F, 0x0760, 0x0761, 0x0762, 0x0763, 0x0764, 0x0765,
            0x0766, 0x0767, 0x0768, 0x0769, 0x076A, 0x076B, 0x076C, 0x076D,
            0x06A4, 0x06C6, 0x06C7, 0x06C8, 0x06C9, 0x06CA, 0x06CB, 0x06CF,
            0x06CE, 0x06D0, 0x06D1, 0x06D4, 0x06FA, 0x06DD, 0x06DE, 0x06E0,
            0x06E9, 0x060D, 0xFD3E, 0xFD3F, 0x25CC,
            # Added from https://groups.google.com/d/topic/googlefontdirectory-discuss/MwlMWMPNCXs/discussion
            0x063b, 0x063c, 0x063d, 0x063e, 0x063f, 0x0620, 0x0674, 0x0674,
            0x06EC]

    if 'dejavu-ext' in subset:
        # add all glyphnames ending in .display
        _require_fontforge()
        font = fontforge.open(font_in)
        for glyph in font.glyphs():
            if glyph.glyphname.endswith('.display'):
                result.append(glyph.glyphname)

    # print(result)
    return result


# code for extracting vertical metrics from a TrueType font

class Sfnt:
    """Minimal reader for the table directory of an sfnt (TrueType) file.

    `tables` maps each table tag, as text (e.g. 'hhea', 'OS/2'), to the raw
    bytes of that table.
    """

    def __init__(self, data):
        """Index the tables found in `data`, the raw bytes of a font file."""
        # 12-byte header: version, numTables and three fields unused here.
        _, numTables, _, _, _ = struct.unpack('>IHHHH', data[:12])
        self.tables = {}
        for i in range(numTables):
            # Each 16-byte directory record: tag, checksum, offset, length.
            tag, checkSum, offset, length = struct.unpack(
                '>4sIII', data[12 + 16 * i: 28 + 16 * i])
            if not isinstance(tag, str):
                # On Python 3 the tag unpacks as bytes; decode it so that the
                # text lookups in hhea() and os2() find it. latin-1 maps
                # every byte value, so this cannot fail.
                tag = tag.decode('latin-1')
            self.tables[tag] = data[offset: offset + length]

    def hhea(self):
        """Return Ascender, Descender and LineGap from the 'hhea' table."""
        r = {}
        d = self.tables['hhea']
        r['Ascender'], r['Descender'], r['LineGap'] = struct.unpack('>hhh', d[4:10])
        return r

    def os2(self):
        """Return fsSelection and the typo/win vertical metrics from 'OS/2'."""
        r = {}
        d = self.tables['OS/2']
        r['fsSelection'], = struct.unpack('>H', d[62:64])
        r['sTypoAscender'], r['sTypoDescender'], r['sTypoLineGap'] = struct.unpack('>hhh', d[68:74])
        r['usWinAscender'], r['usWinDescender'] = struct.unpack('>HH', d[74:78])
        return r


def set_os2(pe, name, val):
    """Write a SetOS2Value("name", val) script line to the file `pe`."""
    print('SetOS2Value("' + name + '", %d)' % val, file=pe)


def set_os2_vert(pe, name, val):
    """Write script lines that zero "<name>IsOffset" and then set <name>."""
    set_os2(pe, name + 'IsOffset', 0)
    set_os2(pe, name, val)


# Extract vertical metrics data directly out of font file, and emit
# script code to set the values in the generated font. This is a (rather
# ugly) workaround for the issue described in:
# http://sourceforge.net/mailarchive/forum.php?thread_name=20100906085718.GB1907%40khaled-laptop&forum_name=fontforge-users

def extract_vert_to_script(font_in, pe):
    """Copy the vertical metrics of the font file font_in into script `pe`.

    font_in -- path of the font file to read
    pe      -- open script file receiving the SetOS2Value() lines
    """
    with open(font_in, 'rb') as font_file:
        data = font_file.read()
    sfnt = Sfnt(data)
    hhea = sfnt.hhea()
    os2 = sfnt.os2()
    set_os2_vert(pe, "WinAscent", os2['usWinAscender'])
    set_os2_vert(pe, "WinDescent", os2['usWinDescender'])
    set_os2_vert(pe, "TypoAscent", os2['sTypoAscender'])
    set_os2_vert(pe, "TypoDescent", os2['sTypoDescender'])
    set_os2_vert(pe, "HHeadAscent", hhea['Ascender'])
    set_os2_vert(pe, "HHeadDescent", hhea['Descender'])


def main(argv):
    """Parse the command line `argv` (without program name) and subset a font.

    Exactly two positional arguments are expected: the input font path and
    the output font path. The characters to keep come from --string when it
    is given, otherwise from --subset (default 'latin').
    """
    optlist, args = getopt.gnu_getopt(argv, '', ['string=', 'strip_names',
                                                  'opentype-features',
                                                  'simplify', 'new', 'script',
                                                  'nmr', 'roundtrip',
                                                  'subset=', 'namelist',
                                                  'null', 'nd',
                                                  'move-display'])

    font_in, font_out = args
    opts = dict(optlist)
    if '--string' in opts:
        # A real list: map() returns a one-shot iterator on Python 3, while
        # the subset is iterated more than once (e.g. with --new).
        subset = [ord(ch) for ch in opts['--string']]
    else:
        subset = getsubset(opts.get('--subset', 'latin'), font_in)
    subset_font(font_in, font_out, subset, opts)


if __name__ == '__main__':
    main(sys.argv[1:])