1010from __future__ import unicode_literals
1111
1212import argparse
13- import re
1413import sys
1514
15+ import regex
16+
1617__all__ = ['titlecase' ]
1718__version__ = '0.12.0'
1819
1920SMALL = r'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\.?|via|vs\.?'
2021PUNCT = r"""!"“#$%&'‘()*+,\-–‒—―./:;?@[\\\]_`{|}~"""
2122
22- SMALL_WORDS = re .compile (r'^(%s)$' % SMALL , re .I )
23- INLINE_PERIOD = re .compile (r'[a-z ][.][a-z ]' , re .I )
24- UC_ELSEWHERE = re .compile (r'[%s]*?[a-zA-Z ]+[A-Z ]+?' % PUNCT )
25- CAPFIRST = re .compile (r"^[%s]*?([A-Za-z ])" % PUNCT )
26- SMALL_FIRST = re .compile (r'^([%s]*)(%s)\b' % (PUNCT , SMALL ), re .I )
27- SMALL_LAST = re .compile (r'\b(%s)[%s]?$' % (SMALL , PUNCT ), re .I )
28- SUBPHRASE = re .compile (r'([:.;?!\-–‒—―][ ])(%s)' % SMALL )
29- APOS_SECOND = re .compile (r"^[dol]{1}['‘]{1}[a-z ]+(?:['s]{2})?$" , re .I )
30- UC_INITIALS = re .compile (r"^(?:[A-Z ]{1}\.{1}|[A-Z ]{1}\.{1}[A-Z ]{1})+$" )
31- MAC_MC = re .compile (r"^([Mm]c|MC)(\w.+)" )
23+ SMALL_WORDS = regex .compile (r'^(%s)$' % SMALL , regex .I )
24+ INLINE_PERIOD = regex .compile (r'[\p{Letter} ][.][\p{Letter} ]' , regex .I )
25+ UC_ELSEWHERE = regex .compile (r'[%s]*?[\p{Letter} ]+[\p{Uppercase_Letter} ]+?' % PUNCT )
26+ CAPFIRST = regex .compile (r"^[%s]*?([\p{Letter} ])" % PUNCT )
27+ SMALL_FIRST = regex .compile (r'^([%s]*)(%s)\b' % (PUNCT , SMALL ), regex .I )
28+ SMALL_LAST = regex .compile (r'\b(%s)[%s]?$' % (SMALL , PUNCT ), regex .I )
29+ SUBPHRASE = regex .compile (r'([:.;?!\-–‒—―][ ])(%s)' % SMALL )
30+ APOS_SECOND = regex .compile (r"^[dol]{1}['‘]{1}[\p{Letter} ]+(?:['s]{2})?$" , regex .I )
31+ UC_INITIALS = regex .compile (r"^(?:[\p{Uppercase_Letter} ]{1}\.{1}|[\p{Uppercase_Letter} ]{1}\.{1}[\p{Uppercase_Letter} ]{1})+$" )
32+ MAC_MC = regex .compile (r"^([Mm]c|MC)(\w.+)" )
3233
3334
3435class Immutable (object ):
@@ -57,10 +58,10 @@ def set_small_word_list(small=SMALL):
5758 global SMALL_FIRST
5859 global SMALL_LAST
5960 global SUBPHRASE
60- SMALL_WORDS = re .compile (r'^(%s)$' % small , re .I )
61- SMALL_FIRST = re .compile (r'^([%s]*)(%s)\b' % (PUNCT , small ), re .I )
62- SMALL_LAST = re .compile (r'\b(%s)[%s]?$' % (small , PUNCT ), re .I )
63- SUBPHRASE = re .compile (r'([:.;?!][ ])(%s)' % small )
61+ SMALL_WORDS = regex .compile (r'^(%s)$' % small , regex .I )
62+ SMALL_FIRST = regex .compile (r'^([%s]*)(%s)\b' % (PUNCT , small ), regex .I )
63+ SMALL_LAST = regex .compile (r'\b(%s)[%s]?$' % (small , PUNCT ), regex .I )
64+ SUBPHRASE = regex .compile (r'([:.;?!][ ])(%s)' % small )
6465
6566
6667def titlecase (text , callback = None , small_first_last = True ):
@@ -75,11 +76,11 @@ def titlecase(text, callback=None, small_first_last=True):
7576
7677 """
7778
78- lines = re .split ('[\r \n ]+' , text )
79+ lines = regex .split ('[\r \n ]+' , text )
7980 processed = []
8081 for line in lines :
8182 all_caps = line .upper () == line
82- words = re .split ('[\t ]' , line )
83+ words = regex .split ('[\t ]' , line )
8384 tc_line = []
8485 for word in words :
8586 if callback :
0 commit comments