|
17 | 17 | import binascii |
18 | 18 | import codecs |
19 | 19 | from base64 import b64decode |
20 | | -from urllib.parse import unquote_to_bytes |
| 20 | +from urllib.parse import quote, unquote_to_bytes |
21 | 21 | from urllib.request import pathname2url |
22 | 22 | from ipaddress import IPv6Address, AddressValueError |
23 | 23 | from datetime import datetime, timezone |
@@ -159,6 +159,39 @@ def id_to_datetime_legacy(id): |
159 | 159 | return None |
160 | 160 |
|
161 | 161 |
|
def validate_filename(filename, force_ascii=False):
    """Transliterate the given string into a safe filename.

    Control characters are removed, leading spaces and trailing spaces/dots
    are stripped, a leading dot is prefixed with an underscore, and
    characters that are troublesome on common platforms are substituted.

    See also: scrapbook.validateFilename of WebScrapBook.

    Args:
        filename: the string to sanitize.
        force_ascii: if true, additionally percent-encode (as UTF-8) every
            character that is not in the safe list passed to quote() below.
            This covers all non-ASCII characters, and also spaces and the
            percent sign, which are not in that list.

    Returns:
        The sanitized filename, or a single underscore if nothing is left.
    """
    fn = filename

    # control chars are bad for filename
    fn = re.sub(r'[\x00-\x1F\x7F]+', '', fn)

    # leading/trailing spaces and dots are not allowed on Windows
    # NOTE: leading spaces must be stripped before the leading-dot guard,
    # otherwise a dot exposed by the stripping (e.g. " .foo") slips through
    # unguarded.
    fn = re.sub(r'^ +', '', fn)
    fn = re.sub(r'^\.', '_.', fn)
    fn = re.sub(r'[. ]+$', '', fn)

    # bad chars on most OS
    fn = re.sub(r'[:"?*\\/|]', '_', fn)

    # bad chars on Windows, replace with adequate direction
    fn = fn.replace('<', '(').replace('>', ')')

    # "~" is not allowed by browser.downloads
    fn = fn.replace('~', '-')

    if force_ascii:
        # ASCII letters, digits and "_.-~" are always left as-is by quote();
        # the punctuation listed in `safe` is kept verbatim as well.
        fn = quote(fn, safe="""!_#$&'()*+,-./:;<=>?@[\\]^_`{|}~""")

    # prevent empty filename
    fn = fn or "_"

    return fn
| 193 | + |
| 194 | + |
162 | 195 | ######################################################################### |
163 | 196 | # String handling |
164 | 197 | ######################################################################### |
|
0 commit comments