From af0a76128e476c0a4bfb58d4c0469958f8b230af Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Fri, 12 Jun 2026 10:42:24 +0300
Subject: [PATCH 1/5] Update some libs to `3.14.6`
---
Lib/http/cookies.py | 6 +-
Lib/imaplib.py | 2 +-
Lib/json/__init__.py | 8 +-
Lib/json/tool.py | 3 +-
Lib/locale.py | 477 ++++++++++++++++++++++++----------
Lib/ntpath.py | 16 +-
Lib/os.py | 40 +--
Lib/pydoc_data/module_docs.py | 2 +-
Lib/pydoc_data/topics.py | 56 +++-
Lib/rlcompleter.py | 17 +-
Lib/shutil.py | 16 +-
Lib/tarfile.py | 51 ++--
12 files changed, 480 insertions(+), 214 deletions(-)
diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
index 5c5b14788dc..abebb4b69fd 100644
--- a/Lib/http/cookies.py
+++ b/Lib/http/cookies.py
@@ -391,18 +391,18 @@ def __repr__(self):
return '<%s: %s>' % (self.__class__.__name__, self.OutputString())
def js_output(self, attrs=None):
- import base64
+ import urllib.parse
# Print javascript
output_string = self.OutputString(attrs)
if _has_control_character(output_string):
raise CookieError("Control characters are not allowed in cookies")
# Base64-encode value to avoid template
# injection in cookie values.
- output_encoded = base64.b64encode(output_string.encode('utf-8')).decode("ascii")
+ output_encoded = urllib.parse.quote(output_string, safe='', encoding='utf-8')
return """
""" % (output_encoded,)
diff --git a/Lib/imaplib.py b/Lib/imaplib.py
index cbe129b3e7c..e84ffb2eecf 100644
--- a/Lib/imaplib.py
+++ b/Lib/imaplib.py
@@ -706,7 +706,7 @@ def login(self, user, password):
"""
typ, dat = self._simple_command('LOGIN', user, self._quote(password))
if typ != 'OK':
- raise self.error(dat[-1])
+ raise self.error(dat[-1].decode('UTF-8', 'replace'))
self.state = 'AUTH'
return typ, dat
diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py
index 9eaa4f3fbc1..800cc37f6af 100644
--- a/Lib/json/__init__.py
+++ b/Lib/json/__init__.py
@@ -143,8 +143,8 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True,
If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent
- level of 0 will only insert newlines. ``None`` is the most compact
- representation.
+ level of 0 will only insert newlines. ``None`` is the default and gives
+ a representation with no newlines inserted.
If specified, ``separators`` should be an ``(item_separator,
key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is
@@ -207,8 +207,8 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
If ``indent`` is a non-negative integer, then JSON array elements and
object members will be pretty-printed with that indent level. An indent
- level of 0 will only insert newlines. ``None`` is the most compact
- representation.
+ level of 0 will only insert newlines. ``None`` is the default and gives
+ a representation with no newlines inserted.
If specified, ``separators`` should be an ``(item_separator,
key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is
diff --git a/Lib/json/tool.py b/Lib/json/tool.py
index 1967817add8..0cabbdba85a 100644
--- a/Lib/json/tool.py
+++ b/Lib/json/tool.py
@@ -88,7 +88,8 @@ def main():
infile = open(options.infile, encoding='utf-8')
try:
if options.json_lines:
- objs = (json.loads(line) for line in infile)
+ lines = infile.readlines()
+ objs = (json.loads(line) for line in lines)
else:
objs = (json.load(infile),)
finally:
diff --git a/Lib/locale.py b/Lib/locale.py
index dfedc6386cb..498af087354 100644
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -1496,8 +1496,8 @@ def getpreferredencoding(do_setlocale=True):
# This maps Windows language identifiers to locale strings.
#
# This list has been updated from
-# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
-# to include every locale up to Windows Vista.
+# https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/70feba9f-294e-491e-b6eb-56532684c37f
+# to include every locale up to protocol revision 16.0 (2024-04-23).
#
# NOTE: this mapping is incomplete. If your language is missing, please
# submit a bug report as detailed in the Python devguide at:
@@ -1507,10 +1507,15 @@ def getpreferredencoding(do_setlocale=True):
#
windows_locale = {
- 0x0436: "af_ZA", # Afrikaans
- 0x041c: "sq_AL", # Albanian
- 0x0484: "gsw_FR",# Alsatian - France
+ 0x0036: "af", # Afrikaans
+ 0x0436: "af_ZA", # Afrikaans - South Africa
+ 0x001c: "sq", # Albanian
+ 0x041c: "sq_AL", # Albanian - Albania
+ 0x0084: "gsw", # Alsatian
+ 0x0484: "gsw_FR", # Alsatian - France
+ 0x005e: "am", # Amharic
0x045e: "am_ET", # Amharic - Ethiopia
+ 0x0001: "ar", # Arabic
0x0401: "ar_SA", # Arabic - Saudi Arabia
0x0801: "ar_IQ", # Arabic - Iraq
0x0c01: "ar_EG", # Arabic - Egypt
@@ -1524,39 +1529,72 @@ def getpreferredencoding(do_setlocale=True):
0x2c01: "ar_JO", # Arabic - Jordan
0x3001: "ar_LB", # Arabic - Lebanon
0x3401: "ar_KW", # Arabic - Kuwait
- 0x3801: "ar_AE", # Arabic - United Arab Emirates
+ 0x3801: "ar_AE", # Arabic - U.A.E.
0x3c01: "ar_BH", # Arabic - Bahrain
0x4001: "ar_QA", # Arabic - Qatar
- 0x042b: "hy_AM", # Armenian
+ 0x002b: "hy", # Armenian
+ 0x042b: "hy_AM", # Armenian - Armenia
+ 0x004d: "as", # Assamese
0x044d: "as_IN", # Assamese - India
- 0x042c: "az_AZ", # Azeri - Latin
- 0x082c: "az_AZ", # Azeri - Cyrillic
- 0x046d: "ba_RU", # Bashkir
- 0x042d: "eu_ES", # Basque - Russia
- 0x0423: "be_BY", # Belarusian
- 0x0445: "bn_IN", # Begali
- 0x201a: "bs_BA", # Bosnian - Cyrillic
- 0x141a: "bs_BA", # Bosnian - Latin
+ 0x002c: "az", # Azerbaijani (Latin)
+ 0x742c: "az", # Azerbaijani (Cyrillic)
+ 0x782c: "az", # Azerbaijani (Latin)
+ 0x042c: "az_AZ", # Azerbaijani (Latin) - Azerbaijan
+ 0x0045: "bn", # Bangla
+ 0x0445: "bn_IN", # Bangla - India
+ 0x0845: "bn_BD", # Bangla - Bangladesh
+ 0x006d: "ba", # Bashkir
+ 0x046d: "ba_RU", # Bashkir - Russia
+ 0x002d: "eu", # Basque
+ 0x042d: "eu_ES", # Basque - Spain
+ 0x0023: "be", # Belarusian
+ 0x0423: "be_BY", # Belarusian - Belarus
+ 0x641a: "bs", # Bosnian (Cyrillic)
+ 0x681a: "bs", # Bosnian (Latin)
+ 0x141a: "bs_BA", # Bosnian (Latin) - Bosnia and Herzegovina
+ 0x201a: "bs_BA", # Bosnian (Cyrillic) - Bosnia and Herzegovina
+ 0x781a: "bs", # Bosnian (Latin)
+ 0x007e: "br", # Breton
0x047e: "br_FR", # Breton - France
- 0x0402: "bg_BG", # Bulgarian
-# 0x0455: "my_MM", # Burmese - Not supported
- 0x0403: "ca_ES", # Catalan
- 0x0004: "zh_CHS",# Chinese - Simplified
- 0x0404: "zh_TW", # Chinese - Taiwan
- 0x0804: "zh_CN", # Chinese - PRC
- 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
- 0x1004: "zh_SG", # Chinese - Singapore
- 0x1404: "zh_MO", # Chinese - Macao S.A.R.
- 0x7c04: "zh_CHT",# Chinese - Traditional
+ 0x0002: "bg", # Bulgarian
+ 0x0402: "bg_BG", # Bulgarian - Bulgaria
+ 0x0055: "my", # Burmese
+ 0x0455: "my_MM", # Burmese - Myanmar
+ 0x0003: "ca", # Catalan
+ 0x0403: "ca_ES", # Catalan - Spain
+ 0x0803: "ca_ES", # Valencian - Spain
+ 0x0092: "ku", # Central Kurdish
+ 0x7c92: "ku", # Central Kurdish
+ 0x0492: "ku_IQ", # Central Kurdish - Iraq
+ 0x005c: "chr", # Cherokee
+ 0x7c5c: "chr", # Cherokee
+ 0x045c: "chr_US", # Cherokee - United States
+ 0x0004: "zh", # Chinese (Simplified)
+ 0x7804: "zh", # Chinese (Simplified)
+ 0x7c04: "zh", # Chinese (Traditional)
+ 0x0404: "zh_TW", # Chinese (Traditional) - Taiwan
+ 0x0804: "zh_CN", # Chinese (Simplified) - People's Republic of China
+ 0x0c04: "zh_HK", # Chinese (Traditional) - Hong Kong S.A.R.
+ 0x1004: "zh_SG", # Chinese (Simplified) - Singapore
+ 0x1404: "zh_MO", # Chinese (Traditional) - Macao S.A.R.
+ 0x0083: "co", # Corsican
0x0483: "co_FR", # Corsican - France
- 0x041a: "hr_HR", # Croatian
- 0x101a: "hr_BA", # Croatian - Bosnia
- 0x0405: "cs_CZ", # Czech
- 0x0406: "da_DK", # Danish
- 0x048c: "gbz_AF",# Dari - Afghanistan
- 0x0465: "div_MV",# Divehi - Maldives
- 0x0413: "nl_NL", # Dutch - The Netherlands
+ 0x001a: "hr", # Croatian
+ 0x041a: "hr_HR", # Croatian - Croatia
+ 0x101a: "hr_BA", # Croatian (Latin) - Bosnia and Herzegovina
+ 0x0005: "cs", # Czech
+ 0x0405: "cs_CZ", # Czech - Czech Republic
+ 0x0006: "da", # Danish
+ 0x0406: "da_DK", # Danish - Denmark
+ 0x008c: "prs", # Dari
+ 0x048c: "prs_AF", # Dari - Afghanistan
+ 0x0065: "dv", # Divehi
+ 0x0465: "dv_MV", # Divehi - Maldives
+ 0x0013: "nl", # Dutch
+ 0x0413: "nl_NL", # Dutch - Netherlands
0x0813: "nl_BE", # Dutch - Belgium
+ 0x0c51: "dz_BT", # Dzongkha - Bhutan
+ 0x0009: "en", # English
0x0409: "en_US", # English - United States
0x0809: "en_GB", # English - United Kingdom
0x0c09: "en_AU", # English - Australia
@@ -1564,122 +1602,248 @@ def getpreferredencoding(do_setlocale=True):
0x1409: "en_NZ", # English - New Zealand
0x1809: "en_IE", # English - Ireland
0x1c09: "en_ZA", # English - South Africa
- 0x2009: "en_JA", # English - Jamaica
- 0x2409: "en_CB", # English - Caribbean
+ 0x2009: "en_JM", # English - Jamaica
0x2809: "en_BZ", # English - Belize
- 0x2c09: "en_TT", # English - Trinidad
+ 0x2c09: "en_TT", # English - Trinidad and Tobago
0x3009: "en_ZW", # English - Zimbabwe
- 0x3409: "en_PH", # English - Philippines
+ 0x3409: "en_PH", # English - Republic of the Philippines
+ 0x3c09: "en_HK", # English - Hong Kong
0x4009: "en_IN", # English - India
0x4409: "en_MY", # English - Malaysia
- 0x4809: "en_IN", # English - Singapore
- 0x0425: "et_EE", # Estonian
- 0x0438: "fo_FO", # Faroese
- 0x0464: "fil_PH",# Filipino
- 0x040b: "fi_FI", # Finnish
+ 0x4809: "en_SG", # English - Singapore
+ 0x4c09: "en_AE", # English - United Arab Emirates
+ 0x0025: "et", # Estonian
+ 0x0425: "et_EE", # Estonian - Estonia
+ 0x0038: "fo", # Faroese
+ 0x0438: "fo_FO", # Faroese - Faroe Islands
+ 0x0064: "fil", # Filipino
+ 0x0464: "fil_PH", # Filipino - Philippines
+ 0x000b: "fi", # Finnish
+ 0x040b: "fi_FI", # Finnish - Finland
+ 0x000c: "fr", # French
0x040c: "fr_FR", # French - France
0x080c: "fr_BE", # French - Belgium
0x0c0c: "fr_CA", # French - Canada
0x100c: "fr_CH", # French - Switzerland
0x140c: "fr_LU", # French - Luxembourg
- 0x180c: "fr_MC", # French - Monaco
+ 0x180c: "fr_MC", # French - Principality of Monaco
+ 0x1c0c: "fr_029", # French - Caribbean
+ 0x200c: "fr_RE", # French - Reunion
+ 0x240c: "fr_CD", # French - Congo, DRC
+ 0x280c: "fr_SN", # French - Senegal
+ 0x2c0c: "fr_CM", # French - Cameroon
+ 0x300c: "fr_CI", # French - Côte d'Ivoire
+ 0x340c: "fr_ML", # French - Mali
+ 0x380c: "fr_MA", # French - Morocco
+ 0x3c0c: "fr_HT", # French - Haiti
+ 0x0062: "fy", # Frisian
0x0462: "fy_NL", # Frisian - Netherlands
- 0x0456: "gl_ES", # Galician
- 0x0437: "ka_GE", # Georgian
+ 0x0067: "ff", # Fulah
+ 0x7c67: "ff", # Fulah (Latin)
+ 0x0467: "ff_NG", # Fulah - Nigeria
+ 0x0867: "ff_SN", # Fulah - Senegal
+ 0x0056: "gl", # Galician
+ 0x0456: "gl_ES", # Galician - Spain
+ 0x0037: "ka", # Georgian
+ 0x0437: "ka_GE", # Georgian - Georgia
+ 0x0007: "de", # German
0x0407: "de_DE", # German - Germany
0x0807: "de_CH", # German - Switzerland
0x0c07: "de_AT", # German - Austria
0x1007: "de_LU", # German - Luxembourg
0x1407: "de_LI", # German - Liechtenstein
- 0x0408: "el_GR", # Greek
+ 0x0008: "el", # Greek
+ 0x0408: "el_GR", # Greek - Greece
+ 0x006f: "kl", # Greenlandic
0x046f: "kl_GL", # Greenlandic - Greenland
- 0x0447: "gu_IN", # Gujarati
- 0x0468: "ha_NG", # Hausa - Latin
- 0x040d: "he_IL", # Hebrew
- 0x0439: "hi_IN", # Hindi
- 0x040e: "hu_HU", # Hungarian
- 0x040f: "is_IS", # Icelandic
- 0x0421: "id_ID", # Indonesian
- 0x045d: "iu_CA", # Inuktitut - Syllabics
- 0x085d: "iu_CA", # Inuktitut - Latin
+ 0x0074: "gn", # Guarani
+ 0x0474: "gn_PY", # Guarani - Paraguay
+ 0x0047: "gu", # Gujarati
+ 0x0447: "gu_IN", # Gujarati - India
+ 0x0068: "ha", # Hausa (Latin)
+ 0x7c68: "ha", # Hausa (Latin)
+ 0x0468: "ha_NG", # Hausa (Latin) - Nigeria
+ 0x0075: "haw", # Hawaiian
+ 0x0475: "haw_US", # Hawaiian - United States
+ 0x000d: "he", # Hebrew
+ 0x040d: "he_IL", # Hebrew - Israel
+ 0x0039: "hi", # Hindi
+ 0x0439: "hi_IN", # Hindi - India
+ 0x000e: "hu", # Hungarian
+ 0x040e: "hu_HU", # Hungarian - Hungary
+ 0x000f: "is", # Icelandic
+ 0x040f: "is_IS", # Icelandic - Iceland
+ 0x0070: "ig", # Igbo
+ 0x0470: "ig_NG", # Igbo - Nigeria
+ 0x0021: "id", # Indonesian
+ 0x0421: "id_ID", # Indonesian - Indonesia
+ 0x005d: "iu", # Inuktitut (Latin)
+ 0x785d: "iu", # Inuktitut (Syllabics)
+ 0x7c5d: "iu", # Inuktitut (Latin)
+ 0x045d: "iu_CA", # Inuktitut (Syllabics) - Canada
+ 0x085d: "iu_CA", # Inuktitut (Latin) - Canada
+ 0x003c: "ga", # Irish
0x083c: "ga_IE", # Irish - Ireland
+ 0x0010: "it", # Italian
0x0410: "it_IT", # Italian - Italy
0x0810: "it_CH", # Italian - Switzerland
- 0x0411: "ja_JP", # Japanese
+ 0x0011: "ja", # Japanese
+ 0x0411: "ja_JP", # Japanese - Japan
+ 0x004b: "kn", # Kannada
0x044b: "kn_IN", # Kannada - India
- 0x043f: "kk_KZ", # Kazakh
- 0x0453: "kh_KH", # Khmer - Cambodia
- 0x0486: "qut_GT",# K'iche - Guatemala
+ 0x0471: "kr_NG", # Kanuri (Latin) - Nigeria
+ 0x0060: "ks", # Kashmiri
+ 0x0460: "ks", # Kashmiri - Perso-Arabic
+ 0x0860: "ks_IN", # Kashmiri (Devanagari) - India
+ 0x003f: "kk", # Kazakh
+ 0x043f: "kk_KZ", # Kazakh - Kazakhstan
+ 0x0053: "km", # Khmer
+ 0x0453: "km_KH", # Khmer - Cambodia
+ 0x0087: "rw", # Kinyarwanda
0x0487: "rw_RW", # Kinyarwanda - Rwanda
- 0x0457: "kok_IN",# Konkani
- 0x0412: "ko_KR", # Korean
- 0x0440: "ky_KG", # Kyrgyz
- 0x0454: "lo_LA", # Lao - Lao PDR
- 0x0426: "lv_LV", # Latvian
- 0x0427: "lt_LT", # Lithuanian
- 0x082e: "dsb_DE",# Lower Sorbian - Germany
- 0x046e: "lb_LU", # Luxembourgish
- 0x042f: "mk_MK", # FYROM Macedonian
+ 0x0041: "sw", # Kiswahili
+ 0x0441: "sw_KE", # Kiswahili - Kenya
+ 0x0057: "kok", # Konkani
+ 0x0457: "kok_IN", # Konkani - India
+ 0x0012: "ko", # Korean
+ 0x0412: "ko_KR", # Korean - Korea
+ 0x0040: "ky", # Kyrgyz
+ 0x0440: "ky_KG", # Kyrgyz - Kyrgyzstan
+ 0x0054: "lo", # Lao
+ 0x0454: "lo_LA", # Lao - Lao P.D.R.
+ 0x0476: "la_VA", # Latin - Vatican City
+ 0x0026: "lv", # Latvian
+ 0x0426: "lv_LV", # Latvian - Latvia
+ 0x0027: "lt", # Lithuanian
+ 0x0427: "lt_LT", # Lithuanian - Lithuania
+ 0x7c2e: "dsb", # Lower Sorbian
+ 0x082e: "dsb_DE", # Lower Sorbian - Germany
+ 0x006e: "lb", # Luxembourgish
+ 0x046e: "lb_LU", # Luxembourgish - Luxembourg
+ 0x002f: "mk", # Macedonian
+ 0x042f: "mk_MK", # Macedonian - North Macedonia
+ 0x003e: "ms", # Malay
0x043e: "ms_MY", # Malay - Malaysia
0x083e: "ms_BN", # Malay - Brunei Darussalam
+ 0x004c: "ml", # Malayalam
0x044c: "ml_IN", # Malayalam - India
- 0x043a: "mt_MT", # Maltese
- 0x0481: "mi_NZ", # Maori
- 0x047a: "arn_CL",# Mapudungun
- 0x044e: "mr_IN", # Marathi
- 0x047c: "moh_CA",# Mohawk - Canada
- 0x0450: "mn_MN", # Mongolian - Cyrillic
- 0x0850: "mn_CN", # Mongolian - PRC
- 0x0461: "ne_NP", # Nepali
- 0x0414: "nb_NO", # Norwegian - Bokmal
- 0x0814: "nn_NO", # Norwegian - Nynorsk
+ 0x003a: "mt", # Maltese
+ 0x043a: "mt_MT", # Maltese - Malta
+ 0x0081: "mi", # Maori
+ 0x0481: "mi_NZ", # Maori - New Zealand
+ 0x007a: "arn", # Mapudungun
+ 0x047a: "arn_CL", # Mapudungun - Chile
+ 0x004e: "mr", # Marathi
+ 0x044e: "mr_IN", # Marathi - India
+ 0x007c: "moh", # Mohawk
+ 0x047c: "moh_CA", # Mohawk - Canada
+ 0x0050: "mn", # Mongolian (Cyrillic)
+ 0x7850: "mn", # Mongolian (Cyrillic)
+ 0x7c50: "mn", # Mongolian (Traditional Mongolian)
+ 0x0450: "mn_MN", # Mongolian (Cyrillic) - Mongolia
+ 0x0c50: "mn_MN", # Mongolian (Traditional Mongolian) - Mongolia
+ 0x0061: "ne", # Nepali
+ 0x0461: "ne_NP", # Nepali - Nepal
+ 0x0861: "ne_IN", # Nepali - India
+ 0x0014: "no", # Norwegian (Bokmal)
+ 0x0414: "nb_NO", # Norwegian (Bokmal) - Norway
+ 0x0814: "nn_NO", # Norwegian (Nynorsk) - Norway
+ 0x7814: "nn", # Norwegian (Nynorsk)
+ 0x7c14: "nb", # Norwegian (Bokmal)
+ 0x0082: "oc", # Occitan
0x0482: "oc_FR", # Occitan - France
- 0x0448: "or_IN", # Oriya - India
+ 0x0048: "or", # Odia
+ 0x0448: "or_IN", # Odia - India
+ 0x0072: "om", # Oromo
+ 0x0472: "om_ET", # Oromo - Ethiopia
+ 0x0063: "ps", # Pashto
0x0463: "ps_AF", # Pashto - Afghanistan
- 0x0429: "fa_IR", # Persian
- 0x0415: "pl_PL", # Polish
+ 0x0029: "fa", # Persian
+ 0x0429: "fa_IR", # Persian - Iran
+ 0x0015: "pl", # Polish
+ 0x0415: "pl_PL", # Polish - Poland
+ 0x0016: "pt", # Portuguese
0x0416: "pt_BR", # Portuguese - Brazil
0x0816: "pt_PT", # Portuguese - Portugal
- 0x0446: "pa_IN", # Punjabi
- 0x046b: "quz_BO",# Quechua (Bolivia)
- 0x086b: "quz_EC",# Quechua (Ecuador)
- 0x0c6b: "quz_PE",# Quechua (Peru)
+ 0x0046: "pa", # Punjabi
+ 0x7c46: "pa", # Punjabi
+ 0x0446: "pa_IN", # Punjabi - India
+ 0x0846: "pa_PK", # Punjabi - Islamic Republic of Pakistan
+ 0x006b: "quz", # Quechua
+ 0x046b: "quz_BO", # Quechua - Bolivia
+ 0x086b: "quz_EC", # Quechua - Ecuador
+ 0x0c6b: "quz_PE", # Quechua - Peru
+ 0x0018: "ro", # Romanian
0x0418: "ro_RO", # Romanian - Romania
- 0x0417: "rm_CH", # Romansh
- 0x0419: "ru_RU", # Russian
- 0x243b: "smn_FI",# Sami Finland
- 0x103b: "smj_NO",# Sami Norway
- 0x143b: "smj_SE",# Sami Sweden
- 0x043b: "se_NO", # Sami Northern Norway
- 0x083b: "se_SE", # Sami Northern Sweden
- 0x0c3b: "se_FI", # Sami Northern Finland
- 0x203b: "sms_FI",# Sami Skolt
- 0x183b: "sma_NO",# Sami Southern Norway
- 0x1c3b: "sma_SE",# Sami Southern Sweden
- 0x044f: "sa_IN", # Sanskrit
- 0x0c1a: "sr_SP", # Serbian - Cyrillic
- 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
- 0x081a: "sr_SP", # Serbian - Latin
- 0x181a: "sr_BA", # Serbian - Bosnia Latin
+ 0x0818: "ro_MD", # Romanian - Moldova
+ 0x0017: "rm", # Romansh
+ 0x0417: "rm_CH", # Romansh - Switzerland
+ 0x0019: "ru", # Russian
+ 0x0419: "ru_RU", # Russian - Russia
+ 0x0819: "ru_MD", # Russian - Moldova
+ 0x0085: "sah", # Sakha
+ 0x0485: "sah_RU", # Sakha - Russia
+ 0x003b: "se", # Sami (Northern)
+ 0x043b: "se_NO", # Sami (Northern) - Norway
+ 0x083b: "se_SE", # Sami (Northern) - Sweden
+ 0x0c3b: "se_FI", # Sami (Northern) - Finland
+ 0x7c3b: "smj", # Sami (Lule)
+ 0x103b: "smj_NO", # Sami (Lule) - Norway
+ 0x143b: "smj_SE", # Sami (Lule) - Sweden
+ 0x783b: "sma", # Sami (Southern)
+ 0x183b: "sma_NO", # Sami (Southern) - Norway
+ 0x1c3b: "sma_SE", # Sami (Southern) - Sweden
+ 0x743b: "sms", # Sami (Skolt)
+ 0x203b: "sms_FI", # Sami (Skolt) - Finland
+ 0x703b: "smn", # Sami (Inari)
+ 0x243b: "smn_FI", # Sami (Inari) - Finland
+ 0x004f: "sa", # Sanskrit
+ 0x044f: "sa_IN", # Sanskrit - India
+ 0x0091: "gd", # Scottish Gaelic
+ 0x0491: "gd_GB", # Scottish Gaelic - United Kingdom
+ 0x6c1a: "sr", # Serbian (Cyrillic)
+ 0x701a: "sr", # Serbian (Latin)
+ 0x7c1a: "sr", # Serbian (Latin)
+ 0x081a: "sr_CS", # Serbian (Latin) - Serbia and Montenegro (Former)
+ 0x0c1a: "sr_CS", # Serbian (Cyrillic) - Serbia and Montenegro (Former)
+ 0x181a: "sr_BA", # Serbian (Latin) - Bosnia and Herzegovina
+ 0x1c1a: "sr_BA", # Serbian (Cyrillic) - Bosnia and Herzegovina
+ 0x241a: "sr_RS", # Serbian (Latin) - Serbia
+ 0x281a: "sr_RS", # Serbian (Cyrillic) - Serbia
+ 0x2c1a: "sr_ME", # Serbian (Latin) - Montenegro
+ 0x301a: "sr_ME", # Serbian (Cyrillic) - Montenegro
+ 0x006c: "nso", # Sesotho sa Leboa
+ 0x046c: "nso_ZA", # Sesotho sa Leboa - South Africa
+ 0x0032: "tn", # Setswana
+ 0x0432: "tn_ZA", # Setswana - South Africa
+ 0x0832: "tn_BW", # Setswana - Botswana
+ 0x0059: "sd", # Sindhi
+ 0x7c59: "sd", # Sindhi
+ 0x0859: "sd_PK", # Sindhi - Islamic Republic of Pakistan
+ 0x005b: "si", # Sinhala
0x045b: "si_LK", # Sinhala - Sri Lanka
- 0x046c: "ns_ZA", # Northern Sotho
- 0x0432: "tn_ZA", # Setswana - Southern Africa
- 0x041b: "sk_SK", # Slovak
- 0x0424: "sl_SI", # Slovenian
+ 0x001b: "sk", # Slovak
+ 0x041b: "sk_SK", # Slovak - Slovakia
+ 0x0024: "sl", # Slovenian
+ 0x0424: "sl_SI", # Slovenian - Slovenia
+ 0x0477: "so_SO", # Somali - Somalia
+ 0x0030: "st", # Sotho
+ 0x0430: "st_ZA", # Sotho - South Africa
+ 0x000a: "es", # Spanish
0x040a: "es_ES", # Spanish - Spain
0x080a: "es_MX", # Spanish - Mexico
- 0x0c0a: "es_ES", # Spanish - Spain (Modern)
+ 0x0c0a: "es_ES", # Spanish - Spain
0x100a: "es_GT", # Spanish - Guatemala
0x140a: "es_CR", # Spanish - Costa Rica
0x180a: "es_PA", # Spanish - Panama
0x1c0a: "es_DO", # Spanish - Dominican Republic
- 0x200a: "es_VE", # Spanish - Venezuela
+ 0x200a: "es_VE", # Spanish - Bolivarian Republic of Venezuela
0x240a: "es_CO", # Spanish - Colombia
0x280a: "es_PE", # Spanish - Peru
0x2c0a: "es_AR", # Spanish - Argentina
0x300a: "es_EC", # Spanish - Ecuador
0x340a: "es_CL", # Spanish - Chile
- 0x380a: "es_UR", # Spanish - Uruguay
+ 0x380a: "es_UY", # Spanish - Uruguay
0x3c0a: "es_PY", # Spanish - Paraguay
0x400a: "es_BO", # Spanish - Bolivia
0x440a: "es_SV", # Spanish - El Salvador
@@ -1687,36 +1851,87 @@ def getpreferredencoding(do_setlocale=True):
0x4c0a: "es_NI", # Spanish - Nicaragua
0x500a: "es_PR", # Spanish - Puerto Rico
0x540a: "es_US", # Spanish - United States
-# 0x0430: "", # Sutu - Not supported
- 0x0441: "sw_KE", # Swahili
+ 0x5c0a: "es_CU", # Spanish - Cuba
+ 0x001d: "sv", # Swedish
0x041d: "sv_SE", # Swedish - Sweden
0x081d: "sv_FI", # Swedish - Finland
- 0x045a: "syr_SY",# Syriac
- 0x0428: "tg_TJ", # Tajik - Cyrillic
- 0x085f: "tmz_DZ",# Tamazight - Latin
- 0x0449: "ta_IN", # Tamil
- 0x0444: "tt_RU", # Tatar
- 0x044a: "te_IN", # Telugu
- 0x041e: "th_TH", # Thai
- 0x0851: "bo_BT", # Tibetan - Bhutan
- 0x0451: "bo_CN", # Tibetan - PRC
- 0x041f: "tr_TR", # Turkish
- 0x0442: "tk_TM", # Turkmen - Cyrillic
- 0x0480: "ug_CN", # Uighur - Arabic
- 0x0422: "uk_UA", # Ukrainian
- 0x042e: "wen_DE",# Upper Sorbian - Germany
- 0x0420: "ur_PK", # Urdu
+ 0x005a: "syr", # Syriac
+ 0x045a: "syr_SY", # Syriac - Syria
+ 0x0028: "tg", # Tajik (Cyrillic)
+ 0x7c28: "tg", # Tajik (Cyrillic)
+ 0x0428: "tg_TJ", # Tajik (Cyrillic) - Tajikistan
+ 0x005f: "tzm", # Tamazight (Latin)
+ 0x785f: "tzm", # Tamazight (Tifinagh)
+ 0x7c5f: "tzm", # Tamazight (Latin)
+ 0x085f: "tzm_DZ", # Tamazight (Latin) - Algeria
+ 0x045f: "tzm_MA", # Central Atlas Tamazight (Arabic) - Morocco
+ 0x105f: "tzm_MA", # Central Atlas Tamazight (Tifinagh) - Morocco
+ 0x0049: "ta", # Tamil
+ 0x0449: "ta_IN", # Tamil - India
+ 0x0849: "ta_LK", # Tamil - Sri Lanka
+ 0x0044: "tt", # Tatar
+ 0x0444: "tt_RU", # Tatar - Russia
+ 0x004a: "te", # Telugu
+ 0x044a: "te_IN", # Telugu - India
+ 0x001e: "th", # Thai
+ 0x041e: "th_TH", # Thai - Thailand
+ 0x0051: "bo", # Tibetan
+ 0x0451: "bo_CN", # Tibetan - People's Republic of China
+ 0x0073: "ti", # Tigrinya
+ 0x0473: "ti_ET", # Tigrinya - Ethiopia
+ 0x0873: "ti_ER", # Tigrinya - Eritrea
+ 0x0031: "ts", # Tsonga
+ 0x0431: "ts_ZA", # Tsonga - South Africa
+ 0x001f: "tr", # Turkish
+ 0x041f: "tr_TR", # Turkish - Turkey
+ 0x0042: "tk", # Turkmen
+ 0x0442: "tk_TM", # Turkmen - Turkmenistan
+ 0x0022: "uk", # Ukrainian
+ 0x0422: "uk_UA", # Ukrainian - Ukraine
+ 0x002e: "hsb", # Upper Sorbian
+ 0x042e: "hsb_DE", # Upper Sorbian - Germany
+ 0x0020: "ur", # Urdu
+ 0x0420: "ur_PK", # Urdu - Islamic Republic of Pakistan
0x0820: "ur_IN", # Urdu - India
- 0x0443: "uz_UZ", # Uzbek - Latin
- 0x0843: "uz_UZ", # Uzbek - Cyrillic
- 0x042a: "vi_VN", # Vietnamese
- 0x0452: "cy_GB", # Welsh
+ 0x0080: "ug", # Uyghur
+ 0x0480: "ug_CN", # Uyghur - People's Republic of China
+ 0x0043: "uz", # Uzbek (Latin)
+ 0x7843: "uz", # Uzbek (Cyrillic)
+ 0x7c43: "uz", # Uzbek (Latin)
+ 0x0443: "uz_UZ", # Uzbek (Latin) - Uzbekistan
+ 0x0033: "ve", # Venda
+ 0x0433: "ve_ZA", # Venda - South Africa
+ 0x002a: "vi", # Vietnamese
+ 0x042a: "vi_VN", # Vietnamese - Vietnam
+ 0x0052: "cy", # Welsh
+ 0x0452: "cy_GB", # Welsh - United Kingdom
+ 0x0088: "wo", # Wolof
0x0488: "wo_SN", # Wolof - Senegal
+ 0x0034: "xh", # Xhosa
0x0434: "xh_ZA", # Xhosa - South Africa
- 0x0485: "sah_RU",# Yakut - Cyrillic
- 0x0478: "ii_CN", # Yi - PRC
+ 0x0078: "ii", # Yi
+ 0x0478: "ii_CN", # Yi - People's Republic of China
+ 0x043d: "yi_001", # Yiddish - World
+ 0x006a: "yo", # Yoruba
0x046a: "yo_NG", # Yoruba - Nigeria
- 0x0435: "zu_ZA", # Zulu
+ 0x0035: "zu", # Zulu
+ 0x0435: "zu_ZA", # Zulu - South Africa
+ 0x0086: "qut", # K'iche
+
+# 0x0001007f: "x-IV-mathan", # math alphanumeric sorting
+ 0x00010407: "de_DE",
+ 0x0001040e: "hu_HU",
+ 0x00010437: "ka_GE",
+ 0x00020804: "zh_CN",
+ 0x00021004: "zh_SG",
+ 0x00021404: "zh_MO",
+ 0x00030404: "zh_TW",
+ 0x00040404: "zh_TW",
+ 0x00040411: "ja_JP",
+ 0x00040c04: "zh_HK",
+ 0x00041404: "zh_MO",
+ 0x00050804: "zh_CN",
+ 0x00051004: "zh_SG",
}
def _print_locale():
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 01f060e70be..eb127ec2632 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -152,12 +152,14 @@ def splitdrive(p):
It is always true that:
result[0] + result[1] == p
- If the path contained a drive letter, drive_or_unc will contain everything
- up to and including the colon. e.g. splitdrive("c:/dir") returns ("c:", "/dir")
+ If the path contained a drive letter, drive_or_unc will contain
+ everything up to and including the colon. e.g. splitdrive("c:/dir")
+ returns ("c:", "/dir")
- If the path contained a UNC path, the drive_or_unc will contain the host name
- and share up to but not including the fourth directory separator character.
- e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir")
+ If the path contained a UNC path, the drive_or_unc will contain the
+ host name and share up to but not including the fourth directory
+ separator character. e.g. splitdrive("//host/computer/dir") returns
+ ("//host/computer", "/dir")
Paths cannot contain both a drive letter and a UNC path.
@@ -222,8 +224,8 @@ def splitroot(p):
def split(p):
"""Split a pathname.
- Return tuple (head, tail) where tail is everything after the final slash.
- Either part may be empty."""
+ Return tuple (head, tail) where tail is everything after the final
+ slash. Either part may be empty."""
p = os.fspath(p)
seps = _get_bothseps(p)
d, r, p = splitroot(p)
diff --git a/Lib/os.py b/Lib/os.py
index ac03b416390..9bb00f45f01 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -211,11 +211,11 @@ def _add(str, fn):
def makedirs(name, mode=0o777, exist_ok=False):
"""makedirs(name [, mode=0o777][, exist_ok=False])
- Super-mkdir; create a leaf directory and all intermediate ones. Works like
- mkdir, except that any intermediate path segment (not just the rightmost)
- will be created if it does not exist. If the target directory already
- exists, raise an OSError if exist_ok is False. Otherwise no exception is
- raised. This is recursive.
+ Super-mkdir; create a leaf directory and all intermediate ones. Works
+ like mkdir, except that any intermediate path segment (not just the
+ rightmost) will be created if it does not exist. If the target
+ directory already exists, raise an OSError if exist_ok is False.
+ Otherwise no exception is raised. This is recursive.
"""
head, tail = path.split(name)
@@ -303,12 +303,12 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
dirpath, dirnames, filenames
dirpath is a string, the path to the directory. dirnames is a list of
- the names of the subdirectories in dirpath (including symlinks to directories,
- and excluding '.' and '..').
+ the names of the subdirectories in dirpath (including symlinks to
+ directories, and excluding '.' and '..').
filenames is a list of the names of the non-directory files in dirpath.
- Note that the names in the lists are just names, with no path components.
- To get a full path (which begins with top) to a file or directory in
- dirpath, do os.path.join(dirpath, name).
+ Note that the names in the lists are just names, with no path
+ components. To get a full path (which begins with top) to a file or
+ directory in dirpath, do os.path.join(dirpath, name).
If optional arg 'topdown' is true or not specified, the triple for a
directory is generated before the triples for any of its subdirectories
@@ -318,13 +318,13 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
When topdown is true, the caller can modify the dirnames list in-place
(e.g., via del or slice assignment), and walk will only recurse into the
- subdirectories whose names remain in dirnames; this can be used to prune the
- search, or to impose a specific order of visiting. Modifying dirnames when
- topdown is false has no effect on the behavior of os.walk(), since the
- directories in dirnames have already been generated by the time dirnames
- itself is generated. No matter the value of topdown, the list of
- subdirectories is retrieved before the tuples for the directory and its
- subdirectories are generated.
+ subdirectories whose names remain in dirnames; this can be used to prune
+ the search, or to impose a specific order of visiting. Modifying
+ dirnames when topdown is false has no effect on the behavior of
+ os.walk(), since the directories in dirnames have already been generated
+ by the time dirnames itself is generated. No matter the value of
+ topdown, the list of subdirectories is retrieved before the tuples for
+ the directory and its subdirectories are generated.
By default errors from the os.scandir() call are ignored. If
optional arg 'onerror' is specified, it should be a function; it
@@ -449,9 +449,9 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
The advantage of fwalk() over walk() is that it's safe against symlink
races (when follow_symlinks is False).
- If dir_fd is not None, it should be a file descriptor open to a directory,
- and top should be relative; top will then be relative to that directory.
- (dir_fd is always supported for fwalk.)
+ If dir_fd is not None, it should be a file descriptor open to
+ a directory, and top should be relative; top will then be relative to
+ that directory. (dir_fd is always supported for fwalk.)
Caution:
Since fwalk() yields file descriptors, those are only valid until the
diff --git a/Lib/pydoc_data/module_docs.py b/Lib/pydoc_data/module_docs.py
index 67a1ba769af..69b9046f49f 100644
--- a/Lib/pydoc_data/module_docs.py
+++ b/Lib/pydoc_data/module_docs.py
@@ -1,4 +1,4 @@
-# Autogenerated by Sphinx on Sun May 10 13:21:26 2026
+# Autogenerated by Sphinx on Wed Jun 10 13:03:46 2026
# as part of the release process.
module_docs = {
diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py
index e91c8a5cb94..05d283c9540 100644
--- a/Lib/pydoc_data/topics.py
+++ b/Lib/pydoc_data/topics.py
@@ -1,4 +1,4 @@
-# Autogenerated by Sphinx on Sun May 10 13:21:26 2026
+# Autogenerated by Sphinx on Wed Jun 10 13:03:46 2026
# as part of the release process.
topics = {
@@ -2227,9 +2227,9 @@ def foo():
The match statement is used for pattern matching. Syntax:
match_stmt: 'match' subject_expr ":" NEWLINE INDENT case_block+ DEDENT
- subject_expr: `!star_named_expression` "," `!star_named_expressions`?
- | `!named_expression`
- case_block: 'case' patterns [guard] ":" `!block`
+ subject_expr: flexible_expression "," [flexible_expression_list [',']]
+ | assignment_expression
+ case_block: 'case' patterns [guard] ":" suite
Note:
@@ -2320,7 +2320,7 @@ def foo():
Guards
------
- guard: "if" `!named_expression`
+ guard: "if" assignment_expression
A "guard" (which is part of the "case") must succeed for code inside
the "case" block to execute. It takes the form: "if" followed by an
@@ -5772,7 +5772,8 @@ class of the instance or a *non-virtual base class* thereof. The
| | is not supported. |
+-----------+------------------------------------------------------------+
-For a locale aware separator, use the "'n'" presentation type instead.
+For a locale-aware separator, use the "'n'" float presentation type or
+integer presentation type instead.
Changed in version 3.1: Added the "','" option (see also **PEP 378**).
@@ -5818,7 +5819,10 @@ class of the instance or a *non-virtual base class* thereof. The
+-----------+------------------------------------------------------------+
| "'n'" | Number. This is the same as "'d'", except that it uses the |
| | current locale setting to insert the appropriate digit |
- | | group separators. |
+ | | group separators. Note that the default locale is not the |
+ | | system locale. Depending on your use case, you may wish to |
+ | | set "LC_NUMERIC" with "locale.setlocale()" before using |
+ | | "'n'". |
+-----------+------------------------------------------------------------+
| None | The same as "'d'". |
+-----------+------------------------------------------------------------+
@@ -5892,7 +5896,10 @@ class of the instance or a *non-virtual base class* thereof. The
+-----------+------------------------------------------------------------+
| "'n'" | Number. This is the same as "'g'", except that it uses the |
| | current locale setting to insert the appropriate digit |
- | | group separators for the integral part of a number. |
+ | | group separators for the integral part of a number. Note |
+ | | that the default locale is not the system locale. |
+ | | Depending on your use case, you may wish to set |
+ | | "LC_NUMERIC" with "locale.setlocale()" before using "'n'". |
+-----------+------------------------------------------------------------+
| "'%'" | Percentage. Multiplies the number by 100 and displays in |
| | fixed ("'f'") format, followed by a percent sign. |
@@ -10198,9 +10205,22 @@ class is used in a class pattern with positional arguments, each
decimal characters and digits that need special handling, such as
the compatibility superscript digits. This covers digits which
cannot be used to form numbers in base 10, like the Kharosthi
- numbers. Formally, a digit is a character that has the property
+ numbers. Formally, a digit is a character that has the property
value Numeric_Type=Digit or Numeric_Type=Decimal.
+ For example:
+
+ >>> '0123456789'.isdigit()
+ True
+ >>> '٠١٢٣٤٥٦٧٨٩'.isdigit() # Arabic-Indic digits zero to nine
+ True
+ >>> '⅕'.isdigit() # Vulgar fraction one fifth
+ False
+ >>> '²'.isdecimal(), '²'.isdigit(), '²'.isnumeric()
+ (False, True, True)
+
+ See also "isdecimal()" and "isnumeric()".
+
str.isidentifier()
Return "True" if the string is a valid identifier according to the
@@ -10236,15 +10256,14 @@ class is used in a class pattern with positional arguments, each
>>> '0123456789'.isnumeric()
True
- >>> '٠١٢٣٤٥٦٧٨٩'.isnumeric() # Arabic-indic digit zero to nine
+ >>> '٠١٢٣٤٥٦٧٨٩'.isnumeric() # Arabic-Indic digits zero to nine
True
>>> '⅕'.isnumeric() # Vulgar fraction one fifth
True
>>> '²'.isdecimal(), '²'.isdigit(), '²'.isnumeric()
(False, True, True)
- See also "isdecimal()" and "isdigit()". Numeric characters are a
- superset of decimal numbers.
+ See also "isdecimal()" and "isdigit()".
str.isprintable()
@@ -10626,7 +10645,7 @@ class is used in a class pattern with positional arguments, each
>>> " foo ".split(maxsplit=0)
['foo ']
- See also "join()".
+ See also "join()" and "rsplit()".
str.splitlines(keepends=False)
@@ -10716,6 +10735,8 @@ class is used in a class pattern with positional arguments, each
not a prefix or suffix; rather, all combinations of its values are
stripped.
+ Whitespace characters are defined by "str.isspace()".
+
For example:
>>> ' spacious '.strip()
@@ -13311,6 +13332,9 @@ class dict(iterable, /, **kwargs)
insertion order. This behavior was an implementation detail of
CPython from 3.6.
+ Dictionaries are generic over two types, signifying (respectively)
+ the types of the dictionary’s keys and values.
+
These are the operations that dictionaries support (and therefore,
custom mapping types should support too):
@@ -14015,6 +14039,8 @@ class list(iterable=(), /)
Many other operations also produce lists, including the "sorted()"
built-in.
+ Lists are generic over the types of their items.
+
Lists implement all of the common and mutable sequence operations.
Lists also provide the following additional method:
@@ -14105,6 +14131,10 @@ class tuple(iterable=(), /)
Tuples implement all of the common sequence operations.
+ Tuples are generic over the types of their contents. For more
+ information, refer to the typing documentation on annotating
+ tuples.
+
For heterogeneous collections of data where access by name is clearer
than access by index, "collections.namedtuple()" may be a more
appropriate choice than a simple tuple object.
diff --git a/Lib/rlcompleter.py b/Lib/rlcompleter.py
index 23eb0020f42..e75dd0a9e3d 100644
--- a/Lib/rlcompleter.py
+++ b/Lib/rlcompleter.py
@@ -34,6 +34,7 @@
import inspect
import keyword
import re
+import types
import __main__
import warnings
@@ -178,14 +179,14 @@ def attr_matches(self, text):
if (word[:n] == attr and
not (noprefix and word[:n+1] == noprefix)):
match = "%s.%s" % (expr, word)
- if isinstance(getattr(type(thisobject), word, None),
- property):
- # bpo-44752: thisobject.word is a method decorated by
- # `@property`. What follows applies a postfix if
- # thisobject.word is callable, but know we know that
- # this is not callable (because it is a property).
- # Also, getattr(thisobject, word) will evaluate the
- # property method, which is not desirable.
+
+ class_attr = getattr(type(thisobject), word, None)
+ if isinstance(
+ class_attr,
+ (property, types.GetSetDescriptorType, types.MemberDescriptorType)
+ ) or (hasattr(class_attr, '__get__') and not callable(class_attr)):
+ # Avoid evaluating descriptors, which could run
+ # arbitrary code or raise exceptions.
matches.append(match)
continue
if (value := getattr(thisobject, word, None)) is not None:
diff --git a/Lib/shutil.py b/Lib/shutil.py
index b7608f7edfc..a3a112f6954 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -885,10 +885,14 @@ def move(src, dst, copy_function=copy2):
If dst already exists but is not a directory, it may be overwritten
depending on os.rename() semantics.
- If the destination is on our current filesystem, then rename() is used.
- Otherwise, src is copied to the destination and then removed. Symlinks are
- recreated under the new name if os.rename() fails because of cross
- filesystem renames.
+ os.rename() is preferably used if the source and destination are on the
+ same filesystem. In case os.rename() fails due to OSError (e.g. the user
+ has write permission to *dst* file but not to its parent directory),
+ this function falls back to using *copy_function* silently.
+ Symlinks are also recreated under the new name if os.rename() fails
+ because of cross filesystem renames.
+
+ It's recommended to use os.rename() if an atomic move is strictly required.
The optional `copy_function` argument is a callable that will be used
to copy the source or it will be delegated to `copytree`.
@@ -940,8 +944,8 @@ def move(src, dst, copy_function=copy2):
return real_dst
def _destinsrc(src, dst):
- src = os.path.abspath(src)
- dst = os.path.abspath(dst)
+ src = os.path.realpath(src)
+ dst = os.path.realpath(dst)
if not src.endswith(os.path.sep):
src += os.path.sep
if not dst.endswith(os.path.sep):
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 414aefe9744..e6734db24f6 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -498,7 +498,7 @@ def _init_read_gz(self):
if flag & 4:
xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
- self.read(xlen)
+ self.__read(xlen)
if flag & 8:
while True:
s = self.__read(1)
@@ -830,16 +830,22 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
if member.islnk() or member.issym():
if os.path.isabs(member.linkname):
raise AbsoluteLinkError(member)
+ # A link member that resolves to the destination directory itself
+ # would replace it with a (sym)link, redirecting the destination
+ # for all subsequent members.
+ if target_path == dest_path:
+ raise OutsideDestinationError(member, target_path)
normalized = os.path.normpath(member.linkname)
if normalized != member.linkname:
new_attrs['linkname'] = normalized
if member.issym():
- target_path = os.path.join(dest_path,
- os.path.dirname(name),
- member.linkname)
+ # The symlink is created at `name` with trailing separators
+ # stripped, so its target is relative to the directory
+ # containing that path.
+ link_dir = os.path.dirname(name.rstrip('/' + os.sep))
+ target_path = os.path.join(dest_path, link_dir, normalized)
else:
- target_path = os.path.join(dest_path,
- member.linkname)
+ target_path = os.path.join(dest_path, normalized)
target_path = os.path.realpath(target_path,
strict=os.path.ALLOW_MISSING)
if os.path.commonpath([target_path, dest_path]) != dest_path:
@@ -893,11 +899,14 @@ class TarInfo(object):
size = 'Size in bytes.',
mtime = 'Time of last modification.',
chksum = 'Header checksum.',
- type = ('File type. type is usually one of these constants: '
- 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
- 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
+ type = ('File type. type is usually one of these constants: '
+ 'REGTYPE,\n'
+ 'AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
+ 'CONTTYPE, CHRTYPE,\n'
+ 'BLKTYPE, GNUTYPE_SPARSE.'),
linkname = ('Name of the target file name, which is only present '
- 'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
+ 'in TarInfo\n'
+ 'objects of type LNKTYPE and SYMTYPE.'),
uname = 'User name.',
gname = 'Group name.',
devmajor = 'Device major number.',
@@ -905,7 +914,8 @@ class TarInfo(object):
offset = 'The tar header starts here.',
offset_data = "The file's data starts here.",
pax_headers = ('A dictionary containing key-value pairs of an '
- 'associated pax extended header.'),
+ 'associated pax\n'
+ 'extended header.'),
sparse = 'Sparse member information.',
_tarfile = None,
_sparse_structs = None,
@@ -2267,10 +2277,11 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None):
return tarinfo
def list(self, verbose=True, *, members=None):
- """Print a table of contents to sys.stdout. If 'verbose' is False, only
- the names of the members are printed. If it is True, an 'ls -l'-like
- output is produced. 'members' is optional and must be a subset of the
- list returned by getmembers().
+ """Print a table of contents to sys.stdout.
+
+ If 'verbose' is False, only the names of the members are printed.
+ If it is True, an 'ls -l'-like output is produced. 'members' is
+ optional and must be a subset of the list returned by getmembers().
"""
# Convert tarinfo type to stat type.
type2mode = {REGTYPE: stat.S_IFREG, SYMTYPE: stat.S_IFLNK,
@@ -2361,10 +2372,12 @@ def add(self, name, arcname=None, recursive=True, *, filter=None):
self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
- """Add the TarInfo object 'tarinfo' to the archive. If 'tarinfo' represents
- a non zero-size regular file, the 'fileobj' argument should be a binary file,
- and tarinfo.size bytes are read from it and added to the archive.
- You can create TarInfo objects directly, or by using gettarinfo().
+ """Add the TarInfo object 'tarinfo' to the archive.
+
+ If 'tarinfo' represents a non zero-size regular file, the 'fileobj'
+ argument should be a binary file, and tarinfo.size bytes are read
+ from it and added to the archive. You can create TarInfo objects
+ directly, or by using gettarinfo().
"""
self._check("awx")
From 940f11343fcb935768e9a698a8e090c770c5216e Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Fri, 12 Jun 2026 10:45:52 +0300
Subject: [PATCH 2/5] Update test related
---
Lib/test/datetimetester.py | 30 ++++++++++++++++++++++++++++++
Lib/test/support/__init__.py | 15 +++++++++++++++
2 files changed, 45 insertions(+)
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index c6534adb1fc..c1bb6138a1b 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -7313,6 +7313,36 @@ def func():
self.assertEqual(out, b"a" * 8)
self.assertEqual(err, b"")
+ @support.cpython_only
+ @support.subTests(("setup", "call"), [
+ ("obj = _datetime.timedelta", "obj(seconds=2)"),
+ ("obj = _datetime.timedelta(seconds=2)", "obj.total_seconds()"),
+ ("obj = _datetime.date(2026, 6, 7)", "obj.isocalendar()"),
+ ])
+ def test_static_datetime_types_outlive_collected_module(self, setup, call):
+ # gh-151039: This code used to crash
+ script = f"""if True:
+ import sys, gc
+ import _datetime
+
+ {setup} # static C type, survives the module
+ del sys.modules['_datetime']
+ del _datetime
+ sys.modules['_datetime'] = None # block re-import
+ gc.collect() # module object is collected
+
+ try:
+ {call} # used to be a segmentation fault
+ except ImportError:
+ pass
+ else:
+ raise AssertionError("ImportError not raised")
+ """
+ rc, out, err = script_helper.assert_python_ok("-c", script)
+ self.assertEqual(rc, 0)
+ self.assertEqual(out, b'')
+ self.assertEqual(err, b'')
+
def load_tests(loader, standard_tests, pattern):
standard_tests.addTest(ZoneInfoCompleteTest())
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 6635ec3474e..701d34bba2d 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -3227,3 +3227,18 @@ def control_characters_c0() -> list[str]:
C0 control characters defined as the byte range 0x00-0x1F, and 0x7F.
"""
return [chr(c) for c in range(0x00, 0x20)] + ["\x7F"]
+
+
+STATUS_DLL_INIT_FAILED = 0xC0000142
+def skip_on_low_desktop_heap_memory_subprocess(returncode):
+ if sys.platform not in ('win32', 'cygwin'):
+ return
+ # On Windows, STATUS_DLL_INIT_FAILED is a generic error code that could
+ # come from any of the DLLs being loaded when a new Python process is
+ # created. In practice, it's likely a memory allocation failure in the
+ # desktop heap memory which caused the DLL init failure, especially for
+ # processes created with the CREATE_NEW_CONSOLE creation flag. See the article:
+ # https://learn.microsoft.com/en-us/troubleshoot/windows-server/performance/desktop-heap-limitation-out-of-memory
+ if returncode == STATUS_DLL_INIT_FAILED:
+ raise unittest.SkipTest('gh-150436: DLL init failed, likely because '
+ 'of low desktop heap memory')
From bacd63d61056106a676f906fdfe069a2ebeae73f Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Fri, 12 Jun 2026 10:48:05 +0300
Subject: [PATCH 3/5] Update more tests
---
Lib/test/test_http_cookies.py | 31 ++++++++++++++++---------------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py
index ab01d68d1d3..2379dad1976 100644
--- a/Lib/test/test_http_cookies.py
+++ b/Lib/test/test_http_cookies.py
@@ -1,11 +1,11 @@
# Simple test suite for http/cookies.py
-import base64
import copy
import unittest
import doctest
from http import cookies
import pickle
from test import support
+import urllib.parse
class CookieTests(unittest.TestCase):
@@ -131,8 +131,8 @@ def test_unquote(self):
@support.requires_resource('cpu')
def test_unquote_large(self):
- # n = 10**6
- n = 10**4 # XXX: RUSTPYTHON; This takes more than 10 minutes to run. lower to 4
+ n = 10**6
+ n = 10**4 # TODO: RUSTPYTHON; This takes more than 10 minutes to run. lower to 4
for encoded in r'\\', r'\134':
with self.subTest(encoded):
data = 'a="b=' + encoded*n + ';"'
@@ -153,19 +153,19 @@ def test_load(self):
self.assertEqual(C.output(['path']),
'Set-Cookie: Customer="WILE_E_COYOTE"; Path=/acme')
- cookie_encoded = base64.b64encode(b'Customer="WILE_E_COYOTE"; Path=/acme; Version=1').decode('ascii')
+ cookie_encoded = urllib.parse.quote('Customer="WILE_E_COYOTE"; Path=/acme; Version=1', safe='', encoding='utf-8')
self.assertEqual(C.js_output(), fr"""
""")
- cookie_encoded = base64.b64encode(b'Customer="WILE_E_COYOTE"; Path=/acme').decode('ascii')
+ cookie_encoded = urllib.parse.quote('Customer="WILE_E_COYOTE"; Path=/acme', safe='', encoding='utf-8')
self.assertEqual(C.js_output(['path']), fr"""
""")
@@ -270,19 +270,19 @@ def test_quoted_meta(self):
self.assertEqual(C.output(['path']),
'Set-Cookie: Customer="WILE_E_COYOTE"; Path=/acme')
- expected_encoded_cookie = base64.b64encode(b'Customer=\"WILE_E_COYOTE\"; Path=/acme; Version=1').decode('ascii')
+ expected_encoded_cookie = urllib.parse.quote('Customer=\"WILE_E_COYOTE\"; Path=/acme; Version=1', safe='', encoding='utf-8')
self.assertEqual(C.js_output(), fr"""
""")
- expected_encoded_cookie = base64.b64encode(b'Customer=\"WILE_E_COYOTE\"; Path=/acme').decode('ascii')
+ expected_encoded_cookie = urllib.parse.quote('Customer=\"WILE_E_COYOTE\"; Path=/acme', safe='', encoding='utf-8')
self.assertEqual(C.js_output(['path']), fr"""
""")
@@ -373,13 +373,14 @@ def test_setter(self):
self.assertEqual(
M.output(),
"Set-Cookie: %s=%s; Path=/foo" % (i, "%s_coded_val" % i))
- expected_encoded_cookie = base64.b64encode(
- ("%s=%s; Path=/foo" % (i, "%s_coded_val" % i)).encode("ascii")
- ).decode('ascii')
+ expected_encoded_cookie = urllib.parse.quote(
+ "%s=%s; Path=/foo" % (i, "%s_coded_val" % i),
+ safe='', encoding='utf-8',
+ )
expected_js_output = """
""" % (expected_encoded_cookie,)
From 9e853e79dd60489892512c23fd7843a3944c42bc Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Fri, 12 Jun 2026 10:50:38 +0300
Subject: [PATCH 4/5] Update tests
---
Lib/test/test_json/json_lines.jsonl | 2 +
Lib/test/test_json/test_speedups.py | 61 +++++++++
Lib/test/test_json/test_tool.py | 9 ++
Lib/test/test_os.py | 2 -
Lib/test/test_rlcompleter.py | 52 ++++++++
Lib/test/test_shutil.py | 17 +++
Lib/test/test_tarfile.py | 184 ++++++++++++++++++++++------
7 files changed, 289 insertions(+), 38 deletions(-)
create mode 100644 Lib/test/test_json/json_lines.jsonl
diff --git a/Lib/test/test_json/json_lines.jsonl b/Lib/test/test_json/json_lines.jsonl
new file mode 100644
index 00000000000..d2f29211195
--- /dev/null
+++ b/Lib/test/test_json/json_lines.jsonl
@@ -0,0 +1,2 @@
+{"ingredients":["frog", "water", "chocolate", "glucose"]}
+{"ingredients":["chocolate","steel bolts"]}
diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py
index 370a2539d10..26c63eef600 100644
--- a/Lib/test/test_json/test_speedups.py
+++ b/Lib/test/test_json/test_speedups.py
@@ -1,4 +1,5 @@
from test.test_json import CTest
+from test.support import gc_collect
import unittest # XXX: RUSTPYTHON; importing to be able to skip tests
@@ -117,3 +118,63 @@ def test_current_indent_level(self):
self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
+
+ def test_mutate_dict_items_during_encode(self):
+ # gh-142831: Clearing the items list via a re-entrant key encoder
+ # must not cause a use-after-free. BadDict.items() returns a
+ # mutable list; encode_str clears it while the encoder iterates over it.
+ items = None
+
+ class BadDict(dict):
+ def items(self):
+ nonlocal items
+ items = [("boom", object())]
+ return items
+
+ cleared = False
+ def encode_str(obj):
+ nonlocal items, cleared
+ if items is not None:
+ items.clear()
+ items = None
+ cleared = True
+ gc_collect()
+ return '"x"'
+
+ encoder = self.json.encoder.c_make_encoder(
+ None, lambda o: "null",
+ encode_str, None,
+ ": ", ", ", False,
+ False, True
+ )
+
+ # Must not crash (use-after-free under ASan before fix)
+ encoder(BadDict(real=1), 0)
+ self.assertTrue(cleared)
+
+ def test_mutate_list_during_encode(self):
+ # gh-142831: Clearing a list mid-iteration via the default
+ # callback must not cause a use-after-free.
+ call_count = 0
+ lst = [object() for _ in range(10)]
+
+ def default(obj):
+ nonlocal call_count
+ call_count += 1
+ if call_count == 3:
+ lst.clear()
+ gc_collect()
+ return None
+
+ encoder = self.json.encoder.c_make_encoder(
+ None, default,
+ self.json.encoder.c_encode_basestring, None,
+ ": ", ", ", False,
+ False, True
+ )
+
+ # Must not crash (use-after-free under ASan before fix)
+ encoder(lst, 0)
+ # Verify the mutation path was actually hit and the loop
+ # stopped iterating after the list was cleared.
+ self.assertEqual(call_count, 3)
diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py
index 7b5d217a215..0a96b318b15 100644
--- a/Lib/test/test_json/test_tool.py
+++ b/Lib/test/test_json/test_tool.py
@@ -1,4 +1,5 @@
import errno
+import pathlib
import os
import sys
import textwrap
@@ -157,6 +158,14 @@ def test_jsonlines(self):
self.assertEqual(process.stdout, self.jsonlines_expect)
self.assertEqual(process.stderr, '')
+ @force_not_colorized
+ def test_jsonlines_from_file(self):
+ jsonl = pathlib.Path(__file__).parent / 'json_lines.jsonl'
+ args = sys.executable, '-m', self.module, '--json-lines', jsonl
+ process = subprocess.run(args, capture_output=True, text=True, check=True)
+ self.assertEqual(process.stdout, self.jsonlines_expect)
+ self.assertEqual(process.stderr, '')
+
def test_help_flag(self):
rc, out, err = assert_python_ok('-m', self.module, '-h',
PYTHON_COLORS='0')
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index cfaf7429dba..5ba6b3de201 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -4043,7 +4043,6 @@ async def test_trailers(self):
@requires_headers_trailers
@requires_32b
async def test_headers_overflow_32bits(self):
- self.server.handler_instance.accumulate = False
with self.assertRaises(OSError) as cm:
await self.async_sendfile(self.sockno, self.fileno, 0, 0,
headers=[b"x" * 2**16] * 2**15)
@@ -4052,7 +4051,6 @@ async def test_headers_overflow_32bits(self):
@requires_headers_trailers
@requires_32b
async def test_trailers_overflow_32bits(self):
- self.server.handler_instance.accumulate = False
with self.assertRaises(OSError) as cm:
await self.async_sendfile(self.sockno, self.fileno, 0, 0,
trailers=[b"x" * 2**16] * 2**15)
diff --git a/Lib/test/test_rlcompleter.py b/Lib/test/test_rlcompleter.py
index 6db31df891b..5ba183dfded 100644
--- a/Lib/test/test_rlcompleter.py
+++ b/Lib/test/test_rlcompleter.py
@@ -1,6 +1,7 @@
import unittest
from unittest.mock import patch
import builtins
+import types
import rlcompleter
from test.support import MISSING_C_DOCSTRINGS
@@ -136,6 +137,57 @@ def bar(self):
self.assertEqual(completer.complete('f.b', 0), 'f.bar')
self.assertFalse(f.property_called)
+ def test_released_memoryview_completion_works(self):
+ mv = memoryview(b"abc")
+ mv.release()
+
+ self.assertIsInstance(type(mv).shape, types.GetSetDescriptorType)
+ self.assertIsInstance(type(mv).strides, types.GetSetDescriptorType)
+
+ completer = rlcompleter.Completer(dict(mv=mv))
+ matches = completer.attr_matches('mv.')
+
+ # These are getset descriptors on memoryview and should be completed
+ # without evaluating the released-memoryview getters.
+ self.assertIn('mv.shape', matches)
+ self.assertIn('mv.strides', matches)
+
+ def test_member_descriptor_not_evaluated(self):
+ class Foo:
+ __slots__ = ("boom",)
+ boom_accesses = 0
+
+ def __getattribute__(self, name):
+ if name == "boom":
+ type(self).boom_accesses += 1
+ raise RuntimeError("boom access should be skipped")
+ return super().__getattribute__(name)
+
+ self.assertIsInstance(Foo.boom, types.MemberDescriptorType)
+
+ completer = rlcompleter.Completer(dict(f=Foo()))
+ matches = completer.attr_matches('f.')
+ self.assertIn('f.boom', matches)
+ self.assertEqual(Foo.boom_accesses, 0)
+
+ def test_raising_descriptor_completion_works(self):
+ class ExplodingDescriptor:
+ def __init__(self):
+ self.instance_get_calls = 0
+
+ def __get__(self, obj, owner):
+ if obj is None:
+ return self
+ self.instance_get_calls += 1
+ raise RuntimeError("descriptor getter exploded")
+
+ class Foo:
+ boom = ExplodingDescriptor()
+
+ completer = rlcompleter.Completer(dict(f=Foo()))
+ matches = completer.attr_matches('f.')
+ self.assertIn('f.boom', matches)
+ self.assertEqual(Foo.boom.instance_get_calls, 0)
def test_uncreated_attr(self):
# Attributes like properties and slots should be completed even when
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index fb1a7d876a6..8813cc46cb6 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -2888,6 +2888,23 @@ def test_destinsrc_false_positive(self):
finally:
os_helper.rmtree(TESTFN)
+ @os_helper.skip_unless_symlink
+ def test_destinsrc_symlink_bypass(self):
+ tmp = self.mkdtemp()
+ src = os.path.join(tmp, 'src')
+ os.makedirs(src)
+ # tmp/link -> tmp (one level up)
+ link = os.path.join(tmp, 'link')
+ os.symlink(tmp, link)
+ # raw path: tmp/link/src/sub - no src prefix in string space
+ # real path: tmp/src/sub - physically inside src
+ dst = os.path.join(link, 'src', 'sub')
+ self.assertTrue(
+ shutil._destinsrc(src, dst),
+ msg='_destinsrc failed to detect dst inside src via symlink '
+ '(dst=%s, src=%s)' % (dst, src),
+ )
+
@os_helper.skip_unless_symlink
@mock_rename
def test_move_file_symlink(self):
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 8d9f8824f7c..d974c7d46ec 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -892,10 +892,39 @@ def test_extractall_hardlink_on_symlink(self):
self._assert_on_file_content(hardlink_filepath, sha256_regtype)
+class GzipReadTestBase:
+
+ def test_read_with_extra_field(self):
+ with open(self.tarname, 'rb') as f:
+ data = bytearray(f.read())
+ flags = data[3]
+ self.assertEqual(flags, 8)
+ data[3] = flags | 4
+ data[10:10] = b'\x05\x00extra'
+ with open(tmpname, 'wb') as f:
+ f.write(data)
+ print(self.mode)
+ with tarfile.open(tmpname, mode=self.mode):
+ pass
+
+ def test_read_with_file_comment(self):
+ with open(self.tarname, 'rb') as f:
+ data = bytearray(f.read())
+ flags = data[3]
+ self.assertEqual(flags, 8)
+ data[3] = flags | 16
+ i = data.index(0, 10) + 1
+ data[i:i] = b'comment\x00'
+ with open(tmpname, 'wb') as f:
+ f.write(data)
+ with tarfile.open(tmpname, mode=self.mode):
+ pass
+
+
class MiscReadTest(MiscReadTestBase, unittest.TestCase):
test_fail_comp = None
-class GzipMiscReadTest(GzipTest, MiscReadTestBase, unittest.TestCase):
+class GzipMiscReadTest(GzipTest, GzipReadTestBase, MiscReadTestBase, unittest.TestCase):
pass
class Bz2MiscReadTest(Bz2Test, MiscReadTestBase, unittest.TestCase):
@@ -969,7 +998,7 @@ def test_compare_members(self):
finally:
tar1.close()
-class GzipStreamReadTest(GzipTest, StreamReadTest):
+class GzipStreamReadTest(GzipTest, GzipReadTestBase, StreamReadTest):
pass
class Bz2StreamReadTest(Bz2Test, StreamReadTest):
@@ -3682,6 +3711,39 @@ class TestExtractionFilters(unittest.TestCase):
# The destination for the extraction, within `outerdir`
destdir = outerdir / 'dest'
+ @classmethod
+ def setUpClass(cls):
+ # POSIX and Windows resolve pathnames differently:
+ # either a symlink or a '..' component is resolved first.
+ # Let's see which we are on.
+ if os_helper.can_symlink():
+ testpath = os.path.join(TEMPDIR, 'resolution_test')
+ os.mkdir(testpath)
+
+ # testpath/current links to `.` which is all of:
+ # - `testpath`
+ # - `testpath/current`
+ # - `testpath/current/current`
+ # - etc.
+ os.symlink('.', os.path.join(testpath, 'current'))
+
+ # we'll test where `testpath/current/../file` ends up
+ with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
+ pass
+
+ if os.path.exists(os.path.join(testpath, 'file')):
+ # Windows collapses 'current\..' to '.' first, leaving
+ # 'testpath\file'
+ cls.dotdot_resolves_early = True
+ elif os.path.exists(os.path.join(testpath, '..', 'file')):
+ # Posix resolves 'current' to '.' first, leaving
+ # 'testpath/../file'
+ cls.dotdot_resolves_early = False
+ else:
+ raise AssertionError('Could not determine link resolution')
+ else:
+ cls.dotdot_resolves_early = False
+
@contextmanager
def check_context(self, tar, filter, *, check_flag=True):
"""Extracts `tar` to `self.destdir` and allows checking the result
@@ -3853,10 +3915,19 @@ def test_parent_symlink(self):
+ "which is outside the destination")
with self.check_context(arc.open(), 'data'):
- self.expect_exception(
- tarfile.LinkOutsideDestinationError,
- """'parent' would link to ['"].*outerdir['"], """
- + "which is outside the destination")
+ if self.dotdot_resolves_early:
+ # 'current/../..' normalises to '..', which is rejected.
+ self.expect_exception(
+ tarfile.LinkOutsideDestinationError,
+ """'parent' would link to ['"].*outerdir['"], """
+ + "which is outside the destination")
+ else:
+ # 'current/..' normalises to '.'; the rewritten link is
+ # created and 'parent/evil' lands harmlessly inside the
+ # destination.
+ self.expect_file('current', symlink_to='.')
+ self.expect_file('parent', symlink_to='.')
+ self.expect_file('evil')
else:
# No symlink support. The symlinks are ignored.
@@ -3946,35 +4017,6 @@ def test_parent_symlink2(self):
# Test interplaying symlinks
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
- # Posix and Windows have different pathname resolution:
- # either symlink or a '..' component resolve first.
- # Let's see which we are on.
- if os_helper.can_symlink():
- testpath = os.path.join(TEMPDIR, 'resolution_test')
- os.mkdir(testpath)
-
- # testpath/current links to `.` which is all of:
- # - `testpath`
- # - `testpath/current`
- # - `testpath/current/current`
- # - etc.
- os.symlink('.', os.path.join(testpath, 'current'))
-
- # we'll test where `testpath/current/../file` ends up
- with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
- pass
-
- if os.path.exists(os.path.join(testpath, 'file')):
- # Windows collapses 'current\..' to '.' first, leaving
- # 'testpath\file'
- dotdot_resolves_early = True
- elif os.path.exists(os.path.join(testpath, '..', 'file')):
- # Posix resolves 'current' to '.' first, leaving
- # 'testpath/../file'
- dotdot_resolves_early = False
- else:
- raise AssertionError('Could not determine link resolution')
-
with ArchiveMaker() as arc:
# `current` links to `.` which is both the destination directory
@@ -4010,7 +4052,7 @@ def test_parent_symlink2(self):
with self.check_context(arc.open(), 'data'):
if os_helper.can_symlink():
- if dotdot_resolves_early:
+ if self.dotdot_resolves_early:
# Fail when extracting a file outside destination
self.expect_exception(
tarfile.OutsideDestinationError,
@@ -4130,6 +4172,76 @@ def test_sly_relative2(self):
+ """['"].*moo['"], which is outside the """
+ "destination")
+ @symlink_test
+ @os_helper.skip_unless_symlink
+ def test_normpath_realpath_mismatch(self):
+ # The link-target check must validate the value that will actually
+ # be written to disk (the normalised linkname), not the original.
+ # Here 'a' is a symlink to a deep nonexistent path, so realpath()
+ # of 'a/../../...' stays inside the destination while normpath()
+ # collapses 'a/..' lexically and escapes.
+ depth = len(self.destdir.parts) + 5
+ deep = '/'.join(f'p{i}' for i in range(depth))
+ sneaky = 'a/' + '../' * depth + 'flag'
+ for kind in 'symlink_to', 'hardlink_to':
+ with self.subTest(kind):
+ with ArchiveMaker() as arc:
+ arc.add('a', symlink_to=deep)
+ arc.add('escape', **{kind: sneaky})
+ with self.check_context(arc.open(), 'data'):
+ self.expect_exception(
+ tarfile.LinkOutsideDestinationError)
+
+ @symlink_test
+ @os_helper.skip_unless_symlink
+ def test_symlink_trailing_slash(self):
+ # A trailing slash on a symlink member's name must not cause the
+ # link target to be resolved relative to the wrong directory.
+ with ArchiveMaker() as arc:
+ t = tarfile.TarInfo('x/')
+ t.type = tarfile.SYMTYPE
+ t.linkname = '..'
+ arc.tar_w.addfile(t)
+ arc.add('x/escaped', content='hi')
+
+ with self.check_context(arc.open(), 'data'):
+ self.expect_exception(tarfile.LinkOutsideDestinationError)
+
+ @symlink_test
+ @os_helper.skip_unless_symlink
+ def test_link_at_destination(self):
+ # A link member whose name resolves to the destination directory
+ # itself must be rejected: otherwise the destination is replaced
+ # by a symlink and later members can be redirected through it.
+ for name in '', '.', './':
+ with ArchiveMaker() as arc:
+ t = tarfile.TarInfo(name)
+ t.type = tarfile.SYMTYPE
+ t.linkname = '.'
+ arc.tar_w.addfile(t)
+
+ with self.check_context(arc.open(), 'data'):
+ self.expect_exception(tarfile.OutsideDestinationError)
+
+ @symlink_test
+ @os_helper.skip_unless_symlink
+ def test_empty_name_symlink_chain(self):
+ # Regression test for a chain of empty-named symlinks that
+ # incrementally redirects the destination outwards.
+ with ArchiveMaker() as arc:
+ for name, target in [('', ''), ('a/', '..'),
+ ('', 'dummy'), ('', 'a'),
+ ('b/', '..'),
+ ('', 'dummy'), ('', 'a/b')]:
+ t = tarfile.TarInfo(name)
+ t.type = tarfile.SYMTYPE
+ t.linkname = target
+ arc.tar_w.addfile(t)
+ arc.add('escaped', content='hi')
+
+ with self.check_context(arc.open(), 'data'):
+ self.expect_exception(tarfile.FilterError)
+
@symlink_test
def test_deep_symlink(self):
# Test that symlinks and hardlinks inside a directory
From 59607bccb652fa8f2e3caab12b7e1cc944912039 Mon Sep 17 00:00:00 2001
From: ShaharNaveh <50263213+ShaharNaveh@users.noreply.github.com>
Date: Fri, 12 Jun 2026 11:20:37 +0300
Subject: [PATCH 5/5] Mark failing tests
---
Lib/test/test_json/test_speedups.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/Lib/test/test_json/test_speedups.py b/Lib/test/test_json/test_speedups.py
index 26c63eef600..64d210f4ac9 100644
--- a/Lib/test/test_json/test_speedups.py
+++ b/Lib/test/test_json/test_speedups.py
@@ -119,6 +119,7 @@ def test_current_indent_level(self):
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
+ @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: 'NoneType' object is not callable
def test_mutate_dict_items_during_encode(self):
# gh-142831: Clearing the items list via a re-entrant key encoder
# must not cause a use-after-free. BadDict.items() returns a
@@ -152,6 +153,7 @@ def encode_str(obj):
encoder(BadDict(real=1), 0)
self.assertTrue(cleared)
+ @unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: 'NoneType' object is not callable
def test_mutate_list_during_encode(self):
# gh-142831: Clearing a list mid-iteration via the default
# callback must not cause a use-after-free.