Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
[3.12] gh-113993: Allow interned strings to be mortal, and fix related issues (GH-120520)

* Add an InternalDocs file describing how interning should work and how to use it.

* Add internal functions to *explicitly* request what kind of interning is done:
  - `_PyUnicode_InternMortal`
  - `_PyUnicode_InternImmortal`
  - `_PyUnicode_InternStatic`

* Switch uses of `PyUnicode_InternInPlace` to those.

* Disallow using `_Py_SetImmortal` on strings directly.
  You should use `_PyUnicode_InternImmortal` instead:
  - Strings should be interned before immortalization; otherwise you may end up
    immortalizing a copy instead of the interned string.
  - `_Py_SetImmortal` doesn't handle the `SSTATE_INTERNED_MORTAL` to
    `SSTATE_INTERNED_IMMORTAL` update, and those flags can't be changed in
    backports, as they are now part of public API and version-specific ABI.

* Add private `_only_immortal` argument for `sys.getunicodeinternedsize`, used in refleak test machinery.

* Make sure the statically allocated string singletons are unique. This means these sets are now disjoint:
  - `_Py_ID`
  - `_Py_STR` (including the empty string)
  - one-character latin-1 singletons

  Now, when you intern a singleton, that exact singleton will be interned.

* Add a `_Py_LATIN1_CHR` macro, use it instead of `_Py_ID`/`_Py_STR` for one-character latin-1 singletons everywhere (including Clinic).

* Intern `_Py_STR` singletons at startup.

* Beef up the tests. Cover internal details (marked with `@cpython_only`).

* Add lots of assertions.

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
  • Loading branch information
encukou and ericsnowcurrently committed Aug 16, 2024
commit 9fd6334433075ff696dd872c6aebe79944f8fcfb
17 changes: 1 addition & 16 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 5 additions & 16 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,16 @@ struct _Py_global_strings {
STRUCT_FOR_STR(anon_setcomp, "<setcomp>")
STRUCT_FOR_STR(anon_string, "<string>")
STRUCT_FOR_STR(anon_unknown, "<unknown>")
STRUCT_FOR_STR(close_br, "}")
STRUCT_FOR_STR(dbl_close_br, "}}")
STRUCT_FOR_STR(dbl_open_br, "{{")
STRUCT_FOR_STR(dbl_percent, "%%")
STRUCT_FOR_STR(defaults, ".defaults")
STRUCT_FOR_STR(dot, ".")
STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(generic_base, ".generic_base")
STRUCT_FOR_STR(json_decoder, "json.decoder")
STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
STRUCT_FOR_STR(list_err, "list index out of range")
STRUCT_FOR_STR(newline, "\n")
STRUCT_FOR_STR(open_br, "{")
STRUCT_FOR_STR(percent, "%")
STRUCT_FOR_STR(shim_name, "<shim>")
STRUCT_FOR_STR(type_params, ".type_params")
STRUCT_FOR_STR(utf_8, "utf-8")
Expand All @@ -66,7 +61,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(TextIOWrapper)
STRUCT_FOR_ID(True)
STRUCT_FOR_ID(WarningMessage)
STRUCT_FOR_ID(_)
STRUCT_FOR_ID(_WindowsConsoleIO)
STRUCT_FOR_ID(__IOBase_closed)
STRUCT_FOR_ID(__abc_tpflags__)
Expand Down Expand Up @@ -255,6 +249,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(_lock_unlock_module)
STRUCT_FOR_ID(_loop)
STRUCT_FOR_ID(_needs_com_addref_)
STRUCT_FOR_ID(_only_immortal)
STRUCT_FOR_ID(_pack_)
STRUCT_FOR_ID(_restype_)
STRUCT_FOR_ID(_showwarnmsg)
Expand All @@ -266,7 +261,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(_uninitialized_submodules)
STRUCT_FOR_ID(_warn_unawaited_coroutine)
STRUCT_FOR_ID(_xoptions)
STRUCT_FOR_ID(a)
STRUCT_FOR_ID(abs_tol)
STRUCT_FOR_ID(access)
STRUCT_FOR_ID(add)
Expand All @@ -286,7 +280,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(attribute)
STRUCT_FOR_ID(authorizer_callback)
STRUCT_FOR_ID(autocommit)
STRUCT_FOR_ID(b)
STRUCT_FOR_ID(backtick)
STRUCT_FOR_ID(base)
STRUCT_FOR_ID(before)
Expand All @@ -304,7 +297,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(byteorder)
STRUCT_FOR_ID(bytes)
STRUCT_FOR_ID(bytes_per_sep)
STRUCT_FOR_ID(c)
STRUCT_FOR_ID(c_call)
STRUCT_FOR_ID(c_exception)
STRUCT_FOR_ID(c_return)
Expand Down Expand Up @@ -357,7 +349,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(count)
STRUCT_FOR_ID(covariant)
STRUCT_FOR_ID(cwd)
STRUCT_FOR_ID(d)
STRUCT_FOR_ID(data)
STRUCT_FOR_ID(database)
STRUCT_FOR_ID(decode)
Expand Down Expand Up @@ -385,7 +376,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(dst)
STRUCT_FOR_ID(dst_dir_fd)
STRUCT_FOR_ID(duration)
STRUCT_FOR_ID(e)
STRUCT_FOR_ID(eager_start)
STRUCT_FOR_ID(effective_ids)
STRUCT_FOR_ID(element_factory)
Expand Down Expand Up @@ -546,7 +536,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(mro)
STRUCT_FOR_ID(msg)
STRUCT_FOR_ID(mycmp)
STRUCT_FOR_ID(n)
STRUCT_FOR_ID(n_arg)
STRUCT_FOR_ID(n_fields)
STRUCT_FOR_ID(n_sequence_fields)
Expand Down Expand Up @@ -591,7 +580,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(outgoing)
STRUCT_FOR_ID(overlapped)
STRUCT_FOR_ID(owner)
STRUCT_FOR_ID(p)
STRUCT_FOR_ID(pages)
STRUCT_FOR_ID(parent)
STRUCT_FOR_ID(password)
Expand Down Expand Up @@ -619,7 +607,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(ps2)
STRUCT_FOR_ID(query)
STRUCT_FOR_ID(quotetabs)
STRUCT_FOR_ID(r)
STRUCT_FOR_ID(raw)
STRUCT_FOR_ID(read)
STRUCT_FOR_ID(read1)
Expand All @@ -643,7 +630,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(return)
STRUCT_FOR_ID(reverse)
STRUCT_FOR_ID(reversed)
STRUCT_FOR_ID(s)
STRUCT_FOR_ID(salt)
STRUCT_FOR_ID(sched_priority)
STRUCT_FOR_ID(scheduler)
Expand Down Expand Up @@ -746,7 +732,6 @@ struct _Py_global_strings {
STRUCT_FOR_ID(writable)
STRUCT_FOR_ID(write)
STRUCT_FOR_ID(write_through)
STRUCT_FOR_ID(x)
STRUCT_FOR_ID(year)
STRUCT_FOR_ID(zdict)
} identifiers;
Expand All @@ -769,6 +754,10 @@ struct _Py_global_strings {
(_Py_SINGLETON(strings.identifiers._py_ ## NAME._ascii.ob_base))
/* Access the statically allocated string literal declared with
   STRUCT_FOR_STR(NAME, ...).

   NAME is token-pasted onto the `_py_` prefix, so it must be the bare
   identifier used in the declaration (e.g. `empty`, `dot`), not a C string.
   The expansion is the `_ascii.ob_base` member of that entry under
   `strings.literals`, reached through _Py_SINGLETON() (defined elsewhere). */
#define _Py_STR(NAME) \
(_Py_SINGLETON(strings.literals._py_ ## NAME._ascii.ob_base))
/* Pointer (as PyObject*) to the static one-character string singleton
   for the code point CH.

   Values below 128 select an entry of the `ascii` table; every other value
   selects entry `CH - 128` of the `latin1` table.

   No range check is performed, and CH is evaluated more than once, so the
   argument should be free of side effects.
   NOTE(review): callers are presumably expected to pass a value in
   0..255 -- confirm at call sites. */
#define _Py_LATIN1_CHR(CH) \
    ((CH) >= 128 \
     ? (PyObject*)&_Py_SINGLETON(strings).latin1[(CH) - 128] \
     : (PyObject*)&_Py_SINGLETON(strings).ascii[(CH)])

/* _Py_DECLARE_STR() should precede all uses of _Py_STR() in a function.

Expand Down
17 changes: 1 addition & 16 deletions Include/internal/pycore_runtime_init_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading