@@ -459,7 +459,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
459459 const char * newloc = target -> locale_name ;
460460
461461 /* Reset locale back to currently configured defaults */
462- setlocale (LC_ALL , "" );
462+ _Py_SetLocaleFromEnv (LC_ALL );
463463
464464 /* Set the relevant locale environment variable */
465465 if (setenv ("LC_CTYPE" , newloc , 1 )) {
@@ -472,7 +472,7 @@ _coerce_default_locale_settings(const _LocaleCoercionTarget *target)
472472 }
473473
474474 /* Reconfigure with the overridden environment variables */
475- setlocale (LC_ALL , "" );
475+ _Py_SetLocaleFromEnv (LC_ALL );
476476}
477477#endif
478478
@@ -503,13 +503,14 @@ _Py_CoerceLegacyLocale(void)
503503 const char * new_locale = setlocale (LC_CTYPE ,
504504 target -> locale_name );
505505 if (new_locale != NULL ) {
506- #if !defined(__APPLE__ ) && defined(HAVE_LANGINFO_H ) && defined(CODESET )
506+ #if !defined(__APPLE__ ) && !defined(__ANDROID__ ) && \
507+ defined(HAVE_LANGINFO_H ) && defined(CODESET )
507508 /* Also ensure that nl_langinfo works in this locale */
508509 char * codeset = nl_langinfo (CODESET );
509510 if (!codeset || * codeset == '\0' ) {
510511 /* CODESET is not set or empty, so skip coercion */
511512 new_locale = NULL ;
512- setlocale (LC_CTYPE , "" );
513+ _Py_SetLocaleFromEnv (LC_CTYPE );
513514 continue ;
514515 }
515516#endif
@@ -524,6 +525,65 @@ _Py_CoerceLegacyLocale(void)
524525#endif
525526}
526527
/* _Py_SetLocaleFromEnv() configures the locale for 'category' from the
 * process environment, hiding libc-specific quirks of
 * setlocale(category, "") from the callers.
 *
 * The return value is whatever the underlying setlocale() call returns.
 *
 * On Android, setlocale(category, "") doesn't check the environment
 * variables and incorrectly sets the "C" locale at API 24 and older APIs,
 * so the lookup is done by hand here:
 *   - LC_ALL, LC_CTYPE and LANG are inspected in that order, and the first
 *     one holding a non-empty value decides the outcome: "C.UTF-8" or
 *     "en_US.UTF-8" selects "C.UTF-8", anything else selects "C".
 *   - When none of them is set to a non-empty string, "C.UTF-8" is used,
 *     since Android uses UTF-8. POSIX section "8.2 Internationalization
 *     Variables" leaves this case to the implementation: "4. If the LANG
 *     environment variable is not set or is set to the empty string, the
 *     implementation-defined default locale shall be used."
 */
char *
_Py_SetLocaleFromEnv(int category)
{
#ifndef __ANDROID__
    return setlocale(category, "");
#else /* __ANDROID__ */
    /* Only these environment variables are consulted, in priority order. */
    static const char * const candidates[] = {"LC_ALL", "LC_CTYPE", "LANG"};
    const char *utf8_locale = "C.UTF-8";
    size_t idx;

    for (idx = 0; idx < sizeof(candidates) / sizeof(candidates[0]); idx++) {
        const char *value = getenv(candidates[idx]);

        if (value == NULL || value[0] == '\0') {
            /* Unset or empty: fall through to the next variable. */
            continue;
        }
        /* The first non-empty variable settles the choice. */
        if (strcmp(value, utf8_locale) != 0 &&
            strcmp(value, "en_US.UTF-8") != 0)
        {
            return setlocale(category, "C");
        }
        return setlocale(category, utf8_locale);
    }

#ifdef PY_COERCE_C_LOCALE
    {
        const char *coerce_flag = getenv("PYTHONCOERCECLOCALE");
        int coercion_disabled = (coerce_flag != NULL &&
                                 strcmp(coerce_flag, "0") == 0);

        /* Some other ported code may check the environment variables (e.g.
         * in extension modules), so keep LC_CTYPE in sync with the locale
         * that is about to be configured, unless coercion is switched off
         * with PYTHONCOERCECLOCALE=0. */
        if (!coercion_disabled && setenv("LC_CTYPE", utf8_locale, 1) != 0) {
            fprintf(stderr, "Warning: failed setting the LC_CTYPE "
                            "environment variable to %s\n", utf8_locale);
        }
    }
#endif
    return setlocale(category, utf8_locale);
#endif /* __ANDROID__ */
}
586+
527587
528588/* Global initializations. Can be undone by Py_Finalize(). Don't
529589 call this twice without an intervening Py_Finalize() call.
@@ -599,19 +659,12 @@ void _Py_InitializeCore(const _PyCoreConfig *config)
599659 exit (1 );
600660 }
601661
602- #ifdef __ANDROID__
603- /* Passing "" to setlocale() on Android requests the C locale rather
604- * than checking environment variables, so request C.UTF-8 explicitly
605- */
606- setlocale (LC_CTYPE , "C.UTF-8" );
607- #else
608662#ifndef MS_WINDOWS
609663 /* Set up the LC_CTYPE locale, so we can obtain
610664 the locale's charset without having to switch
611665 locales. */
612- setlocale (LC_CTYPE , "" );
666+ _Py_SetLocaleFromEnv (LC_CTYPE );
613667 _emit_stderr_warning_for_legacy_locale ();
614- #endif
615668#endif
616669
617670 if ((p = Py_GETENV ("PYTHONDEBUG" )) && * p != '\0' )
0 commit comments