Skip to content

Commit 985ecdc

Browse files
committed
ssue python#19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
1 parent fe32aec commit 985ecdc

27 files changed

+1029
-242
lines changed

Doc/library/sys.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,9 +594,20 @@ always available.
594594
| :const:`imag` | multiplier used for the imaginary part of a |
595595
| | complex number |
596596
+---------------------+--------------------------------------------------+
597+
| :const:`algorithm` | name of the algorithm for hashing of str, bytes, |
598+
| | and memoryview |
599+
+---------------------+--------------------------------------------------+
600+
| :const:`hash_bits` | internal output size of the hash algorithm |
601+
+---------------------+--------------------------------------------------+
602+
| :const:`seed_bits` | size of the seed key of the hash algorithm |
603+
+---------------------+--------------------------------------------------+
604+
597605

598606
.. versionadded:: 3.2
599607

608+
.. versionchanged: 3.4
609+
Added *algorithm*, *hash_bits* and *seed_bits*
610+
600611
601612
.. data:: hexversion
602613

Doc/license.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,35 @@ the following note::
609609
http://creativecommons.org/publicdomain/zero/1.0/
610610

611611

612+
SipHash24
613+
---------
614+
615+
The file :file:`Python/pyhash.c` contains Marek Majkowski' implementation of
616+
Dan Bernstein's SipHash24 algorithm. The contains the following note::
617+
618+
<MIT License>
619+
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
620+
621+
Permission is hereby granted, free of charge, to any person obtaining a copy
622+
of this software and associated documentation files (the "Software"), to deal
623+
in the Software without restriction, including without limitation the rights
624+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
625+
copies of the Software, and to permit persons to whom the Software is
626+
furnished to do so, subject to the following conditions:
627+
628+
The above copyright notice and this permission notice shall be included in
629+
all copies or substantial portions of the Software.
630+
</MIT License>
631+
632+
Original location:
633+
https://github.com/majek/csiphash/
634+
635+
Solution inspired by code from:
636+
Samuel Neves (supercop/crypto_auth/siphash24/little)
637+
djb (supercop/crypto_auth/siphash24/little2)
638+
Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
639+
640+
612641
strtod and dtoa
613642
---------------
614643

Doc/whatsnew/3.4.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ CPython implementation improvements:
116116

117117
* :ref:`PEP 442: Safe object finalization <pep-442>`
118118
* :ref:`PEP 445: Configurable memory allocators <pep-445>`
119+
* :pep:`456` Secure and interchangeable hash algorithm
119120
* Improve finalization of Python modules to avoid setting their globals
120121
to None, in most cases (:issue:`18214`).
121122
* A more efficient :mod:`marshal` format (:issue:`16475`).

Include/Python.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#include "object.h"
6969
#include "objimpl.h"
7070
#include "typeslots.h"
71+
#include "pyhash.h"
7172

7273
#include "pydebug.h"
7374

Include/object.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -562,23 +562,6 @@ PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *);
562562
PyAPI_FUNC(int) Py_ReprEnter(PyObject *);
563563
PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
564564

565-
/* Helpers for hash functions */
566-
#ifndef Py_LIMITED_API
567-
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
568-
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
569-
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
570-
#endif
571-
572-
typedef struct {
573-
Py_hash_t prefix;
574-
Py_hash_t suffix;
575-
} _Py_HashSecret_t;
576-
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
577-
578-
#ifdef Py_DEBUG
579-
PyAPI_DATA(int) _Py_HashSecret_Initialized;
580-
#endif
581-
582565
/* Helper for passing objects to printf and the like */
583566
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
584567

Include/pyhash.h

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#ifndef Py_HASH_H
2+
3+
#define Py_HASH_H
4+
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
8+
/* Helpers for hash functions */
9+
#ifndef Py_LIMITED_API
10+
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
11+
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
12+
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
13+
#endif
14+
15+
/* Prime multiplier used in string and various other hashes. */
16+
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
17+
18+
/* Parameters used for the numeric hash implementation. See notes for
19+
_Py_HashDouble in Objects/object.c. Numeric hashes are based on
20+
reduction modulo the prime 2**_PyHASH_BITS - 1. */
21+
22+
#if SIZEOF_VOID_P >= 8
23+
# define _PyHASH_BITS 61
24+
#else
25+
# define _PyHASH_BITS 31
26+
#endif
27+
28+
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
29+
#define _PyHASH_INF 314159
30+
#define _PyHASH_NAN 0
31+
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
32+
33+
34+
/* hash secret
35+
*
36+
* memory layout on 64 bit systems
37+
* cccccccc cccccccc cccccccc uc -- unsigned char[24]
38+
* pppppppp ssssssss ........ fnv -- two Py_hash_t
39+
* k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T
40+
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
41+
* ........ ........ eeeeeeee pyexpat XML hash salt
42+
*
43+
* memory layout on 32 bit systems
44+
* cccccccc cccccccc cccccccc uc
45+
* ppppssss ........ ........ fnv -- two Py_hash_t
46+
* k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*)
47+
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
48+
* ........ ........ eeee.... pyexpat XML hash salt
49+
*
50+
* (*) The siphash member may not be available on 32 bit platforms without
51+
* an unsigned int64 data type.
52+
*/
53+
typedef union {
54+
/* ensure 24 bytes */
55+
unsigned char uc[24];
56+
/* two Py_hash_t for FNV */
57+
struct {
58+
Py_hash_t prefix;
59+
Py_hash_t suffix;
60+
} fnv;
61+
#ifdef PY_UINT64_T
62+
/* two uint64 for SipHash24 */
63+
struct {
64+
PY_UINT64_T k0;
65+
PY_UINT64_T k1;
66+
} siphash;
67+
#endif
68+
/* a different (!) Py_hash_t for small string optimization */
69+
struct {
70+
unsigned char padding[16];
71+
Py_hash_t suffix;
72+
} djbx33a;
73+
struct {
74+
unsigned char padding[16];
75+
Py_hash_t hashsalt;
76+
} expat;
77+
} _Py_HashSecret_t;
78+
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
79+
80+
#ifdef Py_DEBUG
81+
PyAPI_DATA(int) _Py_HashSecret_Initialized;
82+
#endif
83+
84+
85+
/* hash function definition */
86+
#ifndef Py_LIMITED_API
87+
typedef struct {
88+
Py_hash_t (*const hash)(const void *, Py_ssize_t);
89+
const char *name;
90+
const int hash_bits;
91+
const int seed_bits;
92+
} PyHash_FuncDef;
93+
94+
PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
95+
#endif
96+
97+
98+
/* cutoff for small string DJBX33A optimization in range [1, cutoff).
99+
*
100+
* About 50% of the strings in a typical Python application are smaller than
101+
* 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
102+
* NEVER use DJBX33A for long strings!
103+
*
104+
* A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
105+
* should use a smaller cutoff because it is easier to create colliding
106+
* strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
107+
* provide a decent safety margin.
108+
*/
109+
#ifndef Py_HASH_CUTOFF
110+
# define Py_HASH_CUTOFF 0
111+
#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
112+
# error Py_HASH_CUTOFF must in range 0...7.
113+
#endif /* Py_HASH_CUTOFF */
114+
115+
116+
/* hash algorithm selection
117+
*
118+
* The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
119+
* configure script.
120+
*
121+
* - FNV is available on all platforms and architectures.
122+
* - SIPHASH24 only works on plaforms that provide PY_UINT64_T and doesn't
123+
* require aligned memory for integers.
124+
* - With EXTERNAL embedders can provide an alternative implementation with::
125+
*
126+
* PyHash_FuncDef PyHash_Func = {...};
127+
*
128+
* XXX: Figure out __declspec() for extern PyHash_FuncDef.
129+
*/
130+
#define Py_HASH_EXTERNAL 0
131+
#define Py_HASH_SIPHASH24 1
132+
#define Py_HASH_FNV 2
133+
134+
#ifndef Py_HASH_ALGORITHM
135+
# if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \
136+
&& !defined(HAVE_ALIGNED_REQUIRED))
137+
# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
138+
# else
139+
# define Py_HASH_ALGORITHM Py_HASH_FNV
140+
# endif /* uint64_t && uint32_t && aligned */
141+
#endif /* Py_HASH_ALGORITHM */
142+
143+
#ifdef __cplusplus
144+
}
145+
#endif
146+
147+
#endif /* !Py_HASH_H */

Include/pyport.h

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -144,23 +144,6 @@ Used in: PY_LONG_LONG
144144
#endif
145145
#endif
146146

147-
/* Prime multiplier used in string and various other hashes. */
148-
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
149-
150-
/* Parameters used for the numeric hash implementation. See notes for
151-
_Py_HashDouble in Objects/object.c. Numeric hashes are based on
152-
reduction modulo the prime 2**_PyHASH_BITS - 1. */
153-
154-
#if SIZEOF_VOID_P >= 8
155-
#define _PyHASH_BITS 61
156-
#else
157-
#define _PyHASH_BITS 31
158-
#endif
159-
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
160-
#define _PyHASH_INF 314159
161-
#define _PyHASH_NAN 0
162-
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
163-
164147
/* uintptr_t is the C9X name for an unsigned integral type such that a
165148
* legitimate void* can be cast to uintptr_t and then back to void* again
166149
* without loss of information. Similarly for intptr_t, wrt a signed
@@ -199,8 +182,10 @@ typedef Py_intptr_t Py_ssize_t;
199182
#endif
200183

201184
/* Py_hash_t is the same size as a pointer. */
185+
#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T
202186
typedef Py_ssize_t Py_hash_t;
203187
/* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */
188+
#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T
204189
typedef size_t Py_uhash_t;
205190

206191
/* Largest possible value of size_t.

Lib/test/regrtest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,8 @@ def main(tests=None, **kwargs):
601601
print("==", platform.python_implementation(), *sys.version.split())
602602
print("== ", platform.platform(aliased=True),
603603
"%s-endian" % sys.byteorder)
604+
print("== ", "hash algorithm:", sys.hash_info.algorithm,
605+
"64bit" if sys.maxsize > 2**32 else "32bit")
604606
print("== ", os.getcwd())
605607
print("Testing with flags:", sys.flags)
606608

0 commit comments

Comments
 (0)