@@ -1148,6 +1148,64 @@ The final prediction goes to the largest posterior. This is known as the
11481148 'female'
11491149
11501150
1151+ Sampling from kernel density estimation
1152+ ***************************************
1153+
1154+ The :func: `kde() ` function creates a continuous probability density
1155+ function from discrete samples. Some applications need a way to make
1156+ random selections from that distribution.
1157+
1158+ The technique is to pick a sample from a bandwidth scaled kernel
1159+ function and recenter the result around a randomly chosen point from
1160+ the input data. This can be done with any kernel that has a known or
1161+ accurately approximated inverse cumulative distribution function.
1162+
1163+ .. testcode ::
1164+
1165+ from random import choice, random, seed
1166+ from math import sqrt, log, pi, tan, asin
1167+ from statistics import NormalDist
1168+
1169+ kernel_invcdfs = {
1170+ 'normal': NormalDist().inv_cdf,
1171+ 'logistic': lambda p: log(p / (1 - p)),
1172+ 'sigmoid': lambda p: log(tan(p * pi/2)),
1173+ 'rectangular': lambda p: 2*p - 1,
1174+ 'triangular': lambda p: sqrt(2*p) - 1 if p < 0.5 else 1 - sqrt(2 - 2*p),
1175+ 'cosine': lambda p: 2*asin(2*p - 1)/pi,
1176+ }
1177+
1178+ def kde_random(data, h, kernel='normal'):
1179+ 'Return a function that samples from kde() smoothed data.'
1180+ kernel_invcdf = kernel_invcdfs[kernel]
1181+ def rand():
1182+ return h * kernel_invcdf(random()) + choice(data)
1183+ return rand
1184+
1185+ For example:
1186+
1187+ .. doctest ::
1188+
1189+ >>> discrete_samples = [- 2.1 , - 1.3 , - 0.4 , 1.9 , 5.1 , 6.2 ]
1190+ >>> rand = kde_random(discrete_samples, h = 1.5 )
1191+ >>> seed(8675309 )
1192+ >>> selections = [rand() for i in range (10 )]
1193+ >>> [round (x, 1 ) for x in selections]
1194+ [4.7, 7.4, 1.2, 7.8, 6.9, -1.3, 5.8, 0.2, -1.4, 5.7]
1195+
1196+ .. testcode ::
1197+ :hide:
1198+
1199+ from statistics import kde
1200+ from math import isclose
1201+
1202+ # Verify that cdf / invcdf will round trip
1203+ xarr = [i/100 for i in range(-100, 101)]
1204+ for kernel, invcdf in kernel_invcdfs.items():
1205+ cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True)
1206+ for x in xarr:
1207+ assert isclose(invcdf(cdf(x)), x, abs_tol=1E-9)
1208+
11511209..
11521210 # This modelines must appear within the last ten lines of the file.
11531211 kate: indent-width 3; remove-trailing-space on; replace-tabs on; encoding utf-8;
0 commit comments