Skip to content

Commit d59432b

Browse files
author
Matthew Daws
committed
Work for evaluation
1 parent e77d64d commit d59432b

9 files changed

Lines changed: 416 additions & 57 deletions

File tree

examples/Chicago/Geo-coding of the Chicago dataset.ipynb

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,147 @@
327327
"frame.head()"
328328
]
329329
},
330+
{
331+
"cell_type": "code",
332+
"execution_count": 6,
333+
"metadata": {
334+
"collapsed": false
335+
},
336+
"outputs": [
337+
{
338+
"data": {
339+
"text/html": [
340+
"<div>\n",
341+
"<table border=\"1\" class=\"dataframe\">\n",
342+
" <thead>\n",
343+
" <tr style=\"text-align: right;\">\n",
344+
" <th></th>\n",
345+
" <th>address</th>\n",
346+
" <th>case</th>\n",
347+
" <th>crime</th>\n",
348+
" <th>geometry</th>\n",
349+
" <th>location</th>\n",
350+
" <th>timestamp</th>\n",
351+
" <th>type</th>\n",
352+
" <th>xcoord</th>\n",
353+
" <th>ycoord</th>\n",
354+
" </tr>\n",
355+
" </thead>\n",
356+
" <tbody>\n",
357+
" <tr>\n",
358+
" <th>4209</th>\n",
359+
" <td>019XX E 73RD ST</td>\n",
360+
" <td>HZ567200</td>\n",
361+
" <td>BURGLARY</td>\n",
362+
" <td>POINT (-87.578760989 41.76248689)</td>\n",
363+
" <td>VACANT LOT/LAND</td>\n",
364+
" <td>2016-12-27T17:00:00</td>\n",
365+
" <td>FORCIBLE ENTRY</td>\n",
366+
" <td>1190114.0</td>\n",
367+
" <td>1856944.0</td>\n",
368+
" </tr>\n",
369+
" <tr>\n",
370+
" <th>21540</th>\n",
371+
" <td>019XX E 73RD ST</td>\n",
372+
" <td>JA208153</td>\n",
373+
" <td>ASSAULT</td>\n",
374+
" <td>POINT (-87.578980927 41.76248559)</td>\n",
375+
" <td>STREET</td>\n",
376+
" <td>2017-04-01T03:08:00</td>\n",
377+
" <td>AGGRAVATED: HANDGUN</td>\n",
378+
" <td>1190054.0</td>\n",
379+
" <td>1856943.0</td>\n",
380+
" </tr>\n",
381+
" <tr>\n",
382+
" <th>48182</th>\n",
383+
" <td>019XX E 73RD ST</td>\n",
384+
" <td>HZ431786</td>\n",
385+
" <td>BURGLARY</td>\n",
386+
" <td>POINT (-87.576891409 41.762507528)</td>\n",
387+
" <td>OTHER</td>\n",
388+
" <td>2016-09-13T05:00:00</td>\n",
389+
" <td>FORCIBLE ENTRY</td>\n",
390+
" <td>1190624.0</td>\n",
391+
" <td>1856956.0</td>\n",
392+
" </tr>\n",
393+
" <tr>\n",
394+
" <th>67209</th>\n",
395+
" <td>019XX E 73RD ST</td>\n",
396+
" <td>HZ285311</td>\n",
397+
" <td>BATTERY</td>\n",
398+
" <td>POINT (-87.577639248 41.762498728)</td>\n",
399+
" <td>STREET</td>\n",
400+
" <td>2016-05-29T22:09:00</td>\n",
401+
" <td>DOMESTIC BATTERY SIMPLE</td>\n",
402+
" <td>1190420.0</td>\n",
403+
" <td>1856951.0</td>\n",
404+
" </tr>\n",
405+
" <tr>\n",
406+
" <th>96195</th>\n",
407+
" <td>019XX E 73RD ST</td>\n",
408+
" <td>HZ464497</td>\n",
409+
" <td>DECEPTIVE PRACTICE</td>\n",
410+
" <td>POINT (-87.577639248 41.762498728)</td>\n",
411+
" <td>RESIDENCE</td>\n",
412+
" <td>2016-10-04T10:50:00</td>\n",
413+
" <td>FRAUD OR CONFIDENCE GAME</td>\n",
414+
" <td>1190420.0</td>\n",
415+
" <td>1856951.0</td>\n",
416+
" </tr>\n",
417+
" </tbody>\n",
418+
"</table>\n",
419+
"</div>"
420+
],
421+
"text/plain": [
422+
" address case crime \\\n",
423+
"4209 019XX E 73RD ST HZ567200 BURGLARY \n",
424+
"21540 019XX E 73RD ST JA208153 ASSAULT \n",
425+
"48182 019XX E 73RD ST HZ431786 BURGLARY \n",
426+
"67209 019XX E 73RD ST HZ285311 BATTERY \n",
427+
"96195 019XX E 73RD ST HZ464497 DECEPTIVE PRACTICE \n",
428+
"\n",
429+
" geometry location \\\n",
430+
"4209 POINT (-87.578760989 41.76248689) VACANT LOT/LAND \n",
431+
"21540 POINT (-87.578980927 41.76248559) STREET \n",
432+
"48182 POINT (-87.576891409 41.762507528) OTHER \n",
433+
"67209 POINT (-87.577639248 41.762498728) STREET \n",
434+
"96195 POINT (-87.577639248 41.762498728) RESIDENCE \n",
435+
"\n",
436+
" timestamp type xcoord ycoord \n",
437+
"4209 2016-12-27T17:00:00 FORCIBLE ENTRY 1190114.0 1856944.0 \n",
438+
"21540 2017-04-01T03:08:00 AGGRAVATED: HANDGUN 1190054.0 1856943.0 \n",
439+
"48182 2016-09-13T05:00:00 FORCIBLE ENTRY 1190624.0 1856956.0 \n",
440+
"67209 2016-05-29T22:09:00 DOMESTIC BATTERY SIMPLE 1190420.0 1856951.0 \n",
441+
"96195 2016-10-04T10:50:00 FRAUD OR CONFIDENCE GAME 1190420.0 1856951.0 "
442+
]
443+
},
444+
"execution_count": 6,
445+
"metadata": {},
446+
"output_type": "execute_result"
447+
}
448+
],
449+
"source": [
450+
"frame[frame.address==\"019XX E 73RD ST\"].head()"
451+
]
452+
},
453+
{
454+
"cell_type": "code",
455+
"execution_count": null,
456+
"metadata": {
457+
"collapsed": true
458+
},
459+
"outputs": [],
460+
"source": []
461+
},
462+
{
463+
"cell_type": "code",
464+
"execution_count": null,
465+
"metadata": {
466+
"collapsed": true
467+
},
468+
"outputs": [],
469+
"source": []
470+
},
330471
{
331472
"cell_type": "code",
332473
"execution_count": 5,

examples/Chicago/Geographic_Boundaries.ipynb

Lines changed: 43 additions & 41 deletions
Large diffs are not rendered by default.

open_cp/data.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
import datetime as _datetime
55

66
class Point():
7-
"""A simple 2 dimensional point class."""
7+
"""A simple 2 dimensional point class.
8+
9+
Is "iterable" and returns (x,y). Similarly supports indexing."""
810
def __init__(self, x=0, y=0):
911
self._x = x
1012
self._y = y
@@ -19,6 +21,17 @@ def y(self):
1921
"""The y coordinate."""
2022
return self._y
2123

24+
def __iter__(self):
25+
yield self.x
26+
yield self.y
27+
28+
def __getitem__(self, i):
29+
if i == 0:
30+
return self.x
31+
if i == 1:
32+
return self.y
33+
raise ValueError("Index must be 0 or 1.")
34+
2235
def __add__(self, other):
2336
return Point(self.x + other.x, self.y + other.y)
2437

@@ -48,6 +61,16 @@ def ymin(self):
4861
def ymax(self):
4962
return self._max.y
5063

64+
@property
65+
def xrange(self):
66+
"""The pair (xmin, xmax)"""
67+
return (self.xmin, self.xmax)
68+
69+
@property
70+
def yrange(self):
71+
"""The pair (ymin, ymax)"""
72+
return (self.ymin, self.ymax)
73+
5174
@property
5275
def min(self):
5376
"""The pair (xmin, ymin)"""
@@ -92,8 +115,8 @@ def grid_size(self, cell_width, cell_height = None):
92115
"""
93116
if cell_height is None:
94117
cell_height = cell_width
95-
xsize = int(_np.rint((self.xmax - self.xmin) / cell_width))
96-
ysize = int(_np.rint((self.ymax - self.ymin) / cell_height))
118+
xsize = int(_np.ceil((self.xmax - self.xmin) / cell_width))
119+
ysize = int(_np.ceil((self.ymax - self.ymin) / cell_height))
97120
return xsize, ysize
98121

99122
def __repr__(self):

open_cp/evaluation.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""
2+
evaluation
3+
~~~~~~~~~~
4+
5+
Contains routines and classes to help with evaluation of predictions.
6+
"""
7+
8+
import numpy as _np
9+
10+
def _top_slice_one_dim(risk, fraction):
    """Select the `n` largest entries of a one dimensional array.

    :param risk: One dimensional numpy array of values.
    :param fraction: Fraction of the entries to select.  `n` is
      `floor(len(risk) * fraction)`, clamped to lie between 0 and
      `len(risk)`.

    :return: Boolean array of the same shape as `risk` with exactly `n` True
      entries.  When entries tie at the cut-off value, the tied entries which
      occur earliest in `risk` are the ones left out.
    """
    total = len(risk)
    wanted = min(max(0, int(_np.floor(total * fraction))), total)
    # Build the trivial answers directly as boolean arrays: adding 1 to a
    # boolean array of zeros would silently produce an integer array.
    if wanted == total:
        return _np.ones(risk.shape, dtype=bool)
    if wanted == 0:
        return _np.zeros(risk.shape, dtype=bool)

    # The `wanted`-th largest value; everything at or above it is a candidate.
    threshold = _np.sort(risk)[-wanted]
    mask = risk >= threshold
    excess = int(_np.sum(mask)) - wanted
    if excess == 0:
        return mask

    # Ties at the threshold selected too many entries: drop the surplus,
    # starting from the front of the array.
    tied = _np.flatnonzero(risk == threshold)
    if not 0 < excess <= len(tied):
        # Cannot happen for totally ordered data; NaN values can end up here.
        raise AssertionError()
    mask[tied[:excess]] = False
    return mask


def top_slice(risk, fraction):
    """Returns a boolean array of the same shape as `risk` where there are
    exactly `n` True entries.  If `risk` has `N` entries, `n` is the greatest
    integer less than or equal to `N * fraction` (clamped to lie between 0
    and `N`).  The returned cells are True for the `n` greatest cells in
    `risk`.  If there are ties at the cut-off value, then the tied cells
    which come first (in flattened, row-major order) are dropped, so the
    later tied cells are the ones returned.

    :param risk: Array of values, of any dimension.
    :param fraction: Between 0 and 1.

    :return: Boolean numpy array of the same shape as `risk`.
    """
    values = _np.asarray(risk)
    if values.ndim == 1:
        return _top_slice_one_dim(values, fraction)
    # Work on the flattened data, then restore the original shape.
    flat_mask = _top_slice_one_dim(values.ravel(), fraction)
    return flat_mask.reshape(values.shape)

open_cp/naive.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
naive
3+
~~~~~
4+
5+
Implements some very "naive" prediction techniques, mainly for baseline
6+
comparisons.
7+
"""
8+
9+
from . import predictors
10+
import numpy as _np
11+
12+
class CountingGridKernel(predictors.DataTrainer):
    """Makes "predictions" by simply laying down a grid, and then counting the
    number of events in each grid cell to generate a relative risk.

    :param grid_size: The width and height of each grid cell.
    :param region: Optionally, the :class:`RectangularRegion` to base the grid
      on.  If not specified, this will be the bounding box of the data.
    """
    def __init__(self, grid_size, region = None):
        self.grid_size = grid_size
        self.region = region

    def predict(self):
        """Count the events falling in each grid cell.

        Reads the events from `self.data` (presumably set by the caller via
        the :class:`DataTrainer` base class -- verify against that class).
        Events which fall outside of the grid are ignored.

        :return: A :class:`GridPredictionArray` whose matrix holds, for each
          cell, the number of events in that cell.
        """
        # Use the supplied region when there is one; only fall back to the
        # bounding box of the data when no region was given.
        if self.region is None:
            region = self.data.bounding_box
        else:
            region = self.region
        xsize, ysize = region.grid_size(self.grid_size)

        xg = _np.floor((self.data.xcoords - region.xmin) / self.grid_size).astype(int)
        yg = _np.floor((self.data.ycoords - region.ymin) / self.grid_size).astype(int)
        # Negative indices would silently wrap around to the far side of the
        # matrix, and indices past the end would raise, so drop such events.
        inside = (xg >= 0) & (xg < xsize) & (yg >= 0) & (yg < ysize)

        matrix = _np.zeros((ysize, xsize))
        # `add.at` is unbuffered, so events sharing a cell are all counted.
        _np.add.at(matrix, (yg[inside], xg[inside]), 1)

        return predictors.GridPredictionArray(self.grid_size, self.grid_size,
            matrix, region.xmin, region.ymin)

open_cp/predictors.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ def __init__(self, xsize, ysize, xoffset = 0, yoffset = 0):
5353

5454
def risk(self, x, y):
5555
"""The risk at coordinate `(x,y)`."""
56-
xx = x - self._xoffset
57-
yy = y - self._yoffset
58-
return self.grid_risk(_floor(xx / self._xsize), _floor(yy / self._ysize))
56+
return self.grid_risk(*self.grid_coord(x, y))
5957

6058
def grid_risk(self, gridx, gridy):
6159
raise NotImplementedError()
@@ -69,6 +67,29 @@ def xsize(self):
6967
def ysize(self):
7068
"""The height of each cell"""
7169
return self._ysize
70+
71+
@property
72+
def xoffset(self):
73+
"""The x coordinate of the left side of the grid."""
74+
return self._xoffset
75+
76+
@property
77+
def yoffset(self):
78+
"""The y coordinate of the bottom side of the grid."""
79+
return self._yoffset
80+
81+
def grid_coord(self, x, y):
82+
"""Where does the point fall in the grid.
83+
84+
:param x: x coord
85+
:param y: y coord
86+
87+
:return: `(gridx, gridy)` coordinates in the grid where this point
88+
falls.
89+
"""
90+
xx = x - self._xoffset
91+
yy = y - self._yoffset
92+
return (_floor(xx / self._xsize), _floor(yy / self._ysize))
7293

7394

7495
class GridPredictionArray(GridPrediction):

0 commit comments

Comments
 (0)