Skip to content

Commit dc44a2c

Browse files
committed
changes to indexing
1 parent 9907b79 commit dc44a2c

4 files changed

Lines changed: 177 additions & 69 deletions

File tree

gazetteer/gazetteer-functions.sql

Lines changed: 108 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ DECLARE
2626
NEWgeometry geometry;
2727
BEGIN
2828
NEWgeometry := place;
29-
IF ST_IsEmpty(NEWgeometry) OR NOT ST_IsValid(NEWgeometry) OR ST_X(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') THEN
29+
IF ST_X(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') THEN
3030
NEWgeometry := ST_buffer(NEWgeometry,0);
31-
IF ST_IsEmpty(NEWgeometry) OR NOT ST_IsValid(NEWgeometry) OR ST_X(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') THEN
31+
IF ST_X(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEWgeometry))::text in ('NaN','Infinity','-Infinity') THEN
3232
RETURN ST_SetSRID(ST_Point(0,0),4326);
3333
END IF;
3434
END IF;
@@ -60,7 +60,7 @@ CREATE OR REPLACE FUNCTION debug_geometry_sector(osmid integer, place geometry)
6060
DECLARE
6161
NEWgeometry geometry;
6262
BEGIN
63-
RAISE WARNING '%',osmid;
63+
-- RAISE WARNING '%',osmid;
6464
IF osmid = 61315 THEN
6565
return null;
6666
END IF;
@@ -1251,6 +1251,9 @@ DECLARE
12511251
BEGIN
12521252
newpoints := 0;
12531253
IF interpolationtype = 'odd' OR interpolationtype = 'even' OR interpolationtype = 'all' THEN
1254+
1255+
--RAISE WARNING 'interpolation % %',wayid,interpolationtype;
1256+
12541257
select nodes from planet_osm_ways where id = wayid INTO waynodes;
12551258
IF array_upper(waynodes, 1) IS NOT NULL THEN
12561259

@@ -1259,9 +1262,9 @@ BEGIN
12591262
FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP
12601263

12611264
select * from placex where osm_type = 'N' and osm_id = waynodes[nodeidpos]::bigint limit 1 INTO nextnode;
1262-
1265+
--RAISE WARNING '%',waynodes[nodeidpos];
12631266
IF nextnode.geometry IS NULL THEN
1264-
select ST_SetSRID(ST_Point(lon,lat),4326) from planet_osm_nodes where id = waynodes[nodeidpos] INTO nextnode.geometry;
1267+
select ST_SetSRID(ST_Point(lon::float/10000000,lat::float/10000000),4326) from planet_osm_nodes where id = waynodes[nodeidpos] INTO nextnode.geometry;
12651268
END IF;
12661269

12671270
IF havefirstpoint THEN
@@ -1276,6 +1279,7 @@ BEGIN
12761279
IF startnumber != endnumber THEN
12771280

12781281
linestr := linestr || ')';
1282+
--RAISE WARNING 'linestr %',linestr;
12791283
linegeo := ST_GeomFromText(linestr,4326);
12801284
linestr := 'LINESTRING('||ST_X(nextnode.geometry)||' '||ST_Y(nextnode.geometry);
12811285
IF (startnumber > endnumber) THEN
@@ -1308,7 +1312,7 @@ BEGIN
13081312
FOR housenum IN startnumber..endnumber BY stepsize LOOP
13091313
-- this should really copy postcodes but it puts a huge burdon on the system for no big benefit
13101314
-- ideally postcodes should move up to the way
1311-
insert into placex values (null,'N',prevnode.osm_id,prevnode.class,prevnode.type,NULL,prevnode.admin_level,housenum,prevnode.street,prevnode.isin,null,prevnode.country_code,prevnode.street_place_id,prevnode.rank_address,prevnode.rank_search,false,ST_Line_Interpolate_Point(linegeo, (housenum::float-orginalstartnumber)/originalnumberrange));
1315+
insert into placex values (null,'N',prevnode.osm_id,prevnode.class,prevnode.type,NULL,prevnode.admin_level,housenum,prevnode.street,prevnode.isin,null,prevnode.country_code,prevnode.street_place_id,prevnode.rank_address,prevnode.rank_search,false,ST_Line_Interpolate_Point(linegeo, (housenum::float-orginalstartnumber::float)/originalnumberrange::float));
13121316
newpoints := newpoints + 1;
13131317
END LOOP;
13141318
END IF;
@@ -1340,7 +1344,8 @@ DECLARE
13401344
country_code VARCHAR(2);
13411345
diameter FLOAT;
13421346
BEGIN
1343-
RAISE WARNING '%',NEW;
1347+
-- RAISE WARNING '%',NEW;
1348+
-- RAISE WARNING '%',NEW.osm_id;
13441349

13451350
-- just block these
13461351
IF NEW.class = 'highway' and NEW.type in ('turning_circle','traffic_signals','mini_roundabout','noexit','crossing') THEN
@@ -1351,6 +1356,10 @@ BEGIN
13511356
END IF;
13521357

13531358
IF ST_IsEmpty(NEW.geometry) OR NOT ST_IsValid(NEW.geometry) OR ST_X(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') THEN
1359+
IF NEW.osm_type = 'R' THEN
1360+
-- invalid multipolygons can crash postgis, don't even bother to try!
1361+
RETURN NULL;
1362+
END IF;
13541363
NEW.geometry := ST_buffer(NEW.geometry,0);
13551364
IF ST_IsEmpty(NEW.geometry) OR NOT ST_IsValid(NEW.geometry) OR ST_X(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') THEN
13561365
RAISE WARNING 'Invalid geometary, rejecting: % %', NEW.osm_type, NEW.osm_id;
@@ -1518,7 +1527,7 @@ BEGIN
15181527
result := add_location(NEW.place_id,NEW.country_code,NEW.name,NEW.rank_search,NEW.rank_address,NEW.geometry);
15191528
END IF;
15201529

1521-
RETURN NEW;
1530+
--RETURN NEW;
15221531
-- The following is not needed until doing diff updates, and slows the main index process down
15231532

15241533
IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN
@@ -1549,9 +1558,9 @@ BEGIN
15491558

15501559
END IF;
15511560

1552-
IF NEW.rank_search < 18 THEN
1553-
RAISE WARNING 'placex insert end: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
1554-
END IF;
1561+
--IF NEW.rank_search < 18 THEN
1562+
-- RAISE WARNING 'placex insert end: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
1563+
--END IF;
15551564

15561565
RETURN NEW;
15571566

@@ -2095,9 +2104,9 @@ BEGIN
20952104
DELETE FROM place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class;
20962105
END IF;
20972106

2098-
IF existing.rank_search < 26 THEN
2099-
RAISE WARNING 'existing: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,existing;
2100-
END IF;
2107+
--IF existing.rank_search < 26 THEN
2108+
-- RAISE WARNING 'existing: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,existing;
2109+
--END IF;
21012110

21022111
-- To paraphrase, if there isn't an existing item, OR if the admin level has changed, OR if it is a major change in geometry
21032112
IF existing IS NULL OR coalesce(existing.admin_level, 100) != coalesce(NEW.admin_level, 100)
@@ -2106,13 +2115,13 @@ END IF;
21062115
THEN
21072116

21082117
IF existing IS NOT NULL THEN
2109-
RAISE WARNING 'insert delete % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
2118+
--RAISE WARNING 'insert delete % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
21102119
DELETE FROM place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class and type = NEW.type;
21112120
END IF;
21122121

2113-
IF existing.rank_search < 26 THEN
2114-
RAISE WARNING 'insert placex % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
2115-
END IF;
2122+
--IF existing.rank_search < 26 THEN
2123+
-- RAISE WARNING 'insert placex % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
2124+
--END IF;
21162125

21172126
-- No - process it as a new insertion (hopefully of low rank or it will be slow)
21182127
insert into placex values (NEW.place_id
@@ -2205,7 +2214,7 @@ END IF;
22052214
OR coalesce(existing.postcode, '') != coalesce(NEW.postcode, '')
22062215
OR coalesce(existing.country_code, '') != coalesce(NEW.country_code, '') THEN
22072216

2208-
IF existing.rank_search < 26 THEN
2217+
IF false and existing.rank_search < 26 THEN
22092218
IF coalesce(existing.name::text, '') != coalesce(NEW.name::text, '') THEN
22102219
RAISE WARNING 'update details, name: % % % %',NEW.osm_type,NEW.osm_id,existing.name::text,NEW.name::text;
22112220
END IF;
@@ -2479,7 +2488,7 @@ DECLARE
24792488
BEGIN
24802489
select * from place_boundingbox into result where place_id = search_place_id;
24812490
IF result IS NULL THEN
2482-
select count(*) from place_addressline where address_place_id = search_place_id into numfeatures;
2491+
select count(*) from place_addressline where address_place_id = search_place_id and isaddress = true into numfeatures;
24832492
insert into place_boundingbox select place_id,
24842493
ST_Y(ST_PointN(ExteriorRing(ST_Box2D(area)),4)),ST_Y(ST_PointN(ExteriorRing(ST_Box2D(area)),2)),
24852494
ST_X(ST_PointN(ExteriorRing(ST_Box2D(area)),1)),ST_X(ST_PointN(ExteriorRing(ST_Box2D(area)),3)),
@@ -2495,7 +2504,9 @@ BEGIN
24952504
count(*), ST_Area(ST_Buffer(ST_Convexhull(ST_Collect(geometry)),0.0001)) as area,
24962505
ST_Buffer(ST_Convexhull(ST_Collect(geometry)),0.0001) as boundary
24972506
from place_addressline join placex using (place_id)
2498-
where address_place_id = search_place_id and (st_length(geometry) < 0.01 or place_id = search_place_id)
2507+
where address_place_id = search_place_id
2508+
and (isaddress = true OR place_id = search_place_id)
2509+
and (st_length(geometry) < 0.01 or place_id = search_place_id)
24992510
group by address_place_id limit 1;
25002511
select * from place_boundingbox into result where place_id = search_place_id;
25012512
END IF;
@@ -2504,6 +2515,46 @@ END;
25042515
$$
25052516
LANGUAGE plpgsql;
25062517

2518+
-- don't do the operation if it would be slow
2519+
CREATE OR REPLACE FUNCTION get_place_boundingbox_quick(search_place_id INTEGER) RETURNS place_boundingbox
2520+
AS $$
2521+
DECLARE
2522+
result place_boundingbox;
2523+
numfeatures integer;
2524+
rank integer;
2525+
BEGIN
2526+
select * from place_boundingbox into result where place_id = search_place_id;
2527+
IF result IS NULL THEN
2528+
select count(*) from place_addressline where address_place_id = search_place_id and isaddress = true into numfeatures;
2529+
insert into place_boundingbox select place_id,
2530+
ST_Y(ST_PointN(ExteriorRing(ST_Box2D(area)),4)),ST_Y(ST_PointN(ExteriorRing(ST_Box2D(area)),2)),
2531+
ST_X(ST_PointN(ExteriorRing(ST_Box2D(area)),1)),ST_X(ST_PointN(ExteriorRing(ST_Box2D(area)),3)),
2532+
numfeatures, ST_Area(area),
2533+
area from location_area where place_id = search_place_id;
2534+
select * from place_boundingbox into result where place_id = search_place_id;
2535+
END IF;
2536+
IF result IS NULL THEN
2537+
select rank_search from placex where place_id = search_place_id into rank;
2538+
IF rank > 20 THEN
2539+
-- TODO 0.0001
2540+
insert into place_boundingbox select address_place_id,
2541+
min(ST_Y(ST_Centroid(geometry))) as minlon,max(ST_Y(ST_Centroid(geometry))) as maxlon,
2542+
min(ST_X(ST_Centroid(geometry))) as minlat,max(ST_X(ST_Centroid(geometry))) as maxlat,
2543+
count(*), ST_Area(ST_Buffer(ST_Convexhull(ST_Collect(geometry)),0.0001)) as area,
2544+
ST_Buffer(ST_Convexhull(ST_Collect(geometry)),0.0001) as boundary
2545+
from place_addressline join placex using (place_id)
2546+
where address_place_id = search_place_id
2547+
and (isaddress = true OR place_id = search_place_id)
2548+
and (st_length(geometry) < 0.01 or place_id = search_place_id)
2549+
group by address_place_id limit 1;
2550+
select * from place_boundingbox into result where place_id = search_place_id;
2551+
END IF;
2552+
END IF;
2553+
return result;
2554+
END;
2555+
$$
2556+
LANGUAGE plpgsql;
2557+
25072558
CREATE OR REPLACE FUNCTION update_place(search_place_id INTEGER) RETURNS BOOLEAN
25082559
AS $$
25092560
DECLARE
@@ -2606,3 +2657,39 @@ BEGIN
26062657
END;
26072658
$$
26082659
LANGUAGE plpgsql;
2660+
2661+
CREATE OR REPLACE FUNCTION get_word_suggestion(srcword TEXT) RETURNS TEXT
2662+
AS $$
2663+
DECLARE
2664+
trigramtoken TEXT;
2665+
result TEXT;
2666+
BEGIN
2667+
2668+
trigramtoken := regexp_replace(make_standard_name(srcword),E'([^0-9])\\1+',E'\\1','g');
2669+
SELECT word FROM word WHERE word_trigram like ' %' and word_trigram % trigramtoken ORDER BY similarity(word_trigram, trigramtoken) DESC, word limit 1 into result;
2670+
2671+
return result;
2672+
END;
2673+
$$
2674+
LANGUAGE plpgsql;
2675+
2676+
CREATE OR REPLACE FUNCTION get_word_suggestions(srcword TEXT) RETURNS TEXT[]
2677+
AS $$
2678+
DECLARE
2679+
trigramtoken TEXT;
2680+
result TEXT[];
2681+
r RECORD;
2682+
BEGIN
2683+
2684+
trigramtoken := regexp_replace(make_standard_name(srcword),E'([^0-9])\\1+',E'\\1','g');
2685+
2686+
FOR r IN SELECT word,similarity(word_trigram, trigramtoken) as score FROM word
2687+
WHERE word_trigram like ' %' and word_trigram % trigramtoken ORDER BY similarity(word_trigram, trigramtoken) DESC, word limit 4
2688+
LOOP
2689+
result[coalesce(array_upper(result,1)+1,1)] := r.word;
2690+
END LOOP;
2691+
2692+
return result;
2693+
END;
2694+
$$
2695+
LANGUAGE plpgsql;

gazetteer/gazetteer-index.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ select 'now'::timestamp;
4545
--insert into placex select null,'X',nextval,'place','postcodearea',ARRAY[ROW('name',postcodeareaname)::keyvalue],null,null,null,null,null,'gb',null,15,23,false,geometry from gb_postcode join gb_postcodearea on (substring(postcode from '^([A-Z][A-Z]?)[0-9][0-9A-Z]? [0-9]$') = postcodeareaid) where postcode ~ '^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]$' and ST_GeometryType(geometry) = 'ST_Polygon';
4646

4747
select 'now'::timestamp;
48-
insert into placex select * from place where osm_type = 'N' order by geometry_sector(geometry);
48+
insert into placex select * from place where osm_type = 'N';-- order by geometry_sector(geometry);
4949
select 'now'::timestamp;
50-
insert into placex select * from place where osm_type = 'W' order by geometry_sector(geometry);
50+
insert into placex select * from place where osm_type = 'W';-- order by geometry_sector(geometry);
5151
select 'now'::timestamp;
52-
insert into placex select * from place where osm_type = 'R' order by geometry_sector(geometry);
52+
insert into placex select * from place where osm_type = 'R';-- order by geometry_sector(geometry);
5353
select 'now'::timestamp;
5454

5555
-- use this to do a simple index - for the full planet use 'reindex.php'

gazetteer/gazetteer-tables.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ CREATE TABLE word (
4646
SELECT AddGeometryColumn('word', 'location', 4326, 'GEOMETRY', 2);
4747
CREATE INDEX idx_word_word_id on word USING BTREE (word_id);
4848
CREATE INDEX idx_word_word_token on word USING BTREE (word_token);
49+
CREATE INDEX idx_word_trigram ON word USING gin(word_trigram gin_trgm_ops);
4950
GRANT SELECT ON word TO "www-data" ;
5051
DROP SEQUENCE seq_word;
5152
CREATE SEQUENCE seq_word start 1;
@@ -179,6 +180,10 @@ CREATE INDEX idx_placex_pendingbylatlon ON placex USING BTREE (geometry_index(ge
179180
CREATE INDEX idx_placex_street_place_id ON placex USING BTREE (street_place_id) where street_place_id IS NOT NULL;
180181
CREATE INDEX idx_placex_gb_postcodesector ON placex USING BTREE (substring(upper(postcode) from '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$'))
181182
where country_code = 'gb' and substring(upper(postcode) from '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$') is not null;
183+
184+
CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector(geometry),rank_address,osm_type,osm_id);
185+
CLUSTER placex USING idx_placex_sector;
186+
182187
DROP SEQUENCE seq_place;
183188
CREATE SEQUENCE seq_place start 1;
184189
GRANT SELECT on placex to "www-data" ;

0 commit comments

Comments
 (0)