Skip to content

Commit 6846d98

Browse files
committed
gazetteer: move main tag filtering into processing function
Filtering of main tags (i.e. handling of named-only tags or fallback tags) must happen before the actual output because the filtered list is already needed when computing the list of classes to keep. Currently the function that creates the class list does its own filtering and is inconsistent with what the output function does.
1 parent 550b2c6 commit 6846d98

5 files changed

Lines changed: 175 additions & 158 deletions

File tree

src/gazetteer-style.cpp

Lines changed: 95 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -9,28 +9,33 @@
99
#include "pgsql.hpp"
1010
#include "wkb.hpp"
1111

12+
namespace {
13+
1214
enum : int
1315
{
1416
MAX_ADMINLEVEL = 15
1517
};
1618

17-
static std::vector<osmium::Tag const *>
18-
domain_names(char const *cls, osmium::TagList const &tags)
19+
class DomainMatcher
1920
{
20-
std::vector<osmium::Tag const *> ret;
21-
22-
std::string const prefix = cls + std::string(":name");
23-
auto plen = prefix.length();
21+
public:
22+
DomainMatcher(char const *cls) : m_domain(cls), m_len(strlen(cls)) {}
2423

25-
for (auto const &item : tags) {
26-
char const *k = item.key();
27-
if (prefix.compare(0, plen, k) == 0 &&
28-
(k[plen] == '\0' || k[plen] == ':')) {
29-
ret.push_back(&item);
24+
/**
 * Test whether a tag is a name tag belonging to this matcher's domain.
 *
 * A tag matches when its key is exactly "<domain>:name" or starts with
 * "<domain>:name:" (for the domain "bridge": "bridge:name",
 * "bridge:name:en", ...). Keys such as "<domain>:names" do not match.
 *
 * \param t  Tag whose key is examined.
 * \returns Pointer into the key of the tag directly behind "<domain>:",
 *          i.e. the name part ("name", "name:en", ...), or nullptr if
 *          the tag does not match. The result doubles as a predicate
 *          (non-null means match) and as the key under which the value
 *          is added to the names hash.
 */
char const *operator()(osmium::Tag const &t) const noexcept
{
    char const *const key = t.key();

    // The first comparison guarantees that the key has at least m_len
    // characters, so looking at key + m_len afterwards is safe.
    if (strncmp(key, m_domain, m_len) != 0 ||
        strncmp(key + m_len, ":name", 5) != 0) {
        return nullptr;
    }

    // "name" must be a complete key component: either the key ends here
    // or a further ':'-separated suffix follows.
    char const next = key[m_len + 5];
    if (next != '\0' && next != ':') {
        return nullptr;
    }

    // Skip only "<domain>:". Skipping ":name" and one more character
    // (offset m_len + 6) would point one past the terminating NUL for a
    // plain "<domain>:name" key and would strip the "name" part from
    // the returned key.
    return key + m_len + 1;
}
3234

33-
return ret;
35+
private:
36+
char const *m_domain;
37+
size_t m_len;
38+
};
3439
}
3540

3641
namespace pt = boost::property_tree;
@@ -59,18 +64,14 @@ void gazetteer_style_t::clear()
5964
m_address.clear();
6065
m_operator = nullptr;
6166
m_admin_level = MAX_ADMINLEVEL;
62-
m_is_named = false;
6367
}
6468

6569
std::string gazetteer_style_t::class_list() const
6670
{
6771
fmt::memory_buffer buf;
6872

6973
for (auto const &m : m_main) {
70-
// XXX should handle SF_MAIN_NAMED_KEY as well
71-
if (!(std::get<2>(m) & SF_MAIN_NAMED) || !m_names.empty()) {
72-
fmt::format_to(buf, FMT_STRING("'{}',"), std::get<0>(m));
73-
}
74+
fmt::format_to(buf, FMT_STRING("'{}',"), std::get<0>(m));
7475
}
7576

7677
if (buf.size() > 0) {
@@ -284,14 +285,15 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
284285
{
285286
clear();
286287

287-
char const *postcode = nullptr;
288-
char const *country = nullptr;
288+
bool has_postcode = false;
289+
bool has_country = false;
289290
char const *place = nullptr;
290291
flag_t place_flag;
291292
bool address_point = false;
292293
bool interpolation = false;
293294
bool admin_boundary = false;
294295
bool postcode_fallback = false;
296+
bool is_named = false;
295297

296298
for (auto const &item : o.tags()) {
297299
char const *k = item.key();
@@ -330,7 +332,7 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
330332
if (flag & (SF_NAME | SF_REF)) {
331333
m_names.emplace_back(k, v);
332334
if (flag & SF_NAME) {
333-
m_is_named = true;
335+
is_named = true;
334336
}
335337
}
336338

@@ -344,39 +346,31 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
344346
addr_key = k;
345347
}
346348

347-
if (strcmp(addr_key, "postcode") == 0) {
348-
if (!postcode) {
349-
postcode = v;
350-
}
351-
} else if (strcmp(addr_key, "country") == 0) {
352-
if (!country && strlen(v) == 2) {
353-
country = v;
354-
}
355-
} else {
356-
bool first = std::none_of(
357-
m_address.begin(), m_address.end(), [&](ptag_t const &t) {
358-
return strcmp(t.first, addr_key) == 0;
359-
});
360-
if (first) {
361-
m_address.emplace_back(addr_key, v);
362-
}
349+
bool first = std::none_of(m_address.begin(), m_address.end(),
350+
[&](ptag_t const &t) {
351+
return strcmp(t.first, addr_key) == 0;
352+
});
353+
if (first) {
354+
m_address.emplace_back(addr_key, v);
363355
}
364356
}
365357

366358
if (flag & SF_ADDRESS_POINT) {
367359
address_point = true;
368-
m_is_named = true;
360+
is_named = true;
369361
}
370362

371-
if ((flag & SF_POSTCODE) && !postcode) {
372-
postcode = v;
363+
if ((flag & SF_POSTCODE) && !has_postcode) {
364+
has_postcode = true;
365+
m_address.emplace_back("postcode", v);
373366
if (flag & SF_MAIN_FALLBACK) {
374367
postcode_fallback = true;
375368
}
376369
}
377370

378-
if ((flag & SF_COUNTRY) && !country && std::strlen(v) == 2) {
379-
country = v;
371+
if ((flag & SF_COUNTRY) && !has_country && std::strlen(v) == 2) {
372+
has_country = true;
373+
m_address.emplace_back("country", v);
380374
}
381375

382376
if (flag & SF_EXTRA) {
@@ -389,12 +383,6 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
389383
}
390384
}
391385

392-
if (postcode) {
393-
m_address.emplace_back("postcode", postcode);
394-
}
395-
if (country) {
396-
m_address.emplace_back("country", country);
397-
}
398386
if (place) {
399387
if (interpolation || (admin_boundary && strncmp(place, "isl", 3) !=
400388
0)) { // island or islet
@@ -403,56 +391,75 @@ void gazetteer_style_t::process_tags(osmium::OSMObject const &o)
403391
m_main.emplace_back("place", place, place_flag);
404392
}
405393
}
406-
if (address_point) {
407-
m_main.emplace_back("place", "house", SF_MAIN | SF_MAIN_FALLBACK);
408-
} else if (postcode_fallback && postcode) {
409-
m_main.emplace_back("place", "postcode", SF_MAIN | SF_MAIN_FALLBACK);
394+
395+
filter_main_tags(is_named, o.tags());
396+
397+
if (m_main.empty()) {
398+
if (address_point) {
399+
m_main.emplace_back("place", "house", SF_MAIN | SF_MAIN_FALLBACK);
400+
} else if (postcode_fallback && has_postcode) {
401+
m_main.emplace_back("place", "postcode",
402+
SF_MAIN | SF_MAIN_FALLBACK);
403+
}
410404
}
411405
}
412406

413-
bool gazetteer_style_t::copy_out(osmium::OSMObject const &o,
414-
std::string const &geom, copy_mgr_t &buffer)
407+
void gazetteer_style_t::filter_main_tags(bool is_named,
408+
osmium::TagList const &tags)
415409
{
416-
bool any = false;
417-
for (auto const &main : m_main) {
418-
if (!(std::get<2>(main) & SF_MAIN_FALLBACK)) {
419-
any |= copy_out_maintag(main, o, geom, buffer);
420-
}
421-
}
410+
// first throw away unnamed mains
411+
auto mend =
412+
std::remove_if(m_main.begin(), m_main.end(), [&](pmaintag_t const &t) {
413+
auto flags = std::get<2>(t);
414+
415+
if (flags & SF_MAIN_NAMED) {
416+
return !is_named;
417+
}
422418

423-
if (any) {
424-
return true;
419+
if (flags & SF_MAIN_NAMED_KEY) {
420+
return !std::any_of(tags.begin(), tags.end(),
421+
DomainMatcher(std::get<0>(t)));
422+
}
423+
424+
return false;
425+
});
426+
427+
// any non-fallback mains left?
428+
bool has_primary =
429+
std::any_of(m_main.begin(), mend, [](pmaintag_t const &t) {
430+
return !(std::get<2>(t) & SF_MAIN_FALLBACK);
431+
});
432+
433+
if (has_primary) {
434+
// remove all fallbacks
435+
mend = std::remove_if(m_main.begin(), mend, [&](pmaintag_t const &t) {
436+
return (std::get<2>(t) & SF_MAIN_FALLBACK);
437+
});
438+
m_main.erase(mend, m_main.end());
439+
} else if (mend == m_main.begin()) {
440+
m_main.clear();
441+
} else {
442+
// remove everything except the first entry
443+
m_main.resize(1);
425444
}
445+
}
426446

447+
bool gazetteer_style_t::copy_out(osmium::OSMObject const &o,
448+
std::string const &geom, copy_mgr_t &buffer)
449+
{
427450
for (auto const &main : m_main) {
428-
if ((std::get<2>(main) & SF_MAIN_FALLBACK) &&
429-
copy_out_maintag(main, o, geom, buffer)) {
430-
return true;
431-
}
451+
copy_out_maintag(main, o, geom, buffer);
432452
}
433453

434-
return false;
454+
return !m_main.empty();
435455
}
436456

437-
bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag,
457+
void gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag,
438458
osmium::OSMObject const &o,
439459
std::string const &geom,
440460
copy_mgr_t &buffer)
441461
{
442-
std::vector<osmium::Tag const *> domain_name;
443-
if (std::get<2>(tag) & SF_MAIN_NAMED_KEY) {
444-
domain_name = domain_names(std::get<0>(tag), o.tags());
445-
if (domain_name.empty()) {
446-
return false;
447-
}
448-
}
449-
450-
if (std::get<2>(tag) & SF_MAIN_NAMED) {
451-
if (domain_name.empty() && !m_is_named) {
452-
return false;
453-
}
454-
}
455-
462+
buffer.new_line();
456463
// osm_id
457464
buffer.add_column(o.id());
458465
// osm_type
@@ -464,11 +471,14 @@ bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag,
464471
// type
465472
buffer.add_column(std::get<1>(tag));
466473
// names
467-
if (!domain_name.empty()) {
468-
auto prefix_len = strlen(std::get<0>(tag)) + 1; // class name and ':'
474+
if (std::get<2>(tag) & SF_MAIN_NAMED_KEY) {
475+
DomainMatcher m(std::get<0>(tag));
469476
buffer.new_hash();
470-
for (auto *t : domain_name) {
471-
buffer.add_hash_elem(t->key() + prefix_len, t->value());
477+
for (auto const &t : o.tags()) {
478+
char const *k = m(t);
479+
if (k) {
480+
buffer.add_hash_elem(k, t.value());
481+
}
472482
}
473483
buffer.finish_hash();
474484
} else {
@@ -552,6 +562,4 @@ bool gazetteer_style_t::copy_out_maintag(pmaintag_t const &tag,
552562
buffer.add_hex_geom(geom);
553563

554564
buffer.finish_line();
555-
556-
return true;
557565
}

src/gazetteer-style.hpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,22 @@ class db_deleter_place_t
5353
std::vector<item_t> m_deletables;
5454
};
5555

56+
class gazetteer_copy_mgr_t : public db_copy_mgr_t<db_deleter_place_t>
57+
{
58+
public:
59+
gazetteer_copy_mgr_t(std::shared_ptr<db_copy_thread_t> const &processor)
60+
: db_copy_mgr_t<db_deleter_place_t>(processor),
61+
m_table(std::make_shared<db_target_descr_t>("place", "place_id"))
62+
{}
63+
64+
using db_copy_mgr_t<db_deleter_place_t>::new_line;
65+
66+
void new_line() { new_line(m_table); }
67+
68+
private:
69+
std::shared_ptr<db_target_descr_t> m_table;
70+
};
71+
5672
class gazetteer_style_t
5773
{
5874
using flag_t = uint16_t;
@@ -100,7 +116,7 @@ class gazetteer_style_t
100116
using flag_list_t = std::vector<string_with_flag_t>;
101117

102118
public:
103-
using copy_mgr_t = db_copy_mgr_t<db_deleter_place_t>;
119+
using copy_mgr_t = gazetteer_copy_mgr_t;
104120

105121
void load_style(std::string const &filename);
106122
void process_tags(osmium::OSMObject const &o);
@@ -116,8 +132,9 @@ class gazetteer_style_t
116132
flag_t flags);
117133
flag_t parse_flags(std::string const &str);
118134
flag_t find_flag(char const *k, char const *v) const;
135+
void filter_main_tags(bool is_named, osmium::TagList const &tags);
119136

120-
bool copy_out_maintag(pmaintag_t const &tag, osmium::OSMObject const &o,
137+
void copy_out_maintag(pmaintag_t const &tag, osmium::OSMObject const &o,
121138
std::string const &geom, copy_mgr_t &buffer);
122139
void clear();
123140

@@ -140,8 +157,6 @@ class gazetteer_style_t
140157
char const *m_operator;
141158
/// admin level
142159
int m_admin_level;
143-
/// True if there is an actual name to the object (not a ref).
144-
bool m_is_named;
145160

146161
/// which metadata fields of the OSM objects should be written to the output
147162
osmium::metadata_options m_metadata_fields{"none"};

src/output-gazetteer.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,6 @@
1313
#include <iostream>
1414
#include <memory>
1515

16-
static auto place_table =
17-
std::make_shared<db_target_descr_t>("place", "place_id");
18-
1916
void output_gazetteer_t::delete_unused_classes(char osm_type, osmid_t osm_id)
2017
{
2118
if (!m_options.append) {
@@ -76,7 +73,7 @@ void output_gazetteer_t::commit() { m_copy.sync(); }
7673

7774
int output_gazetteer_t::process_node(osmium::Node const &node)
7875
{
79-
m_copy.new_line(place_table);
76+
m_copy.new_line();
8077
m_style.process_tags(node);
8178
delete_unused_classes('N', node.id());
8279

@@ -93,7 +90,7 @@ int output_gazetteer_t::process_node(osmium::Node const &node)
9390

9491
int output_gazetteer_t::process_way(osmium::Way *way)
9592
{
96-
m_copy.new_line(place_table);
93+
m_copy.new_line();
9794
m_style.process_tags(*way);
9895
delete_unused_classes('W', way->id());
9996

@@ -127,7 +124,7 @@ int output_gazetteer_t::process_way(osmium::Way *way)
127124

128125
int output_gazetteer_t::process_relation(osmium::Relation const &rel)
129126
{
130-
m_copy.new_line(place_table);
127+
m_copy.new_line();
131128

132129
auto const &tags = rel.tags();
133130
char const *type = tags["type"];

src/output-gazetteer.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ class output_gazetteer_t : public output_t
115115
}
116116
}
117117

118-
db_copy_mgr_t<db_deleter_place_t> m_copy;
118+
gazetteer_copy_mgr_t m_copy;
119119
gazetteer_style_t m_style;
120120

121121
geom::osmium_builder_t m_builder;

0 commit comments

Comments
 (0)