@@ -45,25 +45,25 @@ int main(int argc, char *argv[])
4545 po::options_description generic (" Geocoder NLP importer options" );
4646 generic.add_options ()(" help,h" , " Help message" )(" version,v" , " Data format version" );
4747 generic.add_options ()(" poly,p" , po::value<std::string>(&polyjson),
48- " Boundary of the imported region in GeoJSON format" );
48+ " Boundary of the imported region in GeoJSON format" );
4949 generic.add_options ()(" postal-country" , po::value<std::string>(&postal_country_parser),
50- " libpostal country preference for this database" );
50+ " libpostal country preference for this database" );
5151 generic.add_options ()(
52- " postal-address" , po::value<std::string>(&postal_address_parser_dir),
53- " libpostal address parser directory. If not specified, global libpostal parser directory "
54- " preference is used." );
52+ " postal-address" , po::value<std::string>(&postal_address_parser_dir),
53+ " libpostal address parser directory. If not specified, global libpostal parser directory "
54+ " preference is used." );
5555 generic.add_options ()(
56- " priority" , po::value<std::string>(&type_priority_list),
57- " File with OSM tags that are kept even if there is no name associated with the location" );
56+ " priority" , po::value<std::string>(&type_priority_list),
57+ " File with OSM tags that are kept even if there is no name associated with the location" );
5858 generic.add_options ()(
59- " skip" , po::value<std::string>(&type_skip_list),
60- " File with OSM tags for locations that should be dropped even if there is a name "
61- " associated with the location" );
59+ " skip" , po::value<std::string>(&type_skip_list),
60+ " File with OSM tags for locations that should be dropped even if there is a name "
61+ " associated with the location" );
6262 generic.add_options ()(" verbose" , " Verbose address expansion" );
6363
6464 po::options_description hidden (" Hidden options" );
6565 hidden.add_options ()(" output-directory" , po::value<std::string>(&database_path),
66- " Output directory for imported database" );
66+ " Output directory for imported database" );
6767
6868 po::positional_options_description p;
6969 p.add (" output-directory" , 1 );
@@ -74,38 +74,38 @@ int main(int argc, char *argv[])
7474 po::variables_map vm;
7575 try
7676 {
77- po::store (po::command_line_parser (argc, argv).options (cmdline_options).positional (p).run (),
78- vm);
79- po::notify (vm);
77+ po::store (po::command_line_parser (argc, argv).options (cmdline_options).positional (p).run (),
78+ vm);
79+ po::notify (vm);
8080 }
8181 catch (std::exception &e)
8282 {
83- std::cerr << " Error while parsing options: " << e.what () << " \n\n " ;
84- std::cerr << generic << " \n " ;
83+ std::cerr << " Error while parsing options: " << e.what () << " \n\n " ;
84+ std::cerr << generic << " \n " ;
8585 }
8686
8787 if (vm.count (" help" ))
8888 {
89- std::cout << " Geocoder NLP importer:\n\n "
90- << " Call as\n\n " << argv[0 ] << " <options> output-directory\n "
91- << " \n where output-directory is a directory for imported database.\n\n "
92- << generic << " \n " ;
93- return 0 ;
89+ std::cout << " Geocoder NLP importer:\n\n "
90+ << " Call as\n\n " << argv[0 ] << " <options> output-directory\n "
91+ << " \n where output-directory is a directory for imported database.\n\n "
92+ << generic << " \n " ;
93+ return 0 ;
9494 }
9595
9696 if (vm.count ((" version" )))
9797 {
98- std::cout << GeoNLP::Geocoder::version << " \n " ;
99- return 0 ;
98+ std::cout << GeoNLP::Geocoder::version << " \n " ;
99+ return 0 ;
100100 }
101101
102102 if (vm.count (" verbose" ))
103103 verbose_address_expansion = true ;
104104
105105 if (!vm.count (" poly" ))
106106 {
107- std::cerr << " Boundary of the imported region in GeoJSON format is missing\n " ;
108- return -1 ;
107+ std::cerr << " Boundary of the imported region in GeoJSON format is missing\n " ;
108+ return -1 ;
109109 }
110110 }
111111
@@ -137,7 +137,7 @@ int main(int argc, char *argv[])
137137 else
138138 {
139139 std::cout << " Please specify PostgreSQL connection string using environment variable "
140- << GEOCODER_IMPORTER_POSTGRES << " \n " ;
140+ << GEOCODER_IMPORTER_POSTGRES << " \n " ;
141141 return 0 ;
142142 }
143143
@@ -148,48 +148,48 @@ int main(int argc, char *argv[])
148148
149149 const std::string base_query
150150 = " select place_id, linked_place_id, parent_place_id, country_code, class, type, "
151- " hstore_to_json(name) as name, hstore_to_json(extratags) as extra, "
152- " COALESCE(address->'housenumber',housenumber) AS housenumber, postcode, ST_X(centroid) as "
153- " longitude, ST_Y(centroid) as latitude, osm_id "
154- " from placex " ;
151+ " hstore_to_json(name) as name, hstore_to_json(extratags) as extra, "
152+ " COALESCE(address->'housenumber',housenumber) AS housenumber, postcode, ST_X(centroid) as "
153+ " longitude, ST_Y(centroid) as latitude, osm_id "
154+ " from placex " ;
155155
156156 // load primary hierarchy
157157 {
158158 pqxx::result r = txn.exec_params (
159- base_query
160- + " where linked_place_id IS NULL and ST_Intersects(ST_GeomFromGeoJSON($1), "
161- " geometry) order by admin_level" ,
162- border);
159+ base_query
160+ + " where linked_place_id IS NULL and ST_Intersects(ST_GeomFromGeoJSON($1), "
161+ " geometry) order by admin_level" ,
162+ border);
163163 size_t count = 0 ;
164164 for (const pqxx::row &row : r)
165165 {
166- ++count;
167- std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
168- hierarchy.add_item (item);
169- if (count % printout_step == 0 )
170- std::cout << " Imported records: " << count
171- << " ; Root elements: " << hierarchy.get_root_count ()
172- << " ; Missing parents: " << hierarchy.get_missing_count () << std::endl;
166+ ++count;
167+ std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
168+ hierarchy.add_item (item);
169+ if (count % printout_step == 0 )
170+ std::cout << " Imported records: " << count
171+ << " ; Root elements: " << hierarchy.get_root_count ()
172+ << " ; Missing parents: " << hierarchy.get_missing_count () << std::endl;
173173 }
174174 }
175175
176176 // load all linked places and merge with the primary ones
177177 {
178178 pqxx::result r = txn.exec_params (
179- base_query
180- + " where linked_place_id IS NOT NULL and ST_Intersects(ST_GeomFromGeoJSON($1), "
181- " geometry) order by admin_level" ,
182- border);
179+ base_query
180+ + " where linked_place_id IS NOT NULL and ST_Intersects(ST_GeomFromGeoJSON($1), "
181+ " geometry) order by admin_level" ,
182+ border);
183183 size_t count = 0 ;
184184 for (const pqxx::row &row : r)
185185 {
186- ++count;
187- std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
188- hierarchy.add_linked_item (item);
189- if (count % printout_step == 0 )
190- std::cout << " Imported linked records: " << count
191- << " ; Root elements: " << hierarchy.get_root_count ()
192- << " ; Missing parents: " << hierarchy.get_missing_count () << std::endl;
186+ ++count;
187+ std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
188+ hierarchy.add_linked_item (item);
189+ if (count % printout_step == 0 )
190+ std::cout << " Imported linked records: " << count
191+ << " ; Root elements: " << hierarchy.get_root_count ()
192+ << " ; Missing parents: " << hierarchy.get_missing_count () << std::endl;
193193 }
194194 }
195195
@@ -200,17 +200,17 @@ int main(int argc, char *argv[])
200200 pqxx::result r = txn.exec_params (base_query + " where place_id=$1" , parent);
201201 bool found = false ;
202202 for (auto row : r)
203- {
204- std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
205- hierarchy.add_item (item);
206- found = true ;
207- }
203+ {
204+ std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
205+ hierarchy.add_item (item);
206+ found = true ;
207+ }
208208
209209 if (!found)
210- {
211- std::cerr << " Missing parent with ID " << parent << " . Stopping import\n " ;
212- return -1 ;
213- }
210+ {
211+ std::cerr << " Missing parent with ID " << parent << " . Stopping import\n " ;
212+ return -1 ;
213+ }
214214
215215 parent = hierarchy.get_next_nonzero_root_parent ();
216216 }
@@ -221,20 +221,20 @@ int main(int argc, char *argv[])
221221
222222 // find missing countries and move root nodes under them if possible
223223 std::cout << " Try to fill missing parents through countries. Root size: "
224- << hierarchy.get_root_count () << " \n " ;
224+ << hierarchy.get_root_count () << " \n " ;
225225 for (std::string country : hierarchy.get_root_countries ())
226226 {
227227 for (auto row : txn.exec_params (
228- base_query + " where rank_address = 4 and country_code = $1 limit 1" , country))
229- {
230- hindex id = row[" place_id" ].as <hindex>(0 );
231- if (!hierarchy.has_item (id))
232- {
233- std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
234- hierarchy.add_item (item);
235- }
236- hierarchy.set_country (country, id);
237- }
228+ base_query + " where rank_address = 4 and country_code = $1 limit 1" , country))
229+ {
230+ hindex id = row[" place_id" ].as <hindex>(0 );
231+ if (!hierarchy.has_item (id))
232+ {
233+ std::shared_ptr<HierarchyItem> item = std::make_shared<HierarchyItem>(row);
234+ hierarchy.add_item (item);
235+ }
236+ hierarchy.set_country (country, id);
237+ }
238238 }
239239
240240 hierarchy.finalize ();
@@ -262,14 +262,14 @@ int main(int argc, char *argv[])
262262 db.execute (" DROP TABLE IF EXISTS object_primary_rtree" );
263263
264264 db.execute (" CREATE " TEMPORARY " TABLE object_primary_tmp ("
265- " id INTEGER PRIMARY KEY AUTOINCREMENT, postgres_id INTEGER, name TEXT, name_extra "
266- " TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, parent INTEGER, "
267- " latitude REAL, longitude REAL)" );
265+ " id INTEGER PRIMARY KEY AUTOINCREMENT, postgres_id INTEGER, name TEXT, name_extra "
266+ " TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, parent INTEGER, "
267+ " latitude REAL, longitude REAL)" );
268268 db.execute (" CREATE " TEMPORARY " TABLE object_type_tmp (prim_id INTEGER, type TEXT NOT NULL, "
269- " FOREIGN KEY (prim_id) REFERENCES objects_primary_tmp(id))" );
269+ " FOREIGN KEY (prim_id) REFERENCES objects_primary_tmp(id))" );
270270 db.execute (" CREATE TABLE hierarchy (prim_id INTEGER PRIMARY KEY, last_subobject INTEGER, "
271- " FOREIGN KEY (prim_id) REFERENCES objects_primary(id), FOREIGN KEY (last_subobject) "
272- " REFERENCES objects_primary(id))" );
271+ " FOREIGN KEY (prim_id) REFERENCES objects_primary(id), FOREIGN KEY (last_subobject) "
272+ " REFERENCES objects_primary(id))" );
273273
274274 std::cout << " Preliminary filling of the database" << std::endl;
275275 hierarchy.write (db);
@@ -283,29 +283,29 @@ int main(int argc, char *argv[])
283283 db.execute (" CREATE TABLE type (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT)" );
284284 db.execute (" INSERT INTO type (name) SELECT DISTINCT type FROM object_type_tmp" );
285285 db.execute (" CREATE " TEMPORARY
286- " TABLE object_primary_tmp2 (id INTEGER PRIMARY KEY AUTOINCREMENT, "
287- " name TEXT, name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website "
288- " TEXT, parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, boxstr TEXT, "
289- " FOREIGN KEY (type_id) REFERENCES type(id))" );
286+ " TABLE object_primary_tmp2 (id INTEGER PRIMARY KEY AUTOINCREMENT, "
287+ " name TEXT, name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website "
288+ " TEXT, parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, boxstr TEXT, "
289+ " FOREIGN KEY (type_id) REFERENCES type(id))" );
290290
291291 db.execute (" INSERT INTO object_primary_tmp2 (id, name, name_extra, name_en, phone, postal_code, "
292- " website, parent, type_id, latitude, longitude, boxstr) "
293- " SELECT p.id, p.name, p.name_extra, p.name_en, p.phone, p.postal_code, p.website, "
294- " p.parent, type.id, p.latitude, p.longitude, "
295- // LINE BELOW DETERMINES ROUNDING USED FOR BOXES
296- " CAST(CAST(p.latitude*100 AS INTEGER) AS TEXT) || ',' || CAST(CAST(p.longitude*100 AS "
297- " INTEGER) AS TEXT) "
298- " FROM object_primary_tmp p JOIN object_type_tmp tt ON p.id=tt.prim_id "
299- " JOIN type ON tt.type=type.name" );
292+ " website, parent, type_id, latitude, longitude, boxstr) "
293+ " SELECT p.id, p.name, p.name_extra, p.name_en, p.phone, p.postal_code, p.website, "
294+ " p.parent, type.id, p.latitude, p.longitude, "
295+ // LINE BELOW DETERMINES ROUNDING USED FOR BOXES
296+ " CAST(CAST(p.latitude*100 AS INTEGER) AS TEXT) || ',' || CAST(CAST(p.longitude*100 AS "
297+ " INTEGER) AS TEXT) "
298+ " FROM object_primary_tmp p JOIN object_type_tmp tt ON p.id=tt.prim_id "
299+ " JOIN type ON tt.type=type.name" );
300300
301301 db.execute (" CREATE " TEMPORARY " TABLE boxids (id INTEGER PRIMARY KEY AUTOINCREMENT, boxstr "
302- " TEXT, CONSTRAINT struni UNIQUE (boxstr))" );
302+ " TEXT, CONSTRAINT struni UNIQUE (boxstr))" );
303303 db.execute (" INSERT INTO boxids (boxstr) SELECT DISTINCT boxstr FROM object_primary_tmp2" );
304304
305305 db.execute (" CREATE TABLE object_primary (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT, "
306- " name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, "
307- " parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, box_id INTEGER, "
308- " FOREIGN KEY (type_id) REFERENCES type(id))" );
306+ " name_extra TEXT, name_en TEXT, phone TEXT, postal_code TEXT, website TEXT, "
307+ " parent INTEGER, type_id INTEGER, latitude REAL, longitude REAL, box_id INTEGER, "
308+ " FOREIGN KEY (type_id) REFERENCES type(id))" );
309309 db.execute (
310310 " INSERT INTO object_primary (id, name, name_extra, name_en, phone, postal_code, website, "
311311 " parent, type_id, latitude, longitude, box_id) "
@@ -328,8 +328,8 @@ int main(int argc, char *argv[])
328328 db.execute (
329329 " CREATE VIRTUAL TABLE object_primary_rtree USING rtree(id, minLat, maxLat, minLon, maxLon)" );
330330 db.execute (" INSERT INTO object_primary_rtree (id, minLat, maxLat, minLon, maxLon) "
331- " SELECT box_id, min(latitude), max(latitude), min(longitude), max(longitude) from "
332- " object_primary group by box_id" );
331+ " SELECT box_id, min(latitude), max(latitude), min(longitude), max(longitude) from "
332+ " object_primary group by box_id" );
333333
334334 // Stats view
335335 db.execute (" DROP VIEW IF EXISTS type_stats" );
@@ -341,10 +341,10 @@ int main(int argc, char *argv[])
341341 sqlite3pp::query qry (db, " SELECT type_name, cnt FROM type_stats ORDER BY cnt DESC LIMIT 25" );
342342 for (auto v : qry)
343343 {
344- std::string name;
345- int cnt;
346- v.getter () >> name >> cnt;
347- std::cout << " " << name << " \t " << cnt << " \n " ;
344+ std::string name;
345+ int cnt;
346+ v.getter () >> name >> cnt;
347+ std::cout << " " << name << " \t " << cnt << " \n " ;
348348 }
349349 }
350350 // Recording version
@@ -363,7 +363,7 @@ int main(int argc, char *argv[])
363363 {
364364 std::cout << " Recording postal parser country preference: " << postal_country_parser << " \n " ;
365365 std::string cmd = " INSERT INTO meta (key, value) VALUES (\" postal:country:parser\" , \" "
366- + postal_country_parser + " \" )" ;
366+ + postal_country_parser + " \" )" ;
367367 db.execute (cmd.c_str ());
368368 }
369369
0 commit comments