Hi there,<br><br>Hoping someone may have a pointer as to where we went wrong setting up Gazetteer &amp; Nominatim.<br><br>We only used a UK extract, not a full planet. We used this UK extract:<br><a href="http://nick.dev.openstreetmap.org/downloads/planet/uk-091028.osm.bz2">http://nick.dev.openstreetmap.org/downloads/planet/uk-091028.osm.bz2</a><br>
<br>The indexing took 2-4 days on an absolutely honking machine (200+ GB of RAM, 16 processors) - but I am pretty sure we don't have PostgreSQL configured properly to make use of the memory and processor power (I am going to have to learn that now too).<br>
<br>Anyway, here are the symptoms - please compare these to <a href="http://nominatim.openstreetmap.org">http://nominatim.openstreetmap.org</a>:<br><br>The query "pubs in south benfleet" returns no results.<br>
<br>The query "pubs in benfleet" returns "<span class="name">The Belgrave Arms, Benfleet Court, Haggerston, City,
London Borough of Hackney, Greater London, United Kingdom</span>" as the top result.<br><br>The query "pubs in london" returns "<span class="name">The London, High Street, Weston-super-Mare, North
Somerset, United Kingdom" as the top result (in fact, the only result).<br><br>Obviously something has gone wrong. At first I was thinking maybe our extract didn't really have all the data, until I realized that it seemed to be keying on the city name as a potential pub name or street name.<br>
<br>Putting Nominatim into debug mode produced the output at the bottom of this email.<br><br>I am still pretty sure that we don't have all the data, but still - I KNOW we DO have pubs in London in the data, so "pubs in London" should still have returned at least some pubs in London, right?<br>
<br>Thanks in advance for any advice or suggestions on where to look for what went wrong<br><br>Ant<br><br>search?q=pubs+in+benfleet<br>Nominatim Debug Output:<br><br></span>string(137) "select word_id,word_token, word, class, type,
location,country_code from word where word_token in (' pubs','pubs','
benfleet','benfleet')"
array(2) {
[0]=>
array(3) {
["string"]=>
string(4) "pubs"
["words"]=>
array(1) {
[0]=>
string(4) "pubs"
}
["wordsets"]=>
array(1) {
[0]=>
array(1) {
[0]=>
string(4) "pubs"
}
}
}
[1]=>
array(3) {
["string"]=>
string(8) "benfleet"
["words"]=>
array(1) {
[0]=>
string(8) "benfleet"
}
["wordsets"]=>
array(1) {
[0]=>
array(1) {
[0]=>
string(8) "benfleet"
}
}
}
}
array(3) {
[" benfleet"]=>
array(1) {
[0]=>
array(7) {
["word_id"]=>
string(4) "8801"
["word_token"]=>
string(9) " benfleet"
["word"]=>
string(8) "Benfleet"
["class"]=>
NULL
["type"]=>
NULL
["location"]=>
NULL
["country_code"]=>
NULL
}
}
["benfleet"]=>
array(1) {
[0]=>
array(7) {
["word_id"]=>
string(4) "8802"
["word_token"]=>
string(8) "benfleet"
["word"]=>
NULL
["class"]=>
NULL
["type"]=>
NULL
["location"]=>
NULL
["country_code"]=>
NULL
}
}
[" pubs"]=>
array(1) {
[0]=>
array(7) {
["word_id"]=>
string(6) "532099"
["word_token"]=>
string(5) " pubs"
["word"]=>
NULL
["class"]=>
string(7) "amenity"
["type"]=>
string(3) "pub"
["location"]=>
NULL
["country_code"]=>
NULL
}
}
}
array(2) {
[2]=>
array(1) {
[0]=>
array(11) {
["iSearchRank"]=>
int(2)
["iNamePhrase"]=>
int(1)
["sCountryCode"]=>
bool(false)
["aName"]=>
array(1) {
[8801]=>
string(4) "8801"
}
["aAddress"]=>
array(1) {
[8801]=>
string(4) "8801"
}
["sClass"]=>
string(7) "amenity"
["sType"]=>
string(3) "pub"
["sHouseNumber"]=>
string(0) ""
["fLat"]=>
string(0) ""
["fLon"]=>
string(0) ""
["fRadius"]=>
string(0) ""
}
}
[11]=>
array(1) {
[0]=>
array(11) {
["iSearchRank"]=>
int(11)
["iNamePhrase"]=>
int(1)
["sCountryCode"]=>
bool(false)
["aName"]=>
array(1) {
[8802]=>
string(4) "8802"
}
["aAddress"]=>
array(1) {
[8802]=>
string(4) "8802"
}
["sClass"]=>
string(7) "amenity"
["sType"]=>
string(3) "pub"
["sHouseNumber"]=>
string(0) ""
["fLat"]=>
string(0) ""
["fLon"]=>
string(0) ""
["fRadius"]=>
string(0) ""
}
}
}
<table border="1"><tbody><tr><th>rank</th><th>Name Tokens</th><th>Address
Tokens</th><th>country</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr><tr><td>2</td><td>#
benfleet#</td><td><br></td><td><br></td><td>amenity</td><td>pub</td><td><br></td><td><br></td><td><br></td><td><br></td></tr><tr><td>11</td><td>#benfleet#</td><td><br></td><td><br></td><td>amenity</td><td>pub</td><td><br>
</td><td><br></td><td><br></td><td><br></td></tr></tbody></table>string(4)
"<hr>"
array(11) {
["iSearchRank"]=>
int(2)
["iNamePhrase"]=>
int(1)
["sCountryCode"]=>
bool(false)
["aName"]=>
array(1) {
[8801]=>
string(4) "8801"
}
["aAddress"]=>
array(1) {
[8801]=>
string(4) "8801"
}
["sClass"]=>
string(7) "amenity"
["sType"]=>
string(3) "pub"
["sHouseNumber"]=>
string(0) ""
["fLat"]=>
string(0) ""
["fLon"]=>
string(0) ""
["fRadius"]=>
string(0) ""
}
<table border="1"><tbody><tr><th>rank</th><th>Name Tokens</th><th>Address
Tokens</th><th>country</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr><tr><td>0</td><td>#
benfleet#</td><td><br></td><td><br></td><td>amenity</td><td>pub</td><td><br></td><td><br></td><td><br></td><td><br></td></tr></tbody></table>string(523)
"select
place_id,ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-225,83.14),ST_Point(225,-76.06)),4326),
centroid) as
in_small,ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-675,242.34),ST_Point(675,-235.26)),4326),
centroid) as in_large from search_name where name_vector @>
ARRAY[8801] order by
ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-225,83.14),ST_Point(225,-76.06)),4326),
centroid) desc,
ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-675,242.34),ST_Point(675,-235.26)),4326),
centroid) desc, search_rank ASC limit 10"
string(127) "select place_id from placex where place_id in (19334,3794)
and class='amenity' and type='pub' order by rank_search asc limit 10"
string(241) "select l.place_id from placex as l,placex as f where
f.place_id in (19334,3794) and ST_DWithin(l.geometry, f.geometry, 0.01)
and l.class='amenity' and l.type='pub' order by ST_Distance(l.geometry,
f.geometry) asc, l.rank_search ASC limit 10"
array(0) {
}
string(4) "<hr>"
array(11) {
["iSearchRank"]=>
int(11)
["iNamePhrase"]=>
int(1)
["sCountryCode"]=>
bool(false)
["aName"]=>
array(1) {
[8802]=>
string(4) "8802"
}
["aAddress"]=>
array(1) {
[8802]=>
string(4) "8802"
}
["sClass"]=>
string(7) "amenity"
["sType"]=>
string(3) "pub"
["sHouseNumber"]=>
string(0) ""
["fLat"]=>
string(0) ""
["fLon"]=>
string(0) ""
["fRadius"]=>
string(0) ""
}
<table border="1"><tbody><tr><th>rank</th><th>Name Tokens</th><th>Address
Tokens</th><th>country</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr><tr><td>0</td><td>#benfleet#</td><td><br></td><td><br></td><td>amenity</td><td>pub</td><td><br></td><td><br>
</td><td><br></td><td><br></td></tr></tbody></table>string(523)
"select
place_id,ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-225,83.14),ST_Point(225,-76.06)),4326),
centroid) as
in_small,ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-675,242.34),ST_Point(675,-235.26)),4326),
centroid) as in_large from search_name where name_vector @>
ARRAY[8802] order by
ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-225,83.14),ST_Point(225,-76.06)),4326),
centroid) desc,
ST_Contains(ST_SetSRID(ST_MakeBox2D(ST_Point(-675,242.34),ST_Point(675,-235.26)),4326),
centroid) desc, search_rank ASC limit 10"
string(185) "select place_id from placex where place_id in
(19334,22243,22244,453174,738688,738689,772432,698300,1239202,1033786)
and class='amenity' and type='pub' order by rank_search asc limit 10"
string(299) "select l.place_id from placex as l,placex as f where
f.place_id in
(19334,22243,22244,453174,738688,738689,772432,698300,1239202,1033786)
and ST_DWithin(l.geometry, f.geometry, 0.01) and l.class='amenity' and
l.type='pub' order by ST_Distance(l.geometry, f.geometry) asc,
l.rank_search ASC limit 10"
array(10) {
[0]=>
string(5) "38547"
[1]=>
string(6) "120706"
[2]=>
string(5) "92761"
[3]=>
string(5) "92765"
[4]=>
string(5) "41184"
[5]=>
string(5) "37195"
[6]=>
string(5) "34616"
[7]=>
string(5) "34615"
[8]=>
string(5) "92770"
[9]=>
string(5) "41176"
}
string(4) "<hr>"
string(1192) "select
osm_type,osm_id,class,type,rank_search,rank_address,min(place_id) as
place_id,country_code,get_address_by_language(place_id,
ARRAY['name:en-us','name:en','name','ref','type']) as
langaddress,get_name_by_language(name,
ARRAY['name:en-us','name:en','name','ref','type']) as
placename,get_name_by_language(name, ARRAY['ref']) as
ref,avg(ST_X(ST_Centroid(geometry))) as
lon,avg(ST_Y(ST_Centroid(geometry))) as lat from placex where place_id
in (38547,120706,92761,92765,41184,37195,34616,34615,92770,41176) group
by
osm_type,osm_id,class,type,rank_search,rank_address,country_code,get_address_by_language(place_id,
ARRAY['name:en-us','name:en','name','ref','type'])
,get_name_by_language(name,
ARRAY['name:en-us','name:en','name','ref','type'])
,get_name_by_language(name, ARRAY['ref']) order by
rank_search,rank_address,CASE when min(place_id) = 38547 then 0 when
min(place_id) = 120706 then 1 when min(place_id) = 92761 then 2 when
min(place_id) = 92765 then 3 when min(place_id) = 41184 then 4 when
min(place_id) = 37195 then 5 when min(place_id) = 34616 then 6 when
min(place_id) = 34615 then 7 when min(place_id) = 92770 then 8 when
min(place_id) = 41176 then 9 ELSE 10000000 END ASC"
<br><span class="name"><br><br><br><br><br></span>