[OSM-dev] TIGER import.rb

Dave Hansen dave at sr71.net
Sat Jun 16 19:56:57 BST 2007


Hi Brandon,

Here's a diff which takes your code and makes it use the class files
that I posted earlier.  Unlike the code in svn, it doesn't need the
intermediate DB, but it still coalesces adjacent ways into one
another.

It produces what look to me to be pretty usable .osm files.  They pass
the validator checks, and look pretty good.  I think the main question
to address is how to do the addressing.

Generally, do you agree that we need to maximize the number of segments
that we have in each way?  If we hack the address information into the
nodes like we were discussing earlier, do you think we can morph it over
into any new data formats that come along?

Also, are there any tags we should be adding to the nodes so that we can
reconcile them against TIGER data later?  For diff'ing purposes, it
might be nice to be able to keep track of which nodes in OSM came from
which points in the TIGER data.  That way, if we want to import the 2007
data down the line, we can just figure out which nodes are missing from
OSM.

diff -ruN orig/dave_model.rb dave-model/dave_model.rb
--- orig/dave_model.rb	1969-12-31 16:00:00.000000000 -0800
+++ dave-model/dave_model.rb	2007-06-16 11:48:15.000000000 -0700
@@ -0,0 +1,232 @@
+module OSMModel
+require 'fileutils'
+require 'find'
+require 'mysql'
+require 'net/http'
+require 'timeout'
+require 'uri'
+
+def debug(string) # Debug hook; does nothing while the stderr write below stays commented out.
+	#$stderr.puts string
+end
+
+def hash_to_tags(h) # Renders every key/value pair of h as one <tag .../> line and returns the concatenated String.
+	s = ''
+	h.each_pair do |key, value|
+		next if value.nil? # nil values produce no tag
+		next if value.to_s.length == 0 # NOTE(review): the two gsub calls below discard their results, so value is emitted unescaped
+		value.gsub(/\'/, "\\'")
+		value.gsub(/\&/, "amp")
+		s += "<tag k=\"#{key}\" v=\"#{value}\"/>\n"
+	end
+	s
+end
+module_function :hash_to_tags # makes it callable as OSMModel.hash_to_tags, which is how the to_xml methods use it
+
+class Point # An OSM node: a lat/lon position, a tag hash and an osmid.
+	attr_accessor :osmid
+        def initialize point, tags={} # point is indexed [lon, lat]: point[0] goes to @lon, point[1] to @lat
+	      @osmid = 0 # 0 until a caller assigns a real id through the accessor
+	      @visible="true"
+	      @lat = point[1]
+	      @lon = point[0]
+	      @tags = tags
+	end
+	def distance(lat, lon) # straight-line distance in raw coordinate units (no geodesic correction)
+		latdiff = (lat - @lat).abs
+		londiff = (lon - @lon).abs
+		Math.sqrt(latdiff*latdiff + londiff*londiff)
+	end
+	def to_xml # node XML on the first call, an empty string on every later call
+		return "" if @printed
+		@printed = true
+		return "  <node id=\'#{@osmid}\' visible=\'true\' lat=\'#{@lat}\' lon=\'#{@lon}\'>\n" +
+			OSMModel.hash_to_tags(@tags) + "</node>\n"
+	end
+	def id # same value as osmid
+		@osmid
+	end
+	def to_s # NOTE(review): @id is never assigned in this class (the id is kept in @osmid), so the id field prints empty
+		"lat: #{@lat} lon: #{@lon} id: #{@id}"
+	end
+end
+ 
+# Segment: a directed link between two endpoints (from -> to)
+# with its own tag hash and osmid.
+#
+# to_xml emits both endpoints before the <segment> element and,
+# like Point#to_xml, produces output only on the first call.
+
+class Segment
+	attr :to
+	attr :from
+	attr :tags
+	attr_accessor :osmid
+	def initialize(from, to, tags = {})
+		@from = from
+		@to = to
+		@tags = tags
+		@printed = false
+		#debug("created segment: " + self.to_s)
+  	end
+	def comes_before(seg) # true when this segment ends where seg starts; Point defines no ==, so Points compare by identity
+		if self.to == seg.from
+			return true
+		end
+		return false
+	end
+	def shares_origin(seg) # true when both segments start at the same endpoint
+		return self.from == seg.from
+	end
+	def shares_terminus(seg) # true when both segments end at the same endpoint
+		return self.to == seg.to
+	end
+	def reverse # swaps the two endpoints in place
+		tmp = @from
+		@from = @to
+		@to = tmp
+	end
+	def to_xml # XML for both endpoints plus the segment itself on the first call, an empty string afterwards
+		return "" if @printed
+		@printed = true
+		return from.to_xml() +
+		       to.to_xml() + 
+		"<segment from=\'#{from.id}\' to=\'#{to.id}\' id=\'#{self.id}\' visible=\'true\'>\n" +
+		OSMModel.hash_to_tags(@tags) +
+		"</segment>\n"	
+	end
+	def id # same value as osmid
+		@osmid
+	end
+	def to_s
+		"segment:\{id:#{@osmid} from:#{@from.id} to:#{@to.id}\}"
+	end
+end
+ 
+def create_segment(from_point, to_point, tags = {}) # thin wrapper around Segment.new
+	return Segment.new(from_point, to_point, tags)
+end
+
+
+class Way # An ordered chain of Segment objects plus a tag hash and an osmid.
+	attr :segs
+	attr_accessor :osmid
+	def initialize(segs, tags = {})
+		@segs = []
+		segs.each do |seg|
+			self.add_segment(seg) # return value ignored: a segment that attaches at neither end is left out
+		end
+		@tags = tags
+		self.verify
+  	end
+	def reverse # flips the direction of the whole way: every segment in place, then the list order
+		@segs.each do |seg|
+			seg.reverse
+		end
+		@segs.reverse!
+		self.verify
+	end
+	def name # NOTE(review): an identical name method is defined again further down in this class
+		return @tags["name"]
+	end
+	def __add_segment(newseg) # attaches newseg as given (no flipping) at the head or the tail; returns true on success
+		if @segs.empty?
+			#debug("added " + newseg.to_s  + " to empty way #{@osmid}")
+			@segs.push(newseg)
+			self.verify
+			return true
+		end
+		if newseg.comes_before(@segs.first)
+			#debug("added "+newseg.to_s+" to head of way #{@osmid} before " + segs.first.to_s)
+			@segs.unshift(newseg)
+			self.verify
+			return true
+		end
+		if @segs.last.comes_before(newseg)
+			#debug("added "+newseg.to_s+" to tail of way #{@osmid} after " + segs.last.to_s)
+			@segs.push(newseg)
+			self.verify
+			return true
+		end
+		self.verify
+		return false
+	end
+	def add_segment(newseg) # like __add_segment, with a second attempt using the endpoints swapped
+		if __add_segment(newseg)
+			return true
+		end
+		# try the segment in the opposite direction
+		newseg = Segment.new(newseg.to, newseg.from, newseg.tags) # a fresh Segment; the osmid of the original is not copied
+		return __add_segment(newseg)
+	end
+	def combine_with(way) # splices the segments of way onto the head or tail of this way; returns true when joined
+		# check tag compatibility here
+		if way.segs.empty?
+			$stderr.puts "no segments in way: " + way.to_s
+			return false
+		end
+		if @segs.empty?
+			$stderr.puts "no segments in self: " + self.to_s
+			return false
+		end
+		if way.segs.first.shares_origin(@segs.first) == true
+			#debug "origin going to reverse way: " + way.to_s + "\n to fit with way: " + self.to_s
+			way.reverse # mutates the way that was passed in, whether or not the join below succeeds
+			#debug "reversed way: " + way.to_s
+		end
+		if way.segs.last.shares_terminus(@segs.last) == true
+			#debug "terminus going to reverse way: " + way.to_s + "\n to fit with way: " + self.to_s
+			way.reverse # same in-place reversal of the argument
+			#debug "reversed way: " + way.to_s
+		end
+			
+		combined = false
+		if way.segs.last.comes_before(@segs.first)
+			@segs = way.segs | @segs # Array#| is a set union: order is kept, duplicate elements are dropped
+			combined = true
+		elsif @segs.last.comes_before(way.segs.first)
+			@segs.concat(way.segs)
+			combined = true
+		else
+			way.verify
+		end
+		# if the passed in way is empty, we've succeeded
+		self.verify
+		return combined
+	end
+	def to_xml # XML for every segment (and its endpoints) followed by the <way> element listing the segment ids
+		@segs.map { |seg| seg.to_xml }.join +
+		"<way id=\'#{@osmid}' visible=\'true\'>\n" +
+		@segs.map { |seg| "<seg id=\'#{seg.id}\'/>\n" }.join +
+		OSMModel.hash_to_tags(@tags) +
+		"</way>"
+	end
+	def name # duplicate of the earlier definition; being the later one, this is the definition in effect
+		return @tags["name"]
+	end
+	def id # same value as osmid
+		@osmid
+	end
+	def verify # consistency check: each segment should start where the previous one ends
+		return # NOTE(review): unconditional return - the check below never runs
+		last_seg = nil
+		@segs.each do |seg|
+			if !last_seg.nil?
+				if last_seg.to != seg.from
+					$stderr.puts "invalid way: " + self.to_s
+					exit(1)
+				end
+			end
+			last_seg = seg
+		end
+	end
+	def to_s
+		ret = "way:\{id:#{@osmid} segments: ["
+		@segs.each do |seg|
+			ret += "\n   " + seg.to_s + ","
+		end
+		ret += "]\}"
+	end
+end
+
+end
diff -ruN orig/tiger.rb dave-model/tiger.rb
--- orig/tiger.rb	2007-06-16 07:29:23.000000000 -0700
+++ dave-model/tiger.rb	2007-06-16 11:48:15.000000000 -0700
@@ -120,7 +120,7 @@
       fp = File.new filename, "r"
       i=0
       fp.each_line do |line|
-        if i%5000 == 0 then print sprintf("%.1f", (Float(fp.pos)/fp_size)*100 ) + "%\n" end
+        if i%((fp_size/100).to_i)== 0 then print sprintf("%.1f", (Float(fp.pos)/fp_size)*100 ) + "%\r" end
 
         record = record_class.new( line )
         if key_field then
@@ -131,6 +131,7 @@
 
         i += 1
       end
+      print "\n"
       fp.close
 
       @records
@@ -206,6 +207,10 @@
     attr_reader :filename_base, :features
 
     def initialize directory
+      if FileTest.directory?(directory) == false
+	$stderr.puts directory + " is not a directory"
+	exit 1;
+      end
       @features = nil
       @filename_base = Dir["#{directory}/*.RT1"].first
       if @filename_base then
diff -ruN orig/to_osm.rb dave-model/to_osm.rb
--- orig/to_osm.rb	2007-06-16 07:29:23.000000000 -0700
+++ dave-model/to_osm.rb	2007-06-16 11:48:15.000000000 -0700
@@ -3,11 +3,74 @@
 require 'tiger'
 require 'attr_map'
 require 'cgi' #to escape srings
-require 'osm_model'
+require 'dave_model'
 
 module TigerLine
 
-  class Dataset
+class Dataset
+def new_point(lat, lon, tags = {}) # Returns the Point stored in $points within 1e-7 of (lat, lon), else creates and stores a new one.
+	point = nil
+	key = lat+lon # NOTE(review): the sum is not unique per coordinate pair; clashes are handled by the probing below
+	loop = 0;
+	while $points.has_key?(key) # NOTE(review): $points is not assigned anywhere in the visible code - confirm where it is set up
+		point = $points[key]
+		return point if point.distance(lat,lon) < 0.0000001
+		key += lat+lon # occupied by a different point: probe the next key
+		loop+=1
+		$stderr.puts "loop nr: #{loop}" if loop > 10
+	end
+	point = Point.new(lat, lon, tags) # NOTE(review): Point in dave_model.rb is built from (point, tags) and lives in OSMModel - confirm this call
+	while $points.has_key?(key) # the loop above only falls through once key is unused, so this looks redundant - confirm
+		key += lat+lon
+	end
+	$points[key] = point
+	point
+end
+
+    def create_way(segs, tags = {}) # Wraps segs/tags in a Way, then merges it into a stored way of the same name or stores it under @ways[name].
+	if @ways.nil? # @ways maps a name to the Array of ways carrying it; created on first use
+		@ways = Hash.new
+	end
+    	name = tags["name"]
+    	new_way = OSMModel::Way.new(segs, tags)
+    	#debug("processing possible new way with name: \"" + name + "\" nr_segments: " + segs.length.to_s)
+    	#debug(new_way.to_s)
+    	merged = false
+    	if @ways.has_key?(name) # name is nil when tags has no name entry; nil then serves as an ordinary key
+    		@ways[name].each do |way|
+    			#debug("found another way with name: \"" + name + "\": " + way.to_s)
+    			# need to double-check equal tags here
+    			if way.combine_with(new_way) # true once the segments of new_way have been spliced into way
+    				if merged == false 
+    					# we actually merged the truly
+    					# new way into an existing one
+    					merged = true
+    				else
+    					# the new way connected two older ways,
+    					# we need to delete the way that 
+    					# was consolidated
+    					@ways[name].delete(new_way) # NOTE(review): deletes from @ways[name] while the enclosing each is still iterating it
+    				end
+    				new_way = way # later iterations now try to join this enlarged way to the remaining ones
+    			end
+    		end
+    	end
+    
+    	# we did not consolidate this way into another, make sure
+    	# to get it into the hash as a new entry
+    	if merged == false
+    		#debug("creating brand new way "+new_way.to_s+"with name: \"" + name + "\" nr_segments: " + segs.length.to_s)
+    		if @ways[name].nil?
+    			#debug("creating array for \"" + name + "\"")
+    			@ways[name] = []
+    		end
+    		@ways[name].push(new_way)
+    	else
+    		#debug("way \""+ name + "\" was consolidated, not adding hash entry " + new_way.to_s)
+    	end
+    	#debug("number of ways with name \""+ name + "\": #{@ways[name].length}")
+    end
+    
 
     def to_osm
       ret = []
@@ -25,75 +88,76 @@
         end
         osmpoints << (tzid_to_osm[feature.tzide] ||= OSMModel::Point.new( feature.points.last, {"tzid" => feature.tzide} ))
 
-        way = OSMModel::Way.new
-
+	way_segs = []
         osmpoints[0..-2].each_with_index do |point, i|
-          way.segs << OSMModel::Segment.new( point, osmpoints[i+1] )
+          way_segs << OSMModel::Segment.new( point, osmpoints[i+1] )
         end
 
+	way_tags = Hash.new
         #name tags
         feature.names.each_with_index do |name, i|
           strname = "#{name[:fedirp]} #{name[:fename]} #{name[:fetype]} #{name[:fedirs]}".strip
           ord_suffix = ("_#{i}" if i>0) or ""
-          way.tags["name#{ord_suffix}"] = "#{CGI.escapeHTML(strname)}"
-          way.tags["name_direction_prefix#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fedirp])}" if not name[:fedirp].empty?
-          way.tags["name_base#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fename])}" if not name[:fename].empty?
-          way.tags["name_type#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fetype])}" if not name[:fetype].empty?
-          way.tags["name_direction_suffix#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fedirs])}" if not name[:fedirs].empty?
+          way_tags["name#{ord_suffix}"] = "#{CGI.escapeHTML(strname)}"
+          way_tags["name_direction_prefix#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fedirp])}" if not name[:fedirp].empty?
+          way_tags["name_base#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fename])}" if not name[:fename].empty?
+          way_tags["name_type#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fetype])}" if not name[:fetype].empty?
+          way_tags["name_direction_suffix#{ord_suffix}"] = "#{CGI.escapeHTML(name[:fedirs])}" if not name[:fedirs].empty?
         end
         #address range tags
         feature.address_ranges.each_with_index do |range, i|
           ord_suffix = ("_#{i}" if i>0) or ""
-          way.tags["from_address_right#{ord_suffix}"] = "#{range[:fraddr]}" if not range[:fraddr].empty?
-          way.tags["to_address_right#{ord_suffix}"] = "#{range[:toaddr]}" if not range[:toaddr].empty?
-          way.tags["from_address_left#{ord_suffix}"] ="#{range[:fraddl]}" if not range[:fraddl].empty?
-          way.tags["to_address_left#{ord_suffix}"] = "#{range[:toaddl]}" if not range[:toaddl].empty?
-          way.tags["zip_left#{ord_suffix}"] = "#{range[:zipl]}" if not range[:zipl].empty?
-          way.tags["zip_right#{ord_suffix}"] = "#{range[:zipr]}" if not range[:zipr].empty?
+          way_tags["from_address_right#{ord_suffix}"] = "#{range[:fraddr]}" if not range[:fraddr].empty?
+          way_tags["to_address_right#{ord_suffix}"] = "#{range[:toaddr]}" if not range[:toaddr].empty?
+          way_tags["from_address_left#{ord_suffix}"] ="#{range[:fraddl]}" if not range[:fraddl].empty?
+          way_tags["to_address_left#{ord_suffix}"] = "#{range[:toaddl]}" if not range[:toaddl].empty?
+          way_tags["zip_left#{ord_suffix}"] = "#{range[:zipl]}" if not range[:zipl].empty?
+          way_tags["zip_right#{ord_suffix}"] = "#{range[:zipr]}" if not range[:zipr].empty?
         end
         #preserved tiger fields
-        way.tags["tiger:tlid"] = "#{@tlid}"
-        way.tags["tiger:cfcc"] = "#{@cfcc}"
+        way_tags["tiger:tlid"] = "#{@tlid}"
+        way_tags["tiger:cfcc"] = "#{@cfcc}"
         #misc tags
-        way.tags["source"] = "tiger_import_#{Time.now.strftime("%Y%m%d")}"
-        way.tags["reviewed"] = "no"
+        way_tags["source"] = "tiger_import_#{Time.now.strftime("%Y%m%d")}"
+        way_tags["reviewed"] = "no"
         #cfcc tags
         tags.each_pair do |key, value|
-          way.tags["#{CGI.escapeHTML(key.to_s)}"] = "#{CGI.escapeHTML(value.to_s)}"
+          way_tags["#{CGI.escapeHTML(key.to_s)}"] = "#{CGI.escapeHTML(value.to_s)}"
         end
-
-        ret << way
+	way = create_way(way_segs, way_tags)
       end
 
       return ret
     end
 
     def to_osm_xml
-      ways = self.to_osm
       ret = []
-      
-      osmid = 0
-
-      ways.each do |way|
-        way.segs.each do |seg|
-          if seg.from.osmid==0 then 
-            seg.from.osmid = (osmid+=1)
-            ret << seg.from.to_xml
-          end
-          if seg.to.osmid==0 then 
-            seg.to.osmid = (osmid+=1)
-            ret << seg.to.to_xml
-          end
-          seg.osmid = (osmid+=1)
-          ret << seg.to_xml
-        end
-        way.osmid = (osmid+=1)
-        ret << way.to_xml
+      self.to_osm
+      osmid = -1
+      ret << "<?xml version='1.0' encoding='UTF-8'?>\n"
+      ret << "<osm version='0.4' generator='JOSM'>\n"
+
+      @ways.each_key do |name|
+      	@ways[name].each do |way|
+        	way.segs.each do |seg|
+			if seg.from.osmid==0 then 
+				seg.from.osmid = (osmid-=1)
+				ret << seg.from.to_xml
+			end
+			if seg.to.osmid==0 then 
+				seg.to.osmid = (osmid-=1)
+				ret << seg.to.to_xml
+			end
+			seg.osmid = (osmid-=1)
+			ret << seg.to_xml
+		end
+		way.osmid = (osmid-=1)
+		ret << way.to_xml
+	end
       end
-
+      ret << "</osm>"
       return ret.join("\n")
     end
-
-  end
+end
 
 end


-- Dave





More information about the dev mailing list