[OSM-dev] TIGER upload automation
Dave Hansen
dave at sr71.net
Wed Aug 29 18:40:38 BST 2007
On Wed, 2007-08-29 at 19:14 +0200, Martijn van Oosterhout wrote:
> On 8/29/07, Dave Hansen <dave at sr71.net> wrote:
> > I'm using a hacked up version of bulk_upload.pl, along with a ton of
> > previously-generated TIGER files. I've written scripts to automate the
> > upload.
>
> As author of bulk_upload.pl I'd be very interested in what hacks you've made :)
I made the process() callback that OsmChangeReader invokes return a value,
so that we can differentiate between objects that were uploaded and objects that were not.
I also added a -t (--tags) option that accepts additional key=value tags to add to every object in the osm file.
This is so I can add something like --tags 'tiger:county=Sherman, OR' or
--tags 'tiger:upload_uuid=yyyyyyyyy'. Just in case an upload goes
bonkers, I can go find the objects from just _that_ upload.
I also tried to change some of the time estimates so that they only
track the time we spend uploading. This is still really hackish.
I'd be happy to separate out and fix up some of these changes if you'd be
interested in including them in the main version.
Index: perl_lib/Geo/OSM/OsmChangeReader.pm
===================================================================
--- perl_lib/Geo/OSM/OsmChangeReader.pm (revision 4339)
+++ perl_lib/Geo/OSM/OsmChangeReader.pm (working copy)
@@ -52,6 +52,7 @@
my $fh = data_open($file_name);
$self->{fh} = $fh;
$self->{count}=0;
+ $self->{processed_count}=0;
die "Cannot open OSM File $file_name\n" unless $fh;
eval {
$P->parse($fh);
@@ -140,26 +141,30 @@
{
if( $Name eq $self->{entity} )
{
+ my $ret = 0;
if( $self->{filetype} == FILETYPE_OSMCHANGE )
{
- $self->{proc}->( $self->{command}, $self->{entity}, $self->{attr}, $self->{tags}, $self->{segs} );
+ $ret = $self->{proc}->( $self->{command}, $self->{entity}, $self->{attr}, $self->{tags}, $self->{segs} );
}
else # FILETYPE_OSM
{
# Only entities with a modify tag are interesting, or if they have a negative ID (that's create)
if( exists $self->{attr}->{action} )
{
- $self->{proc}->( $self->{attr}->{action}, $self->{entity}, $self->{attr}, $self->{tags}, $self->{segs} );
+ $ret = $self->{proc}->( $self->{attr}->{action}, $self->{entity}, $self->{attr}, $self->{tags}, $self->{segs} );
}
elsif( $self->{attr}{id} < 0 )
{
- $self->{proc}->( "create", $self->{entity}, $self->{attr}, $self->{tags}, $self->{segs} );
+ $ret = $self->{proc}->( "create", $self->{entity}, $self->{attr}, $self->{tags}, $self->{segs} );
}
}
$self->{count}++;
- if( $self->{progress} and ($self->{count}%11) == 1)
+ if ($ret) {
+ $self->{processed_count}++;
+ }
+ if( $self->{progress} )
{
- $self->{progress}->($self->{count}, tell($self->{fh})/(-s $self->{fh}) );
+ $self->{progress}->($self->{count}, tell($self->{fh})/(-s $self->{fh}), $self->{processed_count});
}
$self->{state} = STATE_EXPECT_ENTITY;
}
Index: import/bulk_import/bulk_upload.pl
===================================================================
--- import/bulk_import/bulk_upload.pl (revision 4339)
+++ import/bulk_import/bulk_upload.pl (working copy)
@@ -7,6 +7,7 @@
BEGIN {
unshift @INC, "../../perl_lib";
+ unshift @INC, $ENV{HOME}."/projects/osm/svn.openstreetmap.org/applications/utils/perl_lib";
}
use Geo::OSM::OsmChangeReader;
@@ -17,6 +18,7 @@
my $force = 0;
my $dry_run = 0;
my $loop = 0;
+my %additional_tags;
Getopt::Long::Configure('no_ignore_case');
GetOptions (
@@ -29,12 +31,13 @@
'f|force+' => \$force,
'n|dry-run' => \$dry_run,
'l|loop' => \$loop,
+ 't|tags=s%' => \%additional_tags,
) or pod2usage(1);
pod2usage(1) if $help;
#$api ||= "http://openstreetmap.gryph.de/api/0.5/";
-$api ||= "http://www.openstreetmap.org/api/0.4";
+$api ||= "";#http://www.openstreetmap.org/api/0.4";
if( not $dry_run and (not defined $username or not defined $password) )
{
@@ -44,6 +47,7 @@
my $OSM = new Geo::OSM::OsmChangeReader(\&process,\&progress);
my $start_time = time();
+my $started_processing_per = 0;
my $last_time = 0;
my $uploader = new Geo::OSM::APIClient( api => $api, username => $username, password => $password )
unless $dry_run;
@@ -59,10 +63,14 @@
} while($loop and $did_something);
use Data::Dumper;
+my $resolved_in_cache = 0;
sub process
{
my($command, $entity, $attr, $tags, $segs) = @_;
-
+
+ foreach my $key (keys %additional_tags) {
+ push @{$tags}, $key, $additional_tags{$key};;
+ }
my $ent;
if( $entity eq "node" )
{
@@ -79,7 +87,10 @@
# print Dumper($ent);
# print $ent->xml;
return if $dry_run;
- return if resolve_ids( $ent, $command );
+ if (resolve_ids( $ent, $command )) {
+ $resolved_in_cache++;
+ return 0;
+ }
my $id;
if( $command eq "create" )
@@ -99,14 +110,14 @@
{
print "Error: ".$uploader->last_error_code()." ".$uploader->last_error_message." ($command ".$ent->type()." ".$ent->id().")\n";
# Unless force is on, exit on any error
- exit if $force == 0;
+ exit(1) if $force == 0;
# For force==1, exit on Bad Request, Unauthorized or Interal Server Error
# These shouldn't happen, but tend to keep happening when they do
# Note: when you can't connect to server you get error 500
if( $force == 1 )
{
my $code = $uploader->last_error_code();
- exit if $code == 401 or $code == 400 or $code == 500;
+ exit(2) if $code == 401 or $code == 400 or $code == 500;
}
# Force==2 is keep on going, no matter what
}
@@ -115,7 +126,7 @@
mark_done( $ent, $command, $id );
$did_something = 1;
}
-
+ return 1;
# my $id = $uploader->upload($ent);
}
@@ -123,13 +134,25 @@
{
my $count = shift;
my $perc = shift;
+ my $processed_count = shift;
+ my $already_processed = $count - $processed_count;
my $time = time();
# print "$time == $last_time or $last_time == $start_time\n";
- return if $time == $last_time or $time == $start_time;
+# return if $time == $last_time or $time == $start_time;
- $last_time = $time;
- my $remain = (1-$perc)*($last_time - $start_time)/$perc;
- printf STDERR "%10d %7.2f%% %3d:%02d:%02d \r", $count, $perc*100, int($remain)/3600, int($remain/60)%60, int($remain)%60;
+ $last_time = $time;
+ printf STDERR "previously processed: %10d uploaded: %10d, %7.2f%% of file processed",
+ $already_processed, $count-$already_processed, $perc*100;
+ my $perc_of_file_processed = $perc - $started_processing_per;
+ if ($perc_of_file_processed) {
+ my $remain = (1-$perc_of_file_processed)*($last_time - $start_time)/$perc_of_file_processed;
+ printf STDERR " %3d:%02d:%02d remaining", int($remain)/3600, int($remain/60)%60, int($remain)%60;
+ }
+ printf STDERR "\r";
+ if (!$processed_count) {
+ $started_processing_per = $perc;
+ $start_time = $time;
+ }
}
my %resolved;
-- Dave
More information about the dev
mailing list