[OSM-dev] Too many slow queries in db
Dave Hansen
dave at sr71.net
Tue Sep 4 05:57:31 BST 2007
On Tue, 2007-09-04 at 00:28 +0100, Jon Burgess wrote:
> I've also got some evidence that the 758M object size quoted on the
> tiger stats page is wrong, with the true figure being only half that
> size. I'm still downloading more of the data to confirm this.
Well, for goodness sake, please keep them to yourself!
Seriously, though. I wrote this pile in about 20 seconds and never
thought about it again. The shell script below generates the "statfiles",
and the Perl script after it generates the web page. Go to town :)
#!/bin/sh
# Generate a "<name>.osm.stats" file next to every county extract under
# counties/.  Each stats file holds three "key : value" lines counting the
# <node>, <segment> and <way> elements that start a line in the extract.
# Extracts that already have a stats file are skipped, so the script can be
# re-run incrementally.
#
# Only POSIX shell features are used: the original relied on the bash-only
# ${i/.gz/} substitution while declaring #!/bin/sh, which fails under
# shells such as dash.  ${i%.gz} also strips ".gz" only when it is a suffix.
find counties/ \( -name '*.osm.gz' -o -name '*.osm' \) | while read -r i; do
	# Stats files are always named after the uncompressed file name.
	base="${i%.gz}"
	[ -e "$base.stats" ] && continue
	echo "$i"
	# Pick zcat for compressed extracts and plain cat otherwise.
	z=""
	if [ "$base" != "$i" ]; then
		z="z"
	fi
	# "+0" forces a numeric 0 to be printed when a tag never occurs;
	# an uninitialised awk variable would otherwise print as empty.
	"${z}cat" "$i" | awk '/^<node/ {nodes++;}
		/^<segment/ {segs++;}
		/^<way/ {ways++;}
		END {print "nodes : ", nodes+0;
		print "segments: ", segs+0;
		print "ways : ", ways+0;}' \
		> "$base.stats" || break
done
#!/usr/bin/perl
# make stat files:
# find counties/ -name '*.osm.gz' | while read i; do [ -e "$i.stats" ] && continue; echo $i; zcat "$i" | awk '/^<node/ {nodes++;} /^<segment/ {segs++;} /^<way/ {ways++;} END {print "nodes : ", nodes; print "segments: ", segs; print "ways : ", ways;}' > "$i.stats"; done
use daveperl;
# Root directory holding one sub-directory per state; named once so the
# listing below and the later per-state loop agree on it.
my $state_dir = 'counties';

# Open the HTML page and stamp it with the output of the system `date`.
print "<HTML><PRE>\n";
print `date`;

# ls_nodot comes from daveperl; presumably it lists directory entries
# without dot-files -- verify against that module.
my @states = ls_nodot($state_dir);
# Parse a simple "key<separator>value" file into a hash reference.
#
# Arguments:
#   $file      - path of the file to read
#   $separator - optional pattern handed to split(); defaults to ':'
#
# Returns a hash reference mapping each key to its value.  All whitespace
# is stripped from every line before it is split.  The result is always a
# hash reference: it is empty when the file is missing (a notice is
# printed in that case) or contains no usable lines.
sub read_into_hash
{
    my $file = shift;
    my $separator = shift;
    if (!defined($separator) || !length($separator)) {
        $separator = ':';
    }
    my $hash = {};
    if (! -e $file) {
        print "$file does not exist\n";
        # Nothing to read; do not hand a missing path to the file reader.
        return $hash;
    }
    foreach my $line (cat_file_into_array($file)) {
        $line =~ s/\s//g;
        # Blank lines would otherwise create a bogus empty key.
        next if (!length($line));
        # Limit of 2 keeps values that themselves contain the separator.
        my ($var, $val) = split($separator, $line, 2);
        next if (!defined($var) || !length($var));
        $hash->{$var} = $val;
    }
    return $hash;
}
# Country-wide totals keyed by stat name ("nodes", "segments", "ways",
# "counties") plus a "<name>-completed" twin for finished counties.
my %country;

# For every state: sum the per-county stat files, report each county's
# upload status, rewrite the state's ".stats" summary file and fold the
# state totals into %country.
foreach my $state (@states) {
    my $state_stat_file = "$state_dir/$state/.stats";
    my $state_stat_hash = read_into_hash($state_stat_file);

    # Collect county extracts (*.osm or *.osm.gz), named without ".gz".
    # A county present in both forms is listed once so it is not counted
    # twice.
    my %seen_county;
    my @counties;
    foreach my $entry (ls("$state_dir/$state")) {
        next if ($entry !~ /\.osm(?:\.gz)?$/);
        (my $extract = $entry) =~ s/\.gz$//;
        next if ($seen_county{$extract}++);
        push @counties, $extract;
    }

    #printf "nr counties in dir: %d\n", scalar @counties;
    #printf "nr counties in stat file: %d\n", $state_stat_hash->{counties};
    if ($state_stat_hash && (scalar @counties) == ($state_stat_hash->{counties} || 0)) {
        # Skipping states whose county count is unchanged is disabled,
        # so every state is always recomputed.
        #next;
    }

    # Start the state totals from scratch; the old file is only consulted
    # for the (disabled) shortcut above.
    $state_stat_hash = {};
    foreach my $county (@counties) {
        my $county_file = "$state_dir/$state/$county";
        my $county_stat_hash = read_into_hash("$county_file.stats");
        (my $county_name = $county) =~ s/\.osm$//;

        # keys() in numeric context is the key count; the original wrapped
        # it in length(), which is never zero, so this check never fired.
        if (!$county_stat_hash || !keys(%$county_stat_hash)) {
            die "bad read of county stat file: $county_file.stats";
        }

        my $is_completed = 0;
        if (-f "$county_file.completed") {
            printf "%30s complete\n", "$county_name, $state";
            $is_completed = 1;
            $country{"counties-completed"}++;
        } elsif (-f "$county_file.uuid") {
            # The log is split on carriage returns and only the final
            # piece is shown.
            my $log = cat_file("$county_file.log");
            my @parts = split(/\r/, defined($log) ? $log : '');
            my $last_part = @parts ? $parts[-1] : '';
            # Names go through %s so a '%' in them cannot corrupt the format.
            printf "%s, %s being processed: %s\n", $county_name, $state, $last_part;
        }

        foreach my $key (keys %$county_stat_hash) {
            # Add each county exactly once; the original added it twice,
            # which doubled every total.
            $state_stat_hash->{$key} += $county_stat_hash->{$key};
            if ($is_completed) {
                $state_stat_hash->{"$key-completed"} += $county_stat_hash->{$key};
            }
        }
    }

    # Rewrite the state summary file from the fresh totals.
    unlink $state_stat_file;
    foreach my $key (keys %$state_stat_hash) {
        my $val = $state_stat_hash->{$key};
        $country{$key} += $val;
        file_append_line($state_stat_file, "$key:$val");
    }
    file_append_line($state_stat_file, "counties:".scalar(@counties));
    $country{counties} += scalar(@counties);
}
# Print the country-wide totals table, then a completion estimate based
# on the average upload rate so far, and close the HTML page.
printf "Entire Country:\n";
printf("%20s %10s/%10s\n", "", "complete", "total");
foreach my $key (sort keys %country) {
    # "<name>-completed" entries are shown alongside their base key.
    next if ($key =~ /-completed$/);
    my $val = $country{$key} || 0;
    my $complete = $country{"$key-completed"} || 0;
    # Avoid dividing by zero when a total is empty.
    my $percent = $val ? (100.0*$complete/$val) : 0;
    printf("%20s:%10s/%10s (%5.2f%%)\n", $key, $complete, $val, $percent);
}

# Hard-coded epoch timestamp minus five days; presumably the moment the
# upload started -- confirm before relying on the estimate.
my $start_time = 1187801839-86400*5;
my $now = time();
my $elapsed = $now - $start_time;
my $total_objects = 0;
my $total_completed_objects = 0;
# "for my $type (qw(...))": a bare qw() list without parentheses is a
# syntax error on modern perls.
for my $type (qw(nodes segments ways)) {
    $total_objects += ($country{$type} || 0);
    $total_completed_objects += ($country{"$type-completed"} || 0);
}
my $objects_per_sec = ($elapsed > 0) ? ($total_completed_objects / $elapsed) : 0;
if ($objects_per_sec > 0) {
    # Time still needed = objects left / objects per second.  The original
    # multiplied the total by the rate, which is not a duration.
    my $remaining_objects = $total_objects - $total_completed_objects;
    $remaining_objects = 0 if ($remaining_objects < 0);
    my $required_seconds = $remaining_objects / $objects_per_sec;
    my $required_days = $required_seconds / 86400;
    my $required_years = $required_days / 365.25;
    printf("at this rate of upload, the entire US will be done in: %d seconds or %d days or %f years\n",
           $required_seconds, $required_days, $required_years);
    printf("on %s\n", scalar localtime($required_seconds+$now));
} else {
    # No rate can be derived until at least one object is completed.
    print "no completed objects yet, so no completion estimate is available\n";
}
print "</PRE></HTML>\n";
-- Dave
More information about the dev
mailing list