#!/usr/bin/perl
# $AFresh1: errata_scraper.pl,v 1.3 2011/03/23 18:46:16 andrew Exp $
########################################################################
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com>
#
# NOTE(review): this file was recovered from a side-by-side diff of
# version 1.1 (2011/03/21 17:26:58) and version 1.4 (2011/05/29 02:04:30).
# The two revisions were interleaved line by line, which left the text
# syntactically invalid.  What follows is the newer (1.4) side only; the
# older Mojo::Client callback implementation has been dropped.
# The remainder of the license header was not present in the recovered
# text -- TODO restore it from the repository.
########################################################################

use strict;
use warnings;

use Mojo::UserAgent;

# Every page fetched below is addressed relative to this site root.
my $base_uri = 'http://www.openbsd.org/';

my $ua = Mojo::UserAgent->new;

# Collect every link on the errata index whose href starts with "errata".
my $release_links
    = $ua->get( $base_uri . 'errata.html' )->res->dom('a[href^="errata"]');

# Only the last two matching links on the index page are processed.
foreach my $release_link ( @{$release_links}[ -2, -1 ] ) {
    print 'Errata for OpenBSD ', $release_link->text, "\n";

    # Walk the <li> elements of the per-release page in reverse document
    # order.
    foreach my $erratum (
        reverse @{
            $ua->get( $base_uri . $release_link->attrs->{'href'} )
                ->res->dom('li')
        }
        )
    {

        # Each field is cut out of the <li> as it is read, so that the
        # final all_text call is left with only the description.
        # NOTE(review): this relies on replace('') handing back the
        # element that was replaced (so ->{href} and ->all_text read the
        # removed node) -- confirm against the installed Mojolicious.
        my $patch = $erratum->at('a[href$=".patch"]')->replace('')->{href};
        my $title = $erratum->at('strong')->replace('')->all_text;
        my $arch  = $erratum->at('i')->replace('')->all_text;

        my $descr = $erratum->all_text;
        $descr =~ s/\s+/ /gs;                 # collapse whitespace runs
        $descr =~ s/\s(\.(?:\s|$))/$1/gs;     # no whitespace before a period
        $descr =~ s/\.+$/./gs;                # one period at the very end

        print 'Title: ', $title, "\n";
        print 'Arch: ',  $arch,  "\n";
        print 'Patch: ', $patch, "\n";
        print 'Descr: ', $descr, "\n";
        print "\n";
    }
}
|