version 1.2, 2011/03/21 17:28:15 |
version 1.4, 2011/05/29 02:04:30 |
|
|
#!/usr/bin/perl |
#!/usr/bin/perl |
# $AFresh1: errata_scraper.pl,v 1.1 2011/03/21 16:26:58 andrew Exp $ |
# $AFresh1: errata_scraper.pl,v 1.3 2011/03/23 18:46:16 andrew Exp $ |
######################################################################## |
######################################################################## |
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com> |
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com> |
# |
# |
|
|
use warnings; |
use warnings; |
|
|
use Mojo::UserAgent; |
use Mojo::UserAgent; |
use Mojo::ByteStream 'b'; |
|
|
|
Mojo::UserAgent->new->get('http://www.openbsd.org/errata48.html') |
my $base_uri = 'http://www.openbsd.org/'; |
->res->dom('li')->each( |
|
sub { |
|
my $e = shift; |
|
|
|
my $patch = $e->at('a[href$=".patch"]')->attrs->{href}; |
my $ua = Mojo::UserAgent->new; |
my $title = b( $e->at('strong')->replace('')->all_text )->trim; |
|
my $arch = b( $e->at('i')->replace('')->all_text )->trim; |
my $ls = $ua->get( $base_uri . 'errata.html' )->res->dom('a[href^="errata"]'); |
my $descr = b( $e->all_text )->trim; |
|
|
foreach my $l ( @{$ls}[ -2, -1 ] ) { |
|
print 'Errata for OpenBSD ', $l->text, "\n"; |
|
foreach my $e ( |
|
reverse @{ $ua->get( $base_uri . $l->attrs->{'href'} )->res->dom('li') |
|
} ) |
|
{ |
|
my $patch = $e->at('a[href$=".patch"]')->replace('')->{href}; |
|
my $title = $e->at('strong')->replace('')->all_text; |
|
my $arch = $e->at('i')->replace('')->all_text; |
|
my $descr = $e->all_text; |
$descr =~ s/\s+/ /gs; |
$descr =~ s/\s+/ /gs; |
|
$descr =~ s/\s(\.(?:\s|$))/$1/gs; |
|
$descr =~ s/\.+$/./gs; |
|
|
print 'Title: ', $title, "\n"; |
print 'Title: ', $title, "\n"; |
print 'Arch: ', $arch, "\n"; |
print 'Arch: ', $arch, "\n"; |
|
|
print 'Descr: ', $descr, "\n"; |
print 'Descr: ', $descr, "\n"; |
print "\n"; |
print "\n"; |
} |
} |
); |
} |