#!/usr/bin/perl
# $AFresh1$
########################################################################
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com>
#
# errata_scraper.pl -- print the entries of the OpenBSD 4.8 errata page.
#
# Fetches http://www.openbsd.org/errata48.html, walks every <li> element
# in the response and prints four fields for each one: Title, Arch, Patch
# and Descr, followed by a blank line.
#
# NOTE(review): this text was reconstructed from the revision 1.2 side of
# an interleaved 1.1/1.2 diff listing (1.2 is dated 2011/03/21 17:28:15).
# The revision 1.1 statements (Mojo::Client, the extra whitespace
# substitutions and the '<p>' removal) are intentionally not included.
# Confirm against the repository copy of revision 1.2.
########################################################################

use strict;
use warnings;

use Mojo::UserAgent;
use Mojo::ByteStream 'b';

# Blocking GET of the errata page; the callback runs once per <li>.
Mojo::UserAgent->new->get('http://www.openbsd.org/errata48.html')
    ->res->dom('li')->each(
    sub {
        my $e = shift;

        # Link to the patch file: the first anchor whose href ends in
        # ".patch".
        my $patch = $e->at('a[href$=".patch"]')->attrs->{href};

        # The <strong> and <i> elements are replaced with an empty string
        # inside $e while their text is captured.
        # NOTE(review): presumably this is so that the all_text call used
        # for $descr below no longer contains the title and arch text --
        # confirm against the Mojo::DOM version in use.
        my $title = b( $e->at('strong')->replace('')->all_text )->trim;
        my $arch  = b( $e->at('i')->replace('')->all_text )->trim;

        # Text of the <li> after the replacements above, with every run of
        # whitespace collapsed to a single space.
        my $descr = b( $e->all_text )->trim;
        $descr =~ s/\s+/ /gs;

        print 'Title: ', $title, "\n";
        print 'Arch: ',  $arch,  "\n";
        print 'Patch: ', $patch, "\n";
        print 'Descr: ', $descr, "\n";
        print "\n";
    }
    );
|