File: [local] / openbsd / errata_scraper / errata_scraper.pl (download)
Revision 1.5, Mon Oct 12 00:37:53 2015 UTC (8 years, 8 months ago) by andrew
Branch: MAIN
CVS Tags: HEAD Changes since 1.4: +17 -6 lines
Update again for newer Mojo
Annoyingly ->replace('') no longer returns the element replaced,
instead returns the parent.
Also, account for errata without patches and li's that don't have titles.
Could probably do a better job of selecting just the elements I want above, but filtering after is easy.
|
#!/usr/bin/perl
# $AFresh1: errata_scraper.pl,v 1.5 2015/10/12 00:37:53 andrew Exp $
########################################################################
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
########################################################################
use strict;
use warnings;
use Mojo::UserAgent;
# Scrape the OpenBSD errata index, follow the last two release links found
# on it (presumably the newest releases -- this depends on the page order)
# and print every erratum on those pages as Title/Arch/Patch/Descr records.
my $base_uri = 'http://www.openbsd.org/';
my $ua       = Mojo::UserAgent->new;

# Fetch $url with $agent and return the parsed response body (Mojo::DOM).
# Dies with a readable message on connection errors and 4xx/5xx responses
# instead of silently handing back an empty document.
sub fetch_dom {
    my ( $agent, $url ) = @_;

    my $tx = $agent->get($url);
    if ( my $err = $tx->error ) {
        my $reason = $err->{message};
        $reason = "$err->{code} $reason" if $err->{code};
        die "Unable to fetch $url: $reason\n";
    }
    return $tx->res->dom;
}

# Collapse every run of whitespace in $text to a single space and trim both
# ends.  Text extracted from the DOM may or may not already be trimmed
# depending on the Mojolicious version, so normalise it here.
sub clean_text {
    my ($text) = @_;

    return '' unless defined $text;
    $text =~ s/\s+/ /g;
    $text =~ s/\A | \z//g;
    return $text;
}

my $index_url = $base_uri . 'errata.html';
my @releases  = @{ fetch_dom( $ua, $index_url )->find('a[href^="errata"]') };
die "No errata links found on $index_url\n" unless @releases;

# Keep only the last two links; cope with an index listing fewer than two.
@releases = @releases[ -2, -1 ] if @releases > 2;

RELEASE:
foreach my $release (@releases) {
    print 'Errata for OpenBSD ', clean_text( $release->text ), "\n";

    # A single unreachable release page should not abort the whole run.
    my $page = eval { fetch_dom( $ua, $base_uri . $release->attr('href') ) }
        or do {
        warn $@;
        next RELEASE;
        };

    ENTRY:
    foreach my $entry ( reverse @{ $page->find('li') } ) {

        # List items without a <strong> title are not errata entries.
        my $title_node = $entry->at('strong') or next ENTRY;
        my $title      = clean_text( $title_node->all_text );

        # Each extracted element is removed from the entry so that what is
        # left over is the description.  The return value of replace() is
        # deliberately ignored: it changed between Mojolicious versions.
        $title_node->replace('');

        # Not every erratum ships a patch.
        my $patch;
        if ( my $link
            = $entry->at('a[href$=".patch.sig"],a[href$=".patch"]') )
        {
            $patch = $link->attr('href');
            $link->replace('');
        }

        # The architecture lives in an <i> element, which may be absent;
        # guard it like the title and patch rather than dying on undef.
        my $arch;
        if ( my $arch_node = $entry->at('i') ) {
            $arch = clean_text( $arch_node->text );
            $arch_node->replace('');
        }

        my $descr = clean_text( $entry->all_text );

        # Removing the elements above can leave a space before a full stop
        # and a doubled full stop at the end; tidy both up.
        $descr =~ s/\s(\.(?:\s|$))/$1/gs;
        $descr =~ s/\.+$/./gs;

        print 'Title: ', $title, "\n";
        print 'Arch: ',  $arch,  "\n" if defined $arch;
        print 'Patch: ', $patch, "\n" if $patch;
        print 'Descr: ', $descr, "\n";
        print "\n";
    }
}