#!/usr/bin/perl
# $AFresh1: errata_scraper.pl,v 1.1 2011/03/21 16:26:58 andrew Exp $
########################################################################
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
########################################################################
use strict;
use warnings;
use Mojo::Client;
my $client = Mojo::Client->new;

# Request the OpenBSD 4.8 errata page and, for every <li> that looks like
# an erratum (it has a link to a ".patch" file, a <strong> title and an
# <i> architecture note), print a Title/Arch/Patch/Descr record followed
# by a blank line.  <li> elements missing any of those parts are skipped.
$client->get(
    'http://www.openbsd.org/errata48.html' => sub {
        my $c = shift;
        $c->res->dom('li')->each(
            sub {
                my $e = shift;

                # at() gives back undef when the selector matches nothing,
                # so look everything up first and bail out quietly instead
                # of calling a method on undef for list items that are not
                # errata entries.
                my $link   = $e->at('a[href$=".patch"]');
                my $strong = $e->at('strong');
                my $italic = $e->at('i');
                return
                    unless defined($link)
                        && defined($strong)
                        && defined($italic);

                my $patch = $link->attrs->{href};

                # The title and arch elements are replaced with nothing
                # while their text is read; presumably this keeps their
                # text out of the description gathered from the <li> below.
                my $title = $strong->replace('')->all_text;
                $title =~ s/\s+/ /gxms;

                my $arch = $italic->replace('')->all_text;
                $arch =~ s/\s+/ /gxms;

                # the li ends at p, but the parser expects a /li
                my $para = $e->at('p');
                $para->replace('') if defined $para;

                # Whatever text is left in the <li> is the description;
                # collapse runs of whitespace and trim both ends.
                my $descr = $e->all_text;
                $descr =~ s/\s+/ /gxms;
                $descr =~ s/^\s+|\s+$//gxms;

                print 'Title: ', $title, "\n";
                print 'Arch: ',  $arch,  "\n";
                print 'Patch: ', $patch, "\n";
                print 'Descr: ', $descr, "\n";
                print "\n";

                return;
            }
        );
    }
);

# Run the queued request; the callback above is invoked from here.
$client->start;