[BACK]Return to errata_scraper.pl CVS log [TXT][DIR] Up to [local] / openbsd / errata_scraper

File: [local] / openbsd / errata_scraper / errata_scraper.pl (download)

Revision 1.4, Sun May 29 01:04:30 2011 UTC (13 years, 1 month ago) by andrew
Branch: MAIN
Changes since 1.3: +7 -6 lines

Gets a little simpler with Mojo::DOM updates.

#!/usr/bin/perl
# $AFresh1: errata_scraper.pl,v 1.4 2011/05/29 01:04:30 andrew Exp $
########################################################################
# Copyright (c) 2011 Andrew Fresh <andrew@afresh1.com>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
########################################################################
use strict;
use warnings;

use Mojo::UserAgent;

# Site root; every page fetched below is addressed relative to it.
my $base_uri = 'http://www.openbsd.org/';

my $ua = Mojo::UserAgent->new;

# The errata index links to the per-release errata pages; collect those links.
my $index_uri     = $base_uri . 'errata.html';
my @release_links = @{ $ua->get($index_uri)->res->dom('a[href^="errata"]') };
die "No errata links found at $index_uri\n" unless @release_links;

# Only the last two links on the index are reported (presumably the most
# recent releases -- confirm against the page layout).  Fall back to whatever
# is there when the index lists fewer than two.
my @recent_links
    = @release_links > 2 ? @release_links[ -2, -1 ] : @release_links;

foreach my $release (@recent_links) {
    print 'Errata for OpenBSD ', $release->text, "\n";

    my $release_uri = $base_uri . $release->{href};
    my @entries     = @{ $ua->get($release_uri)->res->dom('li') };

    # Entries are printed in the reverse of their order on the page.
    foreach my $entry ( reverse @entries ) {

        # Each field is pulled out of the entry and its node detached, so
        # that whatever text remains afterwards is the description.  Values
        # are read before the node is detached rather than taken from
        # replace()'s return value, which current Mojo::DOM documents as
        # the parent node.  List items missing any of the three parts are
        # skipped instead of aborting the run.
        my $patch_link = $entry->at('a[href$=".patch"]') or next;
        my $patch = $patch_link->{href};
        $patch_link->replace('');

        my $title_node = $entry->at('strong') or next;
        my $title = $title_node->all_text;
        $title_node->replace('');

        my $arch_node = $entry->at('i') or next;
        my $arch = $arch_node->all_text;
        $arch_node->replace('');

        my $descr = $entry->all_text;
        $descr =~ s/\s+/ /gs;                # collapse runs of whitespace
        $descr =~ s/\s(\.(?:\s|$))/$1/gs;    # no space before a full stop
        $descr =~ s/\.+$/./gs;               # exactly one trailing full stop

        print 'Title: ', $title, "\n";
        print 'Arch:  ', $arch,  "\n";
        print 'Patch: ', $patch, "\n";
        print 'Descr: ', $descr, "\n";
        print "\n";
    }
}