#!/usr/bin/perl
# $AFresh1: errata_scraper.pl,v 1.5 2015/10/12 00:37:53 andrew Exp $
########################################################################
# Copyright (c) 2011 Andrew Fresh
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
########################################################################
use strict;
use warnings;

use Mojo::UserAgent;

# Scrapes the OpenBSD errata index, follows the last two links whose href
# starts with "errata" (presumably the two most recent releases -- this
# relies on the ordering of the index page), and prints the title,
# architecture, patch link and description of every erratum found in the
# <li> elements of each linked page.

my $base_uri = 'http://www.openbsd.org/';
my $ua       = Mojo::UserAgent->new;

# Without the index there is nothing to iterate over, so this is fatal.
my $index = fetch_dom( $ua, $base_uri . 'errata.html' )
    or die "Cannot continue without the errata index\n";

my @links = @{ $index->find('a[href^="errata"]') };

# Take the last two links, or however many exist when there are fewer;
# slicing [-2, -1] on a shorter list would hand undef to the loop below.
my @recent = @links > 2 ? @links[ -2, -1 ] : @links;

foreach my $link (@recent) {
    print 'Errata for OpenBSD ', $link->text, "\n";

    # A single unreachable release page should not abort the whole run;
    # fetch_dom has already warned about it.
    my $page = fetch_dom( $ua, $base_uri . $link->attr('href') )
        or next;

    # The page's <li> elements are walked in reverse document order.
    foreach my $e ( reverse @{ $page->find('li') } ) {

        # Only list items carrying a <strong> title are treated as errata.
        my $t = $e->at('strong')
            or next;
        my $title = $t->all_text;

        # Strip each extracted element from the item so that what is left
        # over at the end is just the description text.
        $t->replace('');

        my $patch;
        if ( my $p = $e->at('a[href$=".patch.sig"],a[href$=".patch"]') ) {
            $patch = $p->attr('href');
            $p->replace('');
        }

        # Not every item is guaranteed to carry an <i> element; calling a
        # method on the missing element would die, so guard the lookup.
        my $arch;
        if ( my $i = $e->at('i') ) {
            $i->replace('');
            $arch = $i->text;
        }

        my $descr = $e->all_text;
        $descr =~ s/\s+/ /gs;                   # collapse whitespace runs
        $descr =~ s/\s(\.(?:\s|$))/$1/gs;       # no space before a full stop
        $descr =~ s/\.+$/./gs;                  # single trailing full stop

        print 'Title: ', $title, "\n";
        print 'Arch:  ', $arch,  "\n" if defined $arch;
        print 'Patch: ', $patch, "\n" if $patch;
        print 'Descr: ', $descr, "\n";
        print "\n";
    }
}

# fetch_dom( $agent, $url )
#
# Performs a GET for $url with the given Mojo::UserAgent and returns the
# response's Mojo::DOM object.  When the transaction reports an error
# (connection failure or an error status), a warning is emitted and an
# empty list / undef is returned so the caller can decide how to proceed.
sub fetch_dom {
    my ( $agent, $url ) = @_;

    my $tx = $agent->get($url);
    if ( my $err = $tx->error ) {
        my $code = $err->{code} ? "$err->{code} " : '';
        warn "Unable to fetch $url: $code$err->{message}\n";
        return;
    }

    return $tx->res->dom;
}