Sunday, October 24, 2004

Another scraper

I only read Telsa's diary occasionally; maybe having a feed in Liferea would change that.

#!/usr/bin/perl -w

use strict;
use XML::RSS;
use LWP::Simple;
use HTML::Entities;

# Build an RSS 1.0 feed object. The diary page is both the document that
# gets scraped and the link advertised for the channel.
my $rss = XML::RSS->new(version => '1.0');
my $url = "http://www.linux.org.uk/~telsa/Diary/diary.html";

# LWP::Simple's get() returns undef when the fetch fails. Stop here with a
# message instead of carrying on with no page to scrape.
my $page = get($url);
die "Could not fetch $url\n" unless defined $page;

$rss->channel(title       => "The more accurate diary. Really.",
              link        => $url,
              description => "Telsa's diary of life with a hacker:"
                             . " the current ramblings");

# The page is cut at every <dt> tag; each resulting chunk is tested against
# the entry pattern, and chunks that do not look like an entry are skipped.
for my $chunk (split ('<dt>', $page))
{
	# Captures: $1 = anchor name, $2 = text inside <strong>, $3 = everything
	# after <dd> up to the last </dd> in the chunk (.* is greedy and /s lets
	# it cross newlines).
	next unless $chunk =~ /<a\sname="([^"]*)">
		<strong>
		([^>]*)
		<\/strong><\/a><\/dt>\s*<dd>
		(.*)<\/dd>/six;

	# Copy the captures into named lexicals before calling anything else.
	my ($anchor, $heading, $body) = ($1, $2, $3);

	# NOTE(review): the body is entity-encoded here; if the installed
	# XML::RSS also encodes on output this may double-encode -- confirm.
	$rss->add_item(title       => $heading,
		       link        => "$url#$anchor",
		       description => encode_entities($body));
}

# Write the serialised feed to standard output.
print $rss->as_string;

0 Comments:

Post a Comment

<< Home