#!/usr/bin/perl -w
# tree2rss: generate RSS from a directory tree
# containing HTML files

# Copyright 2005 Don Marti .
# All Rights Reserved.

# tree2rss is free software. You can redistribute it and/or
# modify it under the same terms as Perl itself.

# Portions copied from HTML::Parser documentation,
#
# Copyright 1996-2004 Gisle Aas. All rights reserved.
# Copyright 1999-2000 Michael A. Chase. All rights reserved.

# Needed Perl modules are packaged for Debian as:
# liburi-perl libxml-rss-perl libhtml-parser-perl

use strict;
use warnings;
use XML::RSS;
use File::Find;
use URI::WithBase;
use HTML::Parser;

################# begin items to configure #####################

# Name of this feed
my $TITLE = 'Linuxmafia FAQ';

# Where are the files on the filesystem
my $FILE_ROOT = '/home/httpd/htdocs/mirrors/linuxmafia-faq';

# base for the URLs generated by this script
my $URL_ROOT = 'http://zgp.org/mirrors/linuxmafia-faq/';

# regex that files must match to be included
my $FILE_MATCH = '\.html$';

# max items in each generated RSS file
my $MAX_ITEMS = 15;

# Channel description (RSS requires one alongside title and link)
my $DESCRIPTION = "Recently changed pages from $TITLE";

############### end items to configure ##########################

# Maps each matching file (path relative to $FILE_ROOT, as reported by
# File::Find) to its age in days since last modification (-M).
my %age;

my $rss = XML::RSS->new;
$rss->channel(
    title       => $TITLE,
    link        => $URL_ROOT,
    description => $DESCRIPTION,
);

# Work from $FILE_ROOT so the paths collected by wanted() are relative
# and can be resolved directly against $URL_ROOT.
chdir($FILE_ROOT) or die "Cannot chdir to $FILE_ROOT: $!\n";
find(\&wanted, '.');

# Most recently modified first; ties are broken by path so the output
# is the same from run to run.
my @newest_first = sort { $age{$a} <=> $age{$b} or $a cmp $b } keys %age;

my $count = 0;
foreach my $item (@newest_first) {
    # Test the limit before adding so that $MAX_ITEMS = 0 emits nothing.
    last if $count >= $MAX_ITEMS;

    my $url = URI::WithBase->new($item, $URL_ROOT);
    $rss->add_item(
        title => get_title($item),
        link  => $url->abs,
    );
    $count++;
}

print $rss->as_string;

# get_title($file)
#
# Returns the text of the first <title> element in the HTML file $file,
# with entities decoded, runs of whitespace collapsed to single spaces
# and leading/trailing whitespace removed.  If the file yields no title
# text, the file's path (without a leading "./") is returned instead so
# the feed item is never untitled.
#
# Dies if the file cannot be opened.
#
# Parsing approach is from the example in perldoc HTML::Parser.
sub get_title {
    my ($file) = @_;
    my $title = '';

    my $parser = HTML::Parser->new(api_version => 3);
    $parser->handler(
        start => sub {
            my ($tagname, $self) = @_;
            return if $tagname ne 'title';

            # Inside <title>: collect decoded text, and stop parsing at
            # </title> so the rest of the document is never read.
            $self->handler(text => sub { $title .= shift }, 'dtext');
            $self->handler(
                end => sub {
                    my ($end_tag, $p) = @_;
                    $p->eof if $end_tag eq 'title';
                },
                'tagname,self'
            );
        },
        'tagname,self'
    );
    $parser->parse_file($file) or die "Cannot parse $file: $!\n";

    $title =~ s/\s+/ /g;
    $title =~ s/^ //;
    $title =~ s/ $//;

    if ($title eq '') {
        ($title = $file) =~ s{^\./}{};
    }
    return $title;
}

# wanted()
#
# File::Find callback.  Records the age of every plain file whose name
# matches $FILE_MATCH in %age, keyed by $File::Find::name.
sub wanted {
    return unless $_ =~ $FILE_MATCH;

    # Skip directories and anything else that is not a plain file, even
    # if its name happens to match.
    return unless -f $_;

    # "_" reuses the stat data from the -f test above.
    $age{$File::Find::name} = -M _;
    return;
}