#!/usr/bin/perl -w

# tree2rss: generate RSS from a directory tree 
#           containing HTML files

# Copyright 2005 Don Marti <dmarti@zgp.org>.
# All Rights Reserved.

# tree2rss is free software. You can redistribute it and/or 
# modify it under the same terms as Perl itself.

# Portions copied from HTML::Parser documentation,
#
#    Copyright 1996-2004 Gisle Aas. All rights reserved.
#    Copyright 1999-2000 Michael A. Chase.  All rights reserved.

# Needed Perl modules are packaged for Debian as:
# liburi-perl libxml-rss-perl libhtml-parser-perl

use strict;
use XML::RSS;
use File::Find;
use URI::WithBase;
use HTML::Parser;

################# begin items to configure #####################

# Name of this feed (used as the RSS channel title)
my $TITLE = 'Linuxmafia FAQ';

# Directory on the local filesystem that is searched, recursively,
# for files to list in the feed
my $FILE_ROOT = '/home/httpd/htdocs/mirrors/linuxmafia-faq';

# Base URL; each file's path relative to $FILE_ROOT is resolved
# against it to build that item's link
my $URL_ROOT = 'http://zgp.org/mirrors/linuxmafia-faq/';

# Regex that a file's name must match for the file to be included
my $FILE_MATCH = '\.html$';

# Maximum number of items in the generated RSS output
my $MAX_ITEMS = 15;

############### end items to configure ##########################

# Age in days (as reported by -M) of every matching file, keyed by its
# path relative to $FILE_ROOT.  Filled in by wanted().
my %age;

# Number of items added to the feed so far.
my $count = 0;

# Scratch buffer shared by get_title() and the parser handlers that
# start_handler() installs.
my $title = '';

# Class->new rather than the indirect-object form "new XML::RSS".
my $rss = XML::RSS->new;

# RSS requires a channel link as well as a title; $URL_ROOT is the page
# that all item links live under.
$rss->channel(title => $TITLE,
              link  => $URL_ROOT);

# Run the search from inside $FILE_ROOT so that the paths collected by
# wanted() are relative and can be resolved directly against $URL_ROOT.
chdir($FILE_ROOT) or die "Cannot chdir to $FILE_ROOT: $!";
find(\&wanted, '.');

# Smallest age first, i.e. most recently modified first.  Ties are broken
# by path so that the output does not depend on hash ordering.
foreach my $item (sort { $age{$a} <=> $age{$b} || $a cmp $b } keys %age) {
    # Test the limit before adding, so that $MAX_ITEMS = 0 yields no items.
    last if $count >= $MAX_ITEMS;

    my $url = URI::WithBase->new($item, $URL_ROOT);
    # as_string: hand XML::RSS a plain string rather than a URI object.
    $rss->add_item(title => get_title($item),
                   link  => $url->abs->as_string
                  );
    $count++;
}

print $rss->as_string;

# get_title($file): return the text of the <title> element of the HTML
# file $file, with leading/trailing whitespace removed and internal runs
# of whitespace collapsed to a single space.  When the document has no
# title (or an empty one) $file itself is returned, so that every feed
# item gets a non-empty title.  Dies if HTML::Parser's parse_file()
# reports failure.
sub get_title {
    # parser setup adapted from the example in perldoc HTML::Parser
    my ($file) = @_;

    # The handlers append to the file-level $title, so clear whatever the
    # previous call left behind.
    $title = '';
    my $p = HTML::Parser->new(api_version => 3);
    $p->handler(start => \&start_handler, "tagname,self");
    $p->parse_file($file) or die "Cannot parse $file: $!";

    # Title text is often wrapped or indented in the HTML source;
    # normalize it to a single line.
    $title =~ s/\s+/ /g;
    $title =~ s/^ //;
    $title =~ s/ $//;

    return $title ne '' ? $title : $file;
}

# Start-tag callback, registered by get_title() with the argspec
# "tagname,self".  Does nothing until a <title> tag is seen; then it
# installs two more handlers on the same parser: one that appends the
# element's text to the file-level $title, and one that stops the parse
# at the closing </title>.
sub start_handler {
    # adapted from the example in perldoc HTML::Parser
    my ($tagname, $parser) = @_;
    return unless $tagname eq "title";

    # Called with the argspec "dtext": append each text chunk.
    my $collect_text = sub {
        my ($text) = @_;
        $title .= $text;
    };

    # Called with "tagname,self": once </title> is reached there is
    # nothing more to read, so signal end of input to the parser.
    my $stop_at_close = sub {
        my ($closing_tag, $p) = @_;
        $p->eof if $closing_tag eq "title";
    };

    $parser->handler(text => $collect_text, "dtext");
    $parser->handler(end  => $stop_at_close,
                     "tagname,self");
}

# File::Find callback.  For every regular file whose name ($_) matches
# $FILE_MATCH, record its age in days in %age, keyed by
# $File::Find::name (the path relative to the directory find() started
# from).
sub wanted {
    # -f skips directories, and anything else that is not a plain file,
    # whose name happens to match; get_title() could not parse those.
    return unless /$FILE_MATCH/ && -f $_;

    # Keep the fractional part of -M.  Truncating to whole days made all
    # files changed on the same day compare equal, so their order in the
    # feed (and which ones survived the $MAX_ITEMS cut) was arbitrary.
    # "_" reuses the stat data fetched by the -f test above.
    $age{$File::Find::name} = -M _;
}
