#!/usr/bin/perl -w

use strict;
use threads;

use File::Find;
use File::Spec;
use File::Spec::Functions;
use File::Touch;
use XML::Parser;
use CGI qw(escape);
use LWP::Simple qw(head mirror);

# Alias the "name" glob to File::Find's, so the wanted callbacks can write
# $name instead of $File::Find::name.
use vars qw/*name/;
*name = *File::Find::name;

# Tunables. Both ages are compared against -M, i.e. they are in days.
my ($MAX_FEED_AGE, $MAX_MEDIA_AGE) = (2, 30);
my ($VERBOSE, $MAX_PARSER_THREADS) = (1, 8);

# Positional command-line arguments: the feed cache to scan and the
# directory that receives the downloaded media.
my ($FILE_ROOT, $MEDIA_DIR) = splice(@ARGV, 0, 2);

# @found_enclosures collects hrefs pushed by startTag during a parse;
# @fetchers and @parsers hold the thread objects that are still to be joined.
my (@found_enclosures, @fetchers, @parsers);

# Both positional arguments are mandatory.
if (!$FILE_ROOT or !$MEDIA_DIR) {
    print STDERR "Usage: $0 [venus cache] [media location]\n";
    exit 1;
}

# The cache must already exist; the media directory is created on demand
# and must be writable either way.
-d $FILE_ROOT or die "$FILE_ROOT not a directory";
-d $MEDIA_DIR or mkdir($MEDIA_DIR)
  or die "Can't create $MEDIA_DIR: $!";
-w $MEDIA_DIR or die "$MEDIA_DIR not writable";

# Pin both locations to absolute paths before any traversal starts.
# File::Find chdir()s into every directory it visits, and the working
# directory is shared by all threads, so a relative $MEDIA_DIR would make
# the fetcher threads build file names against whatever directory the main
# thread happens to be scanning at that moment.
$FILE_ROOT = File::Spec->rel2abs($FILE_ROOT);
$MEDIA_DIR = File::Spec->rel2abs($MEDIA_DIR);

# Walk the feed cache. feed_wanted starts one parser thread per fresh feed
# file and, through finish_a_parser, fetcher threads for the enclosures.
File::Find::find({wanted => \&feed_wanted}, $FILE_ROOT);

# Join the parser threads that are still outstanding.
while (@parsers) {
    finish_a_parser();
}

# now just the main thread
# and the fetcher threads remain.

# Loop on the queue length rather than on the truth of the shifted element:
# a failed thread creation leaves undef in the queue, and testing the
# element itself would abandon every thread queued behind it.
while (@fetchers) {
    my $t = shift(@fetchers);
    if (!defined $t) {
        print STDERR "A fetcher thread could not be started.\n";
        next;
    }

    my ($success, $message) = $t->join;

    # join hands back nothing when the thread died instead of returning.
    if (!defined $message) {
        ($success, $message)
            = (0, "A fetcher thread ended without reporting a result.");
    }

    if ($success) {
        print "$message\n" if $VERBOSE;
    }
    else {
        print STDERR "$message\n";
    }
}

# Purge stale media only after every fetcher has finished. The fetchers
# touch() files that are still referenced by a feed; purging while they run
# could unlink such a file between a fetcher's -e test and its touch(),
# leaving an empty file that later runs would treat as already downloaded.
File::Find::find({wanted => \&media_wanted}, $MEDIA_DIR);

# Parse one feed file and return the list of enclosure URLs found in it.
#
# Takes the path of the file to parse. The Start handler (startTag) pushes
# each enclosure href onto @found_enclosures; that list is returned.
# A file that fails to parse is reported on STDERR and whatever was
# collected before the error is still returned.
sub handle_feed {
    my $file = shift;

    # Start from a clean slate so repeated calls in the same interpreter
    # never return enclosures left over from an earlier file.
    @found_enclosures = ();

    my $parser = XML::Parser->new();
    $parser->setHandlers(
        Start => \&startTag,
    );

    # parsefile dies on malformed input; keep going, but say why.
    if (!eval { $parser->parsefile($file); 1 }) {
        my $error = $@ || 'unknown error';
        $error =~ s/^\s+|\s+$//g;
        warn "Couldn't parse $file: $error\n";
    }

    return @found_enclosures;
}

# Download one enclosure into $MEDIA_DIR unless it is already there.
#
# Takes the enclosure URL. Returns a two-element list ($ok, $message):
# $ok is true when the file is present afterwards or was deliberately
# skipped as non-audio, false when the URL is unusable or the transfer
# failed; $message is a one-line description for the log.
sub mirror_enclosure {
    my $url = shift;

    return (0, "Enclosure skipped: no URL given.")
        if !defined $url or $url eq '';

    # The local file name is the escaped URL minus its scheme. Anything that
    # is not http:// has no such name; bail out instead of letting the path
    # collapse to $MEDIA_DIR itself (which exists, and would be reported as
    # "already downloaded").
    my ($host_and_path) = $url =~ m|^http://(.+)|;
    return (0, "$url skipped: not an http:// URL.")
        if !defined $host_and_path;

    my $filename = catfile($MEDIA_DIR, escape($host_and_path));

    if (-e $filename) {
        touch($filename); # can't trust server-side mod times
        return (1, "$url already downloaded.");
    }

    my $type = (head($url))[0];
    return (0, "$url skipped: couldn't get content type.")
        if !$type;

    # Media types are case-insensitive and may carry parameters
    # ("application/ogg; codecs=..."), so don't compare with eq.
    my $is_audio = $type =~ m{^audio/}i
                || $type =~ m{^application/ogg\s*(?:;|$)}i;
    return (1, "$url skipped: $type not an audio type.")
        if !$is_audio;

    # mirror() returns the HTTP status code, which is always a true value,
    # so success has to be decided from the code itself. It can also die
    # (e.g. on a truncated transfer); catch that so the thread still
    # returns a result.
    my $status = eval { mirror($url, $filename) };
    if (!defined $status) {
        my $error = $@ || 'unknown error';
        $error =~ s/\s+$//;
        return (0, "$url failed: $error");
    }
    if ($status != 304 and ($status < 200 or $status >= 300)) {
        return (0, "$url failed: HTTP status $status");
    }

    # Only touch after a confirmed download: touch() creates missing files,
    # and an empty placeholder would block every later attempt.
    touch($filename); # can't trust server-side mod times
    return (1, "$url succeeded.");
}

# XML::Parser Start handler: remember the href of every
# <link rel="enclosure"> element.
#
# Receives the parser object, the element name and the attribute
# name/value pairs. Appends to @found_enclosures; returns nothing.
sub startTag {
    my ($p, $tag, %atts) = @_;

    return if $tag ne 'link';

    # Plenty of <link> elements carry no rel attribute; comparing undef
    # with eq would warn under -w for each of them.
    my ($rel, $href) = @atts{qw(rel href)};
    return if !defined $rel or $rel ne 'enclosure';

    # An enclosure without a target is of no use to the fetchers.
    return if !defined $href or $href eq '';

    push (@found_enclosures, $href);
    return;
}

# File::Find callback for the feed cache: start a parser thread for every
# plain file younger than $MAX_FEED_AGE days, keeping at most
# $MAX_PARSER_THREADS parsers unjoined at a time.
sub feed_wanted {
    # File::Find chdir()s into the directory being visited, so the entry
    # must be tested through $_; $name is relative to the original working
    # directory and only resolves by luck when the root is absolute.
    return if !-f $_;

    if (-M $_ < $MAX_FEED_AGE) {
        print "Parsing $name\n" if $VERBOSE;

        # Hand the thread an absolute path: the working directory is shared
        # by all threads and will have moved on by the time it opens the
        # file.
        my $path = File::Spec->rel2abs($_);

        # threads->new is called in list context here (argument of push),
        # which is what lets join return the whole enclosure list later.
        push (@parsers, threads->new(\&handle_feed, $path));

        if (@parsers >= $MAX_PARSER_THREADS) {
            finish_a_parser();
        }
    }
    else {
        print "Skipping old file $name\n" if $VERBOSE;
    }
    return;
}

# Join the oldest queued parser thread and queue one fetcher thread for
# each enclosure URL it returned.
sub finish_a_parser {
    my $parser_thread = shift @parsers;

    # The thread is created as an argument of push, i.e. in list context,
    # exactly as before.
    push @fetchers, threads->new(\&mirror_enclosure, $_)
        for $parser_thread->join;
}


# File::Find callback for the media directory: delete every plain file
# that has not been modified for more than $MAX_MEDIA_AGE days.
sub media_wanted {
    # File::Find has chdir()ed into the directory being visited, so the
    # entry is addressed through $_; $name is only used for messages.
    return if !-f $_;
    return if -M $_ <= $MAX_MEDIA_AGE;

    print "Removing old media file $name\n" if $VERBOSE;
    unlink($_) or warn "Couldn't remove $name: $!\n";
    return;
}
