#!/usr/bin/perl -w
#
# Mirror audio enclosures referenced by recently modified feed files.
#
# Usage: script [venus cache] [media location]
#
#   1. Walk the feed cache and parse every file modified within the last
#      $MAX_FEED_AGE days, one parser thread per file (at most
#      $MAX_PARSER_THREADS parser threads pending at a time).
#   2. For every <link rel="enclosure" href="..."> found, start a fetcher
#      thread that downloads the enclosure into the media directory if it
#      is an audio type, or refreshes its mtime if it is already there.
#   3. Once every fetcher has finished, delete media files whose mtime is
#      older than $MAX_MEDIA_AGE days.

use strict;
use warnings;
use File::Find;
use File::Spec::Functions;
use File::Touch;
use XML::Parser;
use CGI qw(escape);
use LWP::Simple qw(head mirror);
use threads;

# Alias $name to $File::Find::name for the wanted callbacks.
use vars qw/*name/;
*name = *File::Find::name;

my $MAX_FEED_AGE       = 2;     # days, compared against -M
my $MAX_MEDIA_AGE      = 30;    # days, compared against -M
my $VERBOSE            = 1;
my $MAX_PARSER_THREADS = 8;

my $FILE_ROOT = shift;
my $MEDIA_DIR = shift;

my @found_enclosures;    # filled by startTag while a feed is being parsed
my @fetchers;            # pending download threads
my @parsers;             # pending feed-parser threads

if (!$FILE_ROOT or !$MEDIA_DIR) {
    print STDERR "Usage: $0 [venus cache] [media location]\n";
    exit 1;
}

-d $FILE_ROOT or die "$FILE_ROOT not a directory";
-d $MEDIA_DIR or mkdir($MEDIA_DIR) or die "Can't create $MEDIA_DIR: $!";
-w $MEDIA_DIR or die "$MEDIA_DIR not writable";

# no_chdir keeps the process working directory fixed: the paths handed to
# the worker threads may be relative, and the cwd is shared by all threads.
File::Find::find({ wanted => \&feed_wanted, no_chdir => 1 }, $FILE_ROOT);

while (@parsers) {
    finish_a_parser();
}

# Now just the main thread and the fetcher threads remain.
while (my $t = shift(@fetchers)) {
    my ($success, $message) = $t->join;

    # A fetcher that died returns nothing from join.
    $message = 'fetcher thread exited without a result'
        unless defined $message;

    if ($success) {
        print "$message\n" if $VERBOSE;
    }
    else {
        print STDERR "$message\n";
    }
}

# Expire old media only after every fetcher has finished, so that files
# still referenced by a feed have had their mtime refreshed first and no
# download can be removed between mirror() and the following touch().
File::Find::find({ wanted => \&media_wanted, no_chdir => 1 }, $MEDIA_DIR);

# handle_feed($file)
#
# Parse one feed file and return the list of enclosure URLs found in it.
# Runs in a parser thread. A file that fails to parse is reported on
# STDERR; whatever enclosures were collected before the error are still
# returned.
sub handle_feed {
    my $file = shift;

    @found_enclosures = ();

    my $parser = XML::Parser->new();
    $parser->setHandlers(
        Start => \&startTag,
    );

    if (!eval { $parser->parsefile($file); 1 }) {
        my $error = defined $@ && length $@ ? $@ : 'unknown error';
        chomp $error;
        print STDERR "Could not parse $file: $error\n";
    }

    return @found_enclosures;
}

# mirror_enclosure($url)
#
# Download one enclosure into $MEDIA_DIR unless it is already there.
# Runs in a fetcher thread. Returns ($success, $message): $success is
# true when nothing went wrong (including deliberate skips), false on
# failure; $message describes the outcome.
sub mirror_enclosure {
    my $url = shift;

    return (0, "Enclosure skipped: no URL given.")
        unless defined $url and length $url;

    # The local file name is the escaped URL with the scheme stripped.
    my $escaped_url;
    if ($url =~ m{^https?://(.+)}) {
        $escaped_url = escape($1);
    }
    else {
        return (0, "$url skipped: not an http(s) URL.");
    }

    my $filename = catfile($MEDIA_DIR, $escaped_url);

    if (-e $filename) {
        touch($filename);    # can't trust server-side mod times
        return (1, "$url already downloaded.");
    }

    my $type = (head($url))[0];
    return (0, "$url skipped: couldn't get content type.") if !$type;

    # Media types are case-insensitive and may carry parameters.
    if ($type !~ m{^(?:audio/|application/ogg\s*(?:;|$))}i) {
        return (1, "$url skipped: $type not an audio type.");
    }

    # mirror() returns the HTTP status code, which is always a true value,
    # so success has to be decided from the code itself.
    my $status = mirror($url, $filename);
    my $ok = ($status >= 200 and $status < 300) || $status == 304;
    return (0, "$url failed: HTTP status $status") if !$ok;

    touch($filename);        # can't trust server-side mod times
    return (1, "$url succeeded.");
}

# startTag($parser, $tag, %attributes)
#
# XML::Parser Start handler: remember the href of every
# <link rel="enclosure"> element that has one.
sub startTag {
    my ($p, $tag, %atts) = @_;

    return unless $tag eq 'link';
    return unless defined $atts{'rel'} and $atts{'rel'} eq 'enclosure';
    return unless defined $atts{'href'};

    push(@found_enclosures, $atts{'href'});
    return;
}

# File::Find callback for the feed cache: start a parser thread for every
# recently modified plain file, never keeping more than
# $MAX_PARSER_THREADS parser threads pending.
sub feed_wanted {
    return unless -f $name;

    if (-M $name < $MAX_FEED_AGE) {
        print "Parsing $name\n" if $VERBOSE;

        # threads->new is called in list context here, so the thread's
        # return value (the enclosure list) is delivered as a list.
        push(@parsers, threads->new(\&handle_feed, $name));

        if (@parsers >= $MAX_PARSER_THREADS) {
            finish_a_parser();
        }
    }
    else {
        print "Skipping old file $name\n" if $VERBOSE;
    }
    return;
}

# Wait for the oldest pending parser thread and start one fetcher thread
# per enclosure it found. Does nothing when no parser is pending.
# NOTE(review): the number of fetcher threads is not bounded.
sub finish_a_parser {
    my $t = shift(@parsers);
    return unless $t;

    foreach my $enclosure ($t->join) {
        push(@fetchers, threads->new(\&mirror_enclosure, $enclosure));
    }
    return;
}

# File::Find callback for the media directory: delete plain files whose
# mtime is older than $MAX_MEDIA_AGE days.
sub media_wanted {
    return unless -f $name and -M $name > $MAX_MEDIA_AGE;

    print "Removing old media file $name\n" if $VERBOSE;
    unlink($name) or print STDERR "Can't remove $name: $!\n";
    return;
}