#!/usr/local/bin/perl
############### MAIN ###############
# Quick and dirty routine to get Yahoo news
# Developed by Ricky J. Sethi for http://www.sethi.org
# Saves the result to a text file, which can then be pulled into a page
# with a server-side include (SSI) wherever it is wanted.
#
# Version 1.0
#   - Current version (2.0) uses LWP::UserAgent & RobotUA; also the script
#     is much more complicated (since it gets data from multiple news
#     sources) and relies on a configuration file to set various
#     parameters.
#   - Might be remnants here of stuff used in the current version
#   - magic switch needs to be fixed in regex

use strict;
use warnings;

###
# Initialization
###

### Use the right packages
use LWP::Simple;

### Set date correctly (using my perllib):
#$longDate = GetNiceDate(long);
# Not used anywhere below; kept for parity with the original script, but
# taken from the built-in localtime instead of shelling out to `date`.
my $longDate = localtime;

### Set url here:
my $url = "http://dailynews.yahoo.com";

### Where should we save the fetched news?
my $logfile = "news-yahoo.html";

### Debug?
my $DEBUG = 0;

###
# Let's get right to it:
###

### Get the data:
my $urldata = GetUrlData($url);

# Bail out BEFORE opening the output file: opening with '>' truncates it,
# and a failed fetch should not wipe out the previously saved news.
defined $urldata
    or die "Could not fetch $url; leaving $logfile untouched\n";

if ($DEBUG) { print "The URL is $url and the data is $urldata\n"; }

### Log the match:
open(my $log_fh, '>', $logfile)
    or die "Cannot open $logfile for writing: $!\n";
print {$log_fh} "\n$urldata\n"
    or die "Cannot write to $logfile: $!\n";
# Buffered write errors only surface at close, so check it too.
close($log_fh)
    or die "Cannot close $logfile: $!\n";

############### Get News Data ###############
# Return parsed & processed news data from Yahoo.
#
# Parameters:
#   $origurl - URL of the page to fetch; also used as the base when
#              turning root-relative links into absolute ones.
#
# Returns:
#   The processed document text as a string, or undef (empty list in list
#   context) when the page could not be retrieved.
#
sub GetUrlData {
    my $origurl = shift;

    # Get document using Simple method; get() returns undef on failure.
    my $doc = get($origurl);
    if (!defined $doc) {
        warn "GetUrlData: could not retrieve $origurl\n";
        return;
    }

    ### Parse the link
    # Strip any newlines so next match doesn't have to account for em
    $doc =~ s%\n% %g;

    # Fix non-absolute links (with or without quotes).  The quote capture is
    # always defined (possibly empty), and the (?!/) look-ahead leaves
    # protocol-relative links such as href="//host/path" alone.
    (my $base = $origurl) =~ s%/+$%%;    # avoid a doubled slash in the result
    $doc =~ s%href=(['"]?)/(?!/)%href=$1$base/%gi;

    ### Now find the news:
    # NOTE(review): as written the pattern has no delimiting markers, so it
    # captures the whole document minus leading whitespace.  The header's
    # "needs to be fixed in regex" remark suggests markers were intended
    # around the capture -- confirm before narrowing it.
    my ($news) = $doc =~ m%\s*(.*)\s*%i;
    $news = '' unless defined $news;

    ### Now save our stuff and write out news:
    $news =~ s/Top Story/Latest Headlines direct from Yahoo/;

    if ($DEBUG) {
        print "Content-type: text/html\n\nFound in the doc: $news\n\n";
    }

    return $news;
}

__END__