天天看点

Windows 版爬取 CSDN

use  LWP::UserAgent;
use POSIX;
use HTML::TreeBuilder::XPath; 
use Encode; 
use HTML::TreeBuilder;

# Fetch a CSDN page, save the raw response body to csdn.html, then load that
# file into an HTML::TreeBuilder::XPath tree ($tree) for later querying.

# NOTE(review): the original call was `$ua->get` with no URL, which can never
# succeed. The intended address is not visible in this file; it is now taken
# from the first command-line argument and falls back to the CSDN home page.
# Confirm the real target URL.
my $url       = @ARGV ? $ARGV[0] : 'https://www.csdn.net/';
my $html_file = 'csdn.html';

my $ua = LWP::UserAgent->new;
$ua->timeout(10);              # give up on the request after 10 seconds
$ua->env_proxy;                # pick up proxy settings from the environment
$ua->agent("Mozilla/8.0");
my $base_dir = "F:\\pa";       # not referenced in this part of the script

my $response = $ua->get($url);
die "fetch $url failed: " . $response->status_line . "\n"
    unless $response->is_success;

# Three-argument open with a lexical handle; low-precedence `or` so that the
# die really applies to open(). ':raw' keeps the downloaded bytes unchanged
# (no newline translation on Windows).
open my $data_fh, '>:raw', $html_file
    or die "open csdn file failed:$!";
print {$data_fh} $response->content
    or die "write $html_file failed: $!";
# Close before parsing: buffered output must reach the disk before the file
# is read back, and write errors can surface only at close time.
close $data_fh
    or die "close $html_file failed: $!";

my $tree = HTML::TreeBuilder::XPath->new;
# parse_file returns undef when the file cannot be opened.
$tree->parse_file($html_file)
    or die "parse $html_file failed: $!";
  ##     <a href='javascript:void(0)'>

      

继续阅读