Tuesday, July 9, 2013

Very useful Perl scripts

1) Perl script to get web page content
 #!/usr/bin/perl
use strict;
use warnings;
use LWP::Simple;

# Download one web page and save its body to a local file.

my $url = "http://www.xyz.com";    # enter the URL

# Single quotes keep the backslash literal. Inside double quotes "\F" is
# treated as an escape sequence, so the path separator would be lost.
my $output_file = 'D:\Filename.txt';

# get() returns undef when the request fails.
my $website_content = get($url);
die "Couldn't fetch $url\n" unless defined $website_content;

open my $out, '>', $output_file
    or die "Can't open $output_file for writing: $!";
print {$out} $website_content
    or die "Can't write to $output_file: $!";

# Buffered write errors only surface when the handle is closed.
close $out
    or die "Can't close $output_file: $!";



2) Perl script to get Links from web page
#!/usr/bin/perl
# Print every link-bearing tag found in a web page, together with its
# URL attributes resolved to absolute URLs.
use strict;
use warnings;
use HTML::LinkExtor;
use LWP::Simple;
use URI;

my $page_url = "http://www.xyz.com";

# Resolve $url against $base and return the canonical absolute URI.
# Defined at file level: a named sub inside the loop body gains nothing
# and hides the fact that it exists.
sub absolutize {
    my ($url, $base) = @_;
    return URI->new_abs($url, $base)->canonical;
}

# get() returns undef when the request fails.
my $page = get($page_url);
die "Couldn't get $page_url\n" unless defined $page;

my $parser = HTML::LinkExtor->new;
$parser->parse($page);
$parser->eof;    # tell the parser the document is complete

# once we have parsed the page ...
foreach my $link ($parser->links) {
    # each $link is a flat array ref: [type, name => value, name => value, ...]
    my ($type, @attributes) = @$link;
    print "Type: $type\n";
    while (my ($name, $value) = splice(@attributes, 0, 2)) {
        print "  $name -> ", absolutize($value, $page_url), "\n";
    }
}



3) Extracting Links from Arbitrary HTML
#!/usr/bin/perl -w
# Fetch one page, pull every href value out of its HTML, and pass each
# one (made absolute) to check_url().
use strict;
use LWP;

my $doc_url = "http://www.xyz.com";
my $document;
my $browser;    # assigned by init_browser()
init_browser( );

{  # Get the page whose links we want to check:
  my $response = $browser->get($doc_url);
  die "Couldn't get $doc_url: ", $response->status_line
    unless $response->is_success;

  # Match against decoded text rather than the raw response bytes.
  $document = $response->decoded_content;
  die "Couldn't decode the content of $doc_url\n"
    unless defined $document;

  $doc_url = $response->base;
  # In case we need to resolve relative URLs later
}

# Accept both href="..." and href='...'; exactly one of the two capture
# groups is defined for each match.
while ($document =~ m/href\s*=\s*(?:"([^"\s]+)"|'([^'\s]+)')/gi) {
  my $href = defined $1 ? $1 : $2;
  my $absolute_url = absolutize($href, $doc_url);
  check_url($absolute_url);
}

# Resolve a URL against a base URL.
#   $_[0]  the URL to resolve (may be relative)
#   $_[1]  the base URL to resolve it against
# Returns the canonical form of the resulting absolute URI.
sub absolutize {
  use URI;
  my $relative = shift;
  my $base     = shift;
  my $resolved = URI->new_abs($relative, $base);
  return $resolved->canonical;
}

# Create a new LWP::UserAgent, store it in the file-level $browser
# variable, and return it.
sub init_browser {
  my $agent = LWP::UserAgent->new;
  # ...And any other initialization we might need to do...
  $browser = $agent;
  return $browser;
}

# Placeholder link checker: prints the URL it was given (its first
# argument) on a line of its own, prefixed with "Check - ".
sub check_url {
  my ($candidate) = @_;
  # A temporary placeholder...
  print "Check - $candidate\n";
}



4) Calculate the load time of webpage
#----  code  ----#
#!/usr/local/bin/perl
use LWP::Simple;
use Time::HiRes qw(sleep);
use Test::WWW::Selenium;
use Test::More "no_plan";
use Test::Exception;
use HTML::TreeBuilder;
use WWW::Mechanize;

use strict;
use warnings;

# Measure how long it takes to fetch a page and the images it references.

my $url = 'http://www.xyz.com';

# Time::HiRes::time() has sub-second resolution; the built-in time() only
# counts whole seconds, which is too coarse for a page load.
my $started = Time::HiRes::time();
print "Started: $started\n";

my $mech = WWW::Mechanize->new();
$mech->get( $url );

foreach my $link ($mech->links()) {
   # text() is undef for links that have no text.
   my $description = $link->text();
   $description = '' unless defined $description;

   print "LINK: "        . $link->url() . "\n";
   print "DESCRIPTION: " . $description . "\n";
}
# The original also printed each link to MYFILE1, a handle that was never
# opened, so nothing was ever written; that dead print is removed.

# The page has already been downloaded by $mech->get above; fetching it a
# second time would count the page twice in the elapsed time.
print "Got content\n";

# Retrieve every image referenced by the page. url_abs() resolves relative
# SRC values against the page's base URL so get() can fetch them.
foreach my $image ($mech->images()) {
    my $image_url = $image->url_abs();
    print "Getting $image_url\n";
    get($image_url);
}

my $stopped = Time::HiRes::time();
print "Stopped: $stopped\n";
my $elapsed = $stopped - $started;    # seconds, with a fractional part
print "Elapsed: $elapsed\n";



5) Integrating Java code inside a Perl script
use strict;
use warnings;
use LWP::Simple;

# Time the download of a page and its images, taking the timestamps from
# an external Java program (the "date" class listed below).

my $url = 'http://www.xyz.com';

# Run `java date` and return the number it prints (milliseconds).
# Dies if the command cannot be run, exits non-zero, or prints anything
# other than a plain integer.
sub java_millis {
    my $output = `java date`;
    die "Couldn't run 'java date' (status $?)\n"
        if !defined $output || $? != 0;

    # Strip the line ending so the value is a clean number for arithmetic
    # and does not add a blank line when printed.
    $output =~ s/\s+\z//;
    die "Unexpected output from 'java date': $output\n"
        unless $output =~ /\A\d+\z/;
    return $output;
}

# JAVA COMMAND LINE THAT I HAD DONE FOR GETTING TIME IN MILLISECONDS
my $started = java_millis();
print "Started: $started\n";

# get() returns undef when the request fails.
my $content = get($url);
die "Couldn't get $url\n" unless defined $content;
print "Got content\n";

# URI is loaded at run time only; it is needed to resolve relative image
# paths, which get() cannot fetch on their own.
require URI;

while ($content =~ /<IMG.*?SRC="(.*?)"/gis) {
    my $image_url = URI->new_abs($1, $url);
    print "Getting $image_url\n";
    get($image_url);
}

#######
my $stopped = java_millis();
print "Stopped: $stopped\n";
my $elapsed = $stopped - $started;    # milliseconds
print "Elapsed: $elapsed\n";


Java Code:

/**
 * Prints the current time, in milliseconds since the epoch, on one line.
 * The class name is lowercase because the Perl script runs it as
 * {@code java date}.
 */
class date
{
   public static void main(String[] args) throws Exception
   {
        long nowMillis = System.currentTimeMillis();
        System.out.println(nowMillis);
   }
}



6) Reading a File Line by Line
#!/usr/local/bin/perl
# Print a text file to standard output, one line at a time.
use strict;
use warnings;

my $path = 'D:\sample.txt';

# Three-argument open with a lexical handle; stop with the system error
# if the file cannot be opened instead of silently reading nothing.
open my $in, '<', $path
    or die "Can't open $path: $!";

while (my $line = <$in>) {
    chomp $line;
    print "$line\n";
}

close $in
    or die "Can't close $path: $!";



7) Writing to a File
#!/usr/local/bin/perl
# Append one line of text to an existing file.
use strict;
use warnings;

my $path = 'D:\sample.txt';

# '>>' opens for appending, so existing data is kept.
open my $out, '>>', $path
    or die "Can't open $path for appending: $!";

print {$out} "This is being appended or rather added to the existing file which already has data.\n"
    or die "Can't write to $path: $!";

# Buffered write errors only surface when the handle is closed.
close $out
    or die "Can't close $path: $!";



8) Check if a file exists
#!/usr/bin/perl -w

# Print a message when the sample file is present on disk.
my $filename = 'D:\sample.txt';

print "File Exists!" if -e $filename;



9) Check if a file does not exist
#!/usr/bin/perl -w

# Report whether the sample file is present on disk or not.
my $filename = 'D:\sample.txt';

my $message = (-e $filename) ? "File Exists!" : "File Doesn't Exist!";
print $message;



10) Search specific word in file and display the whole line
#!/usr/bin/perl
# Print every line of a file that contains the text "Services:".
use strict;
use warnings;

my $file = 'D:\sample.txt';

# Fail loudly if the file cannot be opened; otherwise the loop below
# would simply print nothing.
open my $txt, '<', $file
    or die "Can't open $file: $!";

while (my $line = <$txt>) {
  print $line if $line =~ /Services:/;
}

close $txt
    or die "Can't close $file: $!";



11) Parse all the links in a webpage into an array and print them
# Parse all the links in a web page into an array and print each one as a
# (link, description) pair.
use strict;
use warnings;

use WWW::Mechanize;

my $url  = "http://www.xyz.com";
#my $url   = "http://www.domain.com/webpage.html";

my $mech  = WWW::Mechanize->new();

$mech->get( $url );

my @links = $mech->links();

foreach my $link (@links) {
    # text() is undef for links without any text; substitute an empty
    # string so the concatenation below does not warn.
    my $description = $link->text();
    $description = '' unless defined $description;

    print "LINK: "        . $link->url() . "\n";
    print "DESCRIPTION: " . $description . "\n";
}



12) Display the line number of matching text


#!/usr/bin/perl
# Show the line number of every line in a file that contains "appended".
use strict;
use warnings;

my $path = 'D:\sample.txt';

open my $txt, '<', $path
    or die "Can't open $path: $!";

my $i = 0;    # 1-based number of the line currently being examined

while (my $line = <$txt>)
{
    $i = $i + 1;
    next unless $line =~ /appended/;

    print "$i\n";           # Only prints the line number that matches the text.
    print "$i -- $line";    # Prints line number and full sentence.
}

close $txt
    or die "Can't close $path: $!";



13) Parsing a webpage's XML content and dumping it into a file

use strict;
use warnings;
use Data::Dumper;
use XML::Simple;
use LWP::Simple;

# Fetch an XML document over HTTP, parse it into a Perl data structure,
# and write a Data::Dumper rendering of that structure to a file.

my $outfile = 'outfile.txt';
my $url     = 'http://www.xyz.com &dumpXML=1';

# Fetch and parse before touching the output file, so that a failed
# download does not leave an existing file truncated to nothing.
my $content = get($url) or die "Unable to get $url\n";

my $parser = XML::Simple->new;
my $data   = $parser->XMLin($content);
#print Dumper($data);

open my $MYFILE, '>', $outfile
    or die "Can't open $outfile for writing: $!";
print {$MYFILE} Dumper($data)
    or die "Can't write to $outfile: $!";

# Buffered write errors only surface when the handle is closed.
close $MYFILE
    or die "Can't close $outfile: $!";

No comments:

Post a Comment