1) Perl script to get web page content
#!/usr/bin/perl
use LWP::Simple;
my $website_content = get("http://www.xyz.com"); # enter the URL here
die "Couldn't fetch the page\n" unless defined $website_content;
open(FILEHANDLE, '>', 'D:\Filename.txt') or die "Can't open file: $!";
print FILEHANDLE $website_content;
close FILEHANDLE;
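LWP::Simple also provides getstore(), which fetches a URL and writes the body to a file in one step; a minimal sketch using the same placeholder URL and path:
#!/usr/bin/perl
use LWP::Simple;
# getstore() returns the HTTP status code of the request.
my $status = getstore("http://www.xyz.com", 'D:\Filename.txt');
print "Fetch failed with status $status\n" unless is_success($status);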
2) Perl script to get links from a web page
#!/usr/bin/perl
use HTML::LinkExtor;
use LWP::Simple;
my $page = get("http://www.xyz.com");
die "Couldn't fetch the page\n" unless defined $page;
my $parser = HTML::LinkExtor->new;
$parser->parse($page);
# once we have parsed the page ...
my @links = $parser->links;
foreach (@links) {
# $_ is an array ref: [tag, attr1 => url1, attr2 => url2, ...]
print "Type: ", shift @$_, "\n";
while (my ($name, $value) = splice(@$_, 0, 2)) {
print " $name -> $value\n";
}
}
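The links come back exactly as they appear in the page, so relative URLs stay relative. Passing a base URL as the second argument to new() makes HTML::LinkExtor return absolute URI objects instead; a minimal sketch with the same placeholder URL:
#!/usr/bin/perl
use HTML::LinkExtor;
use LWP::Simple;
my $base = "http://www.xyz.com";
my $page = get($base);
die "Couldn't fetch the page\n" unless defined $page;
# With a base URL, links() returns absolute URI objects.
my $parser = HTML::LinkExtor->new(undef, $base);
$parser->parse($page);
foreach my $link ($parser->links) {
my ($tag, %attrs) = @$link;
print "$tag: $_\n" for values %attrs;
}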
3) Extracting Links from Arbitrary HTML
#!/usr/bin/perl -w
use strict;
use LWP;
my $doc_url = "http://www.xyz.com";
my $document;
my $browser;
init_browser( );
{ # Get the page whose links we want to check:
my $response = $browser->get($doc_url);
die "Couldn't get $doc_url: ", $response->status_line
unless $response->is_success;
$document = $response->content;
$doc_url = $response->base;
# In case we need to resolve relative URLs later
}
# Note: this pattern only matches double-quoted href values
# (a more tolerant variant is sketched after this example).
while ($document =~ m/href\s*=\s*"([^"\s]+)"/gi) {
my $absolute_url = absolutize($1, $doc_url);
check_url($absolute_url);
}
sub absolutize {
my($url, $base) = @_;
use URI;
return URI->new_abs($url, $base)->canonical;
}
sub init_browser {
$browser = LWP::UserAgent->new;
# ...And any other initialization we might need to do...
return $browser;
}
sub check_url {
# A temporary placeholder...
print "Check - $_[0]\n";
}
4) Calculate the load time of a webpage
#!/usr/local/bin/perl
use LWP::Simple;
use WWW::Mechanize;
$url = 'http://www.xyz.com';
$started = time;
print "Started: $started\n";
my $mech = WWW::Mechanize->new();
$mech->get( $url );
my @links = $mech->links();
# log the links to a file as well (the filename is a placeholder)
open(MYFILE1, '>', 'D:\links.txt') or die "Can't open link log: $!";
foreach my $link (@links) {
print "LINK: " . $link->url() . "\n";
print "DESCRIPTION: " . $link->text() . "\n";
print MYFILE1 "LINK: " . $link->url() . "\n";
}
close(MYFILE1);
# this will get the HTML text
$content = get($url);
print "Got content\n";
# parse the IMG SRC tags from the HTML
# retrieve each IMG SRC (relative SRC values would need to be made
# absolute first, e.g. with URI->new_abs as in example 3)
while ($content =~ /<IMG.*?SRC="(.*?)"/gis)
{
print "Getting $1\n";
get($1);
}
$stopped = time;
print "Stopped: $stopped\n";
$elapsed = $stopped - $started;
print "Elapsed: $elapsed\n";
5) Integrating Java code inside a Perl script
#!/usr/bin/perl
use LWP::Simple;
$url = 'http://www.xyz.com';
# Shell out to a small Java class to get the current time in milliseconds
$started = `java date`;
print "Started: $started\n";
$content = get($url);
print "Got content\n";
while ($content =~ /<IMG.*?SRC="(.*?)"/gis)
{
print "Getting $1\n";
get($1);
}
#######
$stopped = `java date`;
print "Stopped: $stopped\n";
$elapsed = $stopped - $started;
print "Elapsed: $elapsed\n";
Java Code:
class date
{
public static void main(String args[]) throws Exception
{
System.out.println((new java.util.Date()).getTime());
}
}
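Shelling out to Java twice works, but each call pays the JVM start-up cost inside the measurement. Perl can take millisecond timestamps natively with Time::HiRes; a minimal sketch using the same placeholder URL:
#!/usr/bin/perl
use LWP::Simple;
use Time::HiRes qw(time); # replaces time() with a floating-point version
my $url = 'http://www.xyz.com';
my $started = time;
my $content = get($url);
# ... fetch the images here, as in the loop above ...
my $stopped = time;
printf "Elapsed: %.0f ms\n", ($stopped - $started) * 1000;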
6) Reading a file line by line
#!/usr/local/bin/perl
open (MYFILE, 'D:\sample.txt') or die "Can't open file: $!";
while (<MYFILE>) {
chomp;
print "$_\n";
}
close (MYFILE);
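To read the whole file at once instead, undefine the input record separator $/ so readline returns the entire file as one string; a minimal sketch:
#!/usr/local/bin/perl
open(my $fh, '<', 'D:\sample.txt') or die "Can't open file: $!";
my $whole_file = do { local $/; <$fh> }; # with $/ undefined, <> slurps everything
close($fh);
print $whole_file;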
7) Appending to a file
#!/usr/local/bin/perl
open (MYFILE, '>>D:\sample.txt') or die "Can't open file: $!";
print MYFILE "This is being appended or rather added to the existing file which already has data.\n";
close (MYFILE);
8) Check if a file exists
#!/usr/bin/perl -w
my $filename = 'D:\sample.txt';
if (-e $filename) {print "File Exists!";}
9) Check if a file does not exist
#!/usr/bin/perl -w
my $filename = 'D:\sample.txt';
unless (-e $filename) {print "File Doesn't Exist!";}
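-e only tests existence; Perl has a whole family of file test operators. A few common ones, sketched against the same placeholder path:
#!/usr/bin/perl -w
my $filename = 'D:\sample.txt';
print "Is a plain file\n" if -f $filename; # not a directory, link, etc.
print "Is a directory\n" if -d $filename;
print "Is readable\n" if -r $filename;
print "Has size: ", -s $filename, " bytes\n" if -s $filename; # -s returns the size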
10) Search for a specific word in a file and display the whole line
#!/usr/bin/perl
$file = 'D:\sample.txt';
open(my $fh, '<', $file) or die "Can't open $file: $!";
while (my $line = <$fh>) {
print $line if $line =~ /Services:/;
}
close($fh);
11) Parse all the links in a webpage into an array and print
#parse all the links in a web page into an array organized like this: ($link, $description)
use strict;
use warnings;
use WWW::Mechanize;
my $url = "http://www.xyz.com";
#my $url = "http://www.domain.com/webpage.html";
my $mech = WWW::Mechanize->new();
$mech->get( $url );
my @links = $mech->links();
foreach my $link (@links) {
print "LINK: " . $link->url() . "\n";
print "DESCRIPTION: " . $link->text() . "\n";
}
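WWW::Mechanize can also filter links for you: find_all_links() accepts criteria such as a regex on the URL or on the link text. A minimal sketch continuing from the script above (the patterns are placeholders):
# Only links whose URL ends in .pdf:
my @pdf_links = $mech->find_all_links( url_regex => qr/\.pdf$/i );
print "PDF: " . $_->url() . "\n" for @pdf_links;
# Only links whose visible text mentions "contact":
my @contact_links = $mech->find_all_links( text_regex => qr/contact/i );
print "CONTACT: " . $_->url() . "\n" for @contact_links;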
12) Display the line number of matching text
#!/usr/bin/perl
use strict;
use warnings;
open(my $fh, '<', 'D:\sample.txt') or die "Can't open file: $!";
my $i = 0;
while (my $line = <$fh>)
{
$i = $i + 1;
print "$i\n" if $line =~ /appended/; # prints only the line number of a match
if ($line =~ /appended/) { # prints the line number and the full line
print "$i -- $line";
}
}
close($fh);
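Perl already tracks the input line number of the last-read filehandle in the special variable $., so the manual counter is optional; a minimal sketch of the same search:
#!/usr/bin/perl
use strict;
use warnings;
open(my $fh, '<', 'D:\sample.txt') or die "Can't open file: $!";
while (my $line = <$fh>) {
print "$. -- $line" if $line =~ /appended/; # $. holds the current line number
}
close($fh);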
13) Fetching a webpage's XML dump and writing the parsed data to a file
use strict;
use warnings;
use Data::Dumper;
use XML::Simple;
use LWP::Simple;
open(my $MYFILE, '>', 'outfile.txt') or die "Can't open outfile.txt: $!";
my $parser = XML::Simple->new;
my $url = 'http://www.xyz.com/?dumpXML=1'; # query parameter asks the site for an XML dump
my $content = get $url or die "Unable to get $url\n";
my $data = $parser->XMLin($content);
print $MYFILE Dumper($data);
close $MYFILE;
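XML::Simple can also serialize the parsed structure back to XML with XMLout(); a minimal sketch continuing from the script above (outfile.xml is a placeholder name):
# XMLout() turns the hash/array structure from XMLin() back into XML text.
my $xml = $parser->XMLout($data);
open(my $xmlout, '>', 'outfile.xml') or die "Can't open outfile.xml: $!";
print $xmlout $xml;
close($xmlout);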