Downloading Files : A Comparative Study
The idea :
A simple case study of downloading files in perl, ruby, python and php.
The algorithm :
Assumption : The input for these scripts is obtained from a simple CGI form.
- 0. Hit a URL which in turn responds with a list of URLs ( like RealMedia metafiles )
- 1. Download the contents of each URL that is returned by the master URL.
- 2. Zip all the downloaded files and exit.
Downloading files in perl :
#!/usr/bin/perl -w
use strict;
use warnings;
use CGI qw(:standard);
use LWP::Simple qw(!head);
use File::Basename;
use Archive::Zip;
# URL from the input form.
my $url = param('url');

# Validate the input.
# CGI's :standard set exports its own head() (the HTML <head> element
# builder), which is why LWP::Simple's head() is excluded at import time.
# Call the LWP version by its full name so the URL is really probed with an
# HTTP HEAD request instead of being wrapped in a (always true) HTML string.
if (defined $url and LWP::Simple::head($url))
{
    # Get the album name,
    # in this case the second last part.
    my @pieces = split m{/}, $url;
    my $album  = $pieces[-2];

    # Fetch the list of urls; get() returns undef when the request fails.
    my $urls = get($url);
    $urls = '' unless defined $urls;

    # Names of the files that were actually downloaded.
    my @songs;

    # For each url in the url list.
    for my $song_url (split /\n/, $urls) {
        # Drop surrounding whitespace (including the CR of CRLF line ends)
        # and skip blank lines.
        $song_url =~ s/\A\s+|\s+\z//g;
        next unless length $song_url;

        my $song = basename($song_url);

        # The major part of downloading. getstore() returns the HTTP status
        # code; only remember files whose download succeeded.
        next unless is_success(getstore($song_url, $song));
        push @songs, $song;
    }

    my $compressor = Archive::Zip->new();

    # Add every downloaded song to the zip archive.
    for my $song (@songs) {
        $compressor->addFile($song);
    }

    # Write the zip file and report a failed write on STDERR.
    my $status = $compressor->writeToFileNamed($album . '.zip');
    warn "Could not write $album.zip\n"
        unless $status == Archive::Zip::AZ_OK();
}
else
{
    print "<b> Please check the URL! </b>";
}
Downloading files in ruby :
#!/usr/bin/ruby
# Reads the 'url' CGI parameter, fetches the list of URLs it points to,
# downloads every listed file and stores them all in "<album>.zip", where
# <album> is the second-to-last '/'-separated piece of the list URL.
require 'cgi'
require 'open-uri'
require 'uri'
require 'zip/zip' # rubyzip < 1.0, which provides Zip::ZipFile

# Returns true when +text+ parses as a URL that open-uri is able to fetch.
# open-uri mixes OpenURI::OpenRead into the URI classes it supports, so this
# rejects plain paths and "| command" strings that Kernel#open would accept.
def fetchable_url?(text)
  URI.parse(text).is_a?(OpenURI::OpenRead)
rescue URI::InvalidURIError
  false
end

cgi = CGI.new
# CGI#[] returns the first value of the field as a string ("" when absent).
list_url = cgi['url'].to_s.strip

if fetchable_url?(list_url)
  album = list_url.split('/')[-2]

  # Fetch the list first so a failed request does not leave an archive behind.
  listing = URI.parse(list_url).read

  # Open the archive once and add every song to it.
  Zip::ZipFile.open(album + '.zip', Zip::ZipFile::CREATE) do |zipit|
    listing.each_line do |line|
      song_url = line.strip
      next if song_url.empty? || !fetchable_url?(song_url)

      song = song_url.split('/')[-1]
      # write, not puts: puts would append a newline to the binary payload.
      zipit.get_output_stream(song) { |f| f.write URI.parse(song_url).read }
    end
  end
else
  puts "Please check the input!"
end
Downloading files in python:
# Reads the "url" form field, fetches the list of URLs it points to,
# downloads every listed file into the current directory and packs the
# downloaded files into "<album>.zip", where <album> is the second-to-last
# '/'-separated piece of the list URL.
import cgi
import cgitb
import sys
import urllib
import zipfile

try:  # Python 2
    from urllib import urlopen, urlretrieve
except ImportError:  # Python 3
    from urllib.request import urlopen, urlretrieve

cgitb.enable()
form = cgi.FieldStorage()
url = form.getvalue("url")
if url:
    pieces = url.split('/')
    if len(pieces) < 2:
        print("Please check the URL")
        sys.exit()
    album = pieces[-2]
    try:
        listing = urlopen(url).read()
    except IOError:
        print("Please check the URL")
        # Stop here: without the listing there is nothing to download.
        sys.exit()
    # Python 3 returns bytes; Python 2 already returns str.
    if not isinstance(listing, str):
        listing = listing.decode("utf-8")
    songs = []
    for line in listing.splitlines():
        # Strip the line ending so it ends up in neither the request
        # nor the local file name; skip blank lines.
        song_url = line.strip()
        if not song_url:
            continue
        song = song_url.split("/")[-1]
        if not song:
            continue
        urlretrieve(song_url, song)
        songs.append(song)
    zipit = zipfile.ZipFile(album + '.zip', 'w')
    try:
        for song in songs:
            zipit.write(song)
    finally:
        # Closing writes the central directory; without it the archive
        # may be left incomplete.
        zipit.close()
Downloading files in PHP:
<?php
// Queue a "Host: ..." header line when a non-empty host parameter is present.
if (!empty($params['host'])) {
    $header[] = "Host: " . $params['host'];
}
/**
 * Creates a compressed zip file.
 *
 * @param array  $files       Paths of the files to add; entries that do not
 *                            exist on disk are ignored. Each file is stored
 *                            in the archive under the path given here.
 * @param string $destination Path of the zip archive to write.
 * @param bool   $overwrite   When false, an existing $destination is left
 *                            untouched and false is returned.
 * @return bool True when the archive exists after writing; false when there
 *              was nothing to add, the archive could not be opened or
 *              closed, or overwriting was refused.
 */
function create_zip($files = array(), $destination = '', $overwrite = true) {
    // avoid over write
    if (file_exists($destination) && !$overwrite) {
        return false;
    }

    // keep only the files that really exist
    $valid_files = array();
    if (is_array($files)) {
        foreach ($files as $file) {
            if (file_exists($file)) {
                $valid_files[] = $file;
            }
        }
    }
    if (!count($valid_files)) {
        return false;
    }

    // create the archive
    // OVERWRITE on its own cannot open a destination that does not exist
    // yet, so it is always combined with CREATE.
    $flags = $overwrite
        ? (ZipArchive::CREATE | ZipArchive::OVERWRITE)
        : ZipArchive::CREATE;
    $zip = new ZipArchive();
    if ($zip->open($destination, $flags) !== true) {
        return false;
    }

    // add the files
    foreach ($valid_files as $file) {
        $zip->addFile($file, $file);
    }

    // the archive is only written out on close
    if (!$zip->close()) {
        return false;
    }
    return file_exists($destination);
}
/**
 * Fetches a URL with cURL and hands back the result of curl_exec().
 *
 * Trailing whitespace is stripped from $url before the request, the
 * transfer is returned instead of printed, and the connection attempt is
 * limited to 5 seconds.
 *
 * @param string $url Address to fetch.
 * @return mixed The value returned by curl_exec().
 */
function get_data($url)
{
    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => rtrim($url),
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 5,
    ));
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}
/**
 * Checks whether $url looks like an http(s) URL: scheme, dot-separated
 * host labels made of letters, digits and hyphens, an optional numeric
 * port and an optional path.
 *
 * @param string $url Candidate URL.
 * @return int|false 1 when the URL matches, 0 when it does not
 *                   (the result of preg_match()).
 */
function isValidURL($url)
{
    // The dot between host labels is escaped so it only matches a literal
    // '.', and the D modifier stops '$' from accepting a trailing newline.
    return preg_match('|^http(s)?://[a-z0-9-]+(\.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|iD', $url);
}
// Form handler: resolves the submitted URL to a .ram list, downloads every
// file named in that list into the current directory and zips the results
// into "<album>.zip".
if (isset($_POST['submit']))
{
    $url = isset($_POST['url']) ? rtrim($_POST['url']) : '';
    $songs = array();

    // The album defaults to the second-to-last piece of the URL path.
    $pieces = explode('/', (string) parse_url($url, PHP_URL_PATH));
    $count = count($pieces);
    $album = $count >= 2 ? $pieces[$count - 2] : '';

    // A ".php" page URL is rewritten to the matching "All.ram" list, and the
    // album is then taken from the page name instead.
    if (isValidURL($url) and substr(trim($url), -4) === ".php")
    {
        $url = str_replace(".php", "/All.ram", $url);
        $url = str_replace("/home", "", $url);
        $album = $pieces[$count - 1];
        $album = str_replace(".php", "", $album);
    }

    // Fetch the list once, and only for a valid ".ram" URL.
    $listing = false;
    if (isValidURL($url) and substr(trim($url), -4) === ".ram") {
        $listing = get_data($url);
    }

    if ($listing === false) {
        echo "<p>Error! Please check the URL guru!</p>";
    }
    else {
        foreach (explode("\n", $listing) as $dload) {
            $dload = trim($dload);

            // end() needs a real variable, not the temporary from explode().
            $segments = explode("/", $dload);
            $song = end($segments);
            if (empty($song)) {
                continue;
            }

            // Skip failed downloads instead of writing an empty file.
            $content = get_data($dload);
            if ($content === false) {
                continue;
            }
            file_put_contents($song, $content);
            $songs[] = $song;
        }
        $result = create_zip($songs, $album . '.zip');
    }
}
?>
The average runtime ( 3 runs ) on a 3MB/s line for a file size of 13MB was :
| Language | Time in secs |
|---|---|
| PHP | 11.49 |
| Perl | 9.0 |
| Ruby | 17.22 |
| Python | 0.24 |
All said and done, a simple wget -i took : real 0m5.231s user 0m0.012s sys 0m0.131s
P.S : This was a very specific case and can’t serve as a generic benchmark; there are always better ways of tuning the code above!
#javascript#linux
About Hemanth HM
Hemanth HM is a Sr. Staff Engineer at PayPal, Google Developer Expert, TC39 delegate, FOSS advocate, and community leader with a passion for programming, AI, and open-source contributions.