Downloading files : A Comparative study
The idea :
A simple case study of downloading files in perl, ruby, python and php.
The algorithm :
Assumption : The input for these scripts is obtained from a simple CGI form.
- 0. Hit a URL which in turn responds with a list of URLs (like RealMedia metafiles).
- 1. Download the contents of each URL that is returned from the master.
- 2. Zip all the downloaded files and exit.
Downloading files in perl :
#!/usr/bin/perl
#
# CGI script: fetch a "master" URL that lists one URL per line, download
# every listed file into the current directory and bundle the downloads
# into "<album>.zip", where <album> is the second-to-last path component
# of the master URL.
#
# Input : CGI parameter 'url' (the master URL).
# Output: an HTTP header, plus an HTML error message when the URL is
#         missing or unreachable.
use strict;
use warnings;

use CGI qw(:standard);

# CGI's :standard set exports its own head() (the HTML <head> builder), so
# LWP::Simple's head() is deliberately not imported. It is called by its
# fully qualified name below; a bare head($url) would build an HTML string,
# which is always true and validates nothing.
use LWP::Simple qw(!head);
use File::Basename;
use Archive::Zip qw(:ERROR_CODES);

# A CGI response has to start with an HTTP header.
print header();

# URL from the input form.
my $url = param('url');

# Validate the input with a real HTTP HEAD request.
if (defined $url and LWP::Simple::head($url)) {

    # Get the album name, in this case the second last part of the URL.
    my @pieces = split m{/}, $url;
    my $album  = $pieces[-2];

    # Fetch the list of URLs; get() returns undef on failure.
    my $listing = get($url);
    $listing = '' unless defined $listing;

    my @songs;
    for my $song_url (split /\r?\n/, $listing) {

        # Tolerate stray whitespace and blank lines in the list.
        $song_url =~ s/\A\s+|\s+\z//g;
        next if $song_url eq '';

        my $song = basename($song_url);

        # The major part of downloading. Only files that actually arrived
        # are remembered for the archive.
        my $status = getstore($song_url, $song);
        if (is_success($status)) {
            push @songs, $song;
        }
        else {
            warn "Download of $song_url failed with status $status\n";
        }
    }

    # Add every downloaded song to the zip archive.
    my $compressor = Archive::Zip->new();
    for my $song (@songs) {
        $compressor->addFile($song)
            or warn "Could not add $song to the archive\n";
    }

    # Write the zip file.
    $compressor->writeToFileNamed($album . '.zip') == AZ_OK
        or die "Could not write $album.zip\n";
}
else {
    print "<b> Please check the URL! </b>";
}
Downloading files in ruby :
#!/usr/bin/ruby
#
# CGI script: fetch a "master" URL that lists one URL per line and store
# the contents of every listed URL as an entry of "<album>.zip", where
# <album> is the second-to-last path component of the master URL.
#
# NOTE(review): the published snippet was truncated in the middle of the
# loop (the Zip::ZipFile.open call was lost) and never created the `cgi`
# object; both have been reconstructed here - confirm against the
# author's original script.
require 'cgi'
require 'uri'
require 'open-uri'
# Pre-1.0 rubyzip, which provides the Zip::ZipFile class used below.
require 'zip/zip'

cgi = CGI.new

# A CGI response has to start with an HTTP header.
print "Content-Type: text/html\r\n\r\n"

# CGI#params maps each field to an array of values; take the first one.
urls = cgi.params['url'].first.to_s.strip

# Accept only well-formed http/https URLs. Parsing (instead of an
# unanchored regexp match) rejects input that merely contains a URL.
master = begin
  URI.parse(urls)
rescue URI::InvalidURIError
  nil
end

if master.is_a?(URI::HTTP)
  album = urls.split('/')[-2]

  # Open the archive once instead of once per song.
  Zip::ZipFile.open(album + '.zip', Zip::ZipFile::CREATE) do |zipit|
    # URI#open (from open-uri) is used instead of Kernel#open so that user
    # input can never be interpreted as a local file or a pipe command.
    master.open.read.each_line do |url|
      url = url.strip
      next if url.empty?

      song = url.split('/')[-1]

      data = begin
        URI.parse(url).open.read
      rescue StandardError => e
        warn "Skipping #{url}: #{e.message}"
        next
      end

      # write, not puts: puts would append a newline to the binary data.
      zipit.get_output_stream(song) { |f| f.write data }
    end
  end
else
  puts "Please check the input!"
end
Downloading files in python:
# CGI script (Python 2: relies on urllib.urlopen / urllib.urlretrieve).
#
# Fetches a "master" URL that lists one URL per line, downloads every
# listed file into the current directory and bundles the downloads into
# "<album>.zip", where <album> is the second-to-last path component of
# the master URL.
import cgi
import cgitb
import sys
import urllib
import zipfile

cgitb.enable()

# A CGI response has to start with an HTTP header; print() adds the second
# newline that terminates the header block.
print("Content-Type: text/html\n")

form = cgi.FieldStorage()
url = form.getvalue("url", "None")

if url != "None":
    album = url.split('/')[-2]

    try:
        urls = urllib.urlopen(url)
    except IOError:
        print("Please check the URL")
        # A bare `exit` is only a name lookup and does not stop the
        # script; sys.exit() really terminates it.
        sys.exit()

    songs = []
    try:
        for line in urls:
            # Lines read from the response keep their line terminator;
            # strip it so it ends up in neither the request nor the file name.
            song_url = line.strip()
            if not song_url:
                continue
            song = song_url.split("/")[-1]
            if not song:
                continue
            urllib.urlretrieve(song_url, song)
            songs.append(song)
    finally:
        urls.close()

    # Close the archive explicitly so that the zip central directory is
    # always written out.
    zipit = zipfile.ZipFile(album + '.zip', 'w')
    try:
        for song in songs:
            zipit.write(song)
    finally:
        zipit.close()
Downloading files in PHP:
<?php
// CGI-style script: fetch a ".ram" list that contains one URL per line,
// download every listed file into the current directory and bundle the
// downloads into "<album>.zip".

// NOTE(review): $params and $header are not defined or used anywhere else
// in this script, so this statement currently has no effect - confirm
// whether it is a leftover from another script.
if (isset($params['host']) && $params['host']) $header[]="Host: " . $params['host'];

/**
 * Creates a compressed zip file.
 *
 * @param array  $files       Paths of the files to add; missing files are skipped.
 * @param string $destination Path of the archive to write.
 * @param bool   $overwrite   Whether an existing archive may be replaced.
 * @return bool True when the archive exists after writing, false otherwise
 *              (nothing to add, archive could not be opened, or overwrite refused).
 */
function create_zip($files = array(), $destination = '', $overwrite = true)
{
    // Avoid overwriting an existing archive unless asked to.
    if (file_exists($destination) && !$overwrite) {
        return false;
    }

    // Keep only the files that really exist.
    $valid_files = array();
    if (is_array($files)) {
        foreach ($files as $file) {
            if (file_exists($file)) {
                $valid_files[] = $file;
            }
        }
    }
    if (!count($valid_files)) {
        return false;
    }

    // OVERWRITE on its own fails when the archive does not exist yet, so it
    // is always combined with CREATE.
    $flags = $overwrite
        ? (ZipArchive::CREATE | ZipArchive::OVERWRITE)
        : ZipArchive::CREATE;

    $zip = new ZipArchive();
    if ($zip->open($destination, $flags) !== true) {
        return false;
    }
    foreach ($valid_files as $file) {
        $zip->addFile($file, $file);
    }
    $zip->close();

    return file_exists($destination);
}

/**
 * Downloads the body of a URL with cURL.
 *
 * @param string $url URL to fetch; trailing whitespace is removed.
 * @return string|false The response body, or false when the transfer failed.
 */
function get_data($url)
{
    $url = rtrim($url);
    $ch = curl_init();
    $timeout = 5; // connect timeout in seconds
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}

/**
 * Checks whether a string looks like an http(s) URL.
 *
 * @param string $url Candidate URL.
 * @return int|false Result of preg_match(): 1 on match, 0 otherwise.
 */
function isValidURL($url)
{
    // The dot between host labels is escaped; unescaped it matched any character.
    return preg_match('|^http(s)?://[a-z0-9-]+(\.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|i', $url);
}

if (isset($_POST['submit'])) {
    $url = isset($_POST['url']) ? trim($_POST['url']) : '';
    $songs = array();

    // The album name defaults to the second-to-last path component.
    $pieces = explode('/', (string) parse_url($url, PHP_URL_PATH));
    $album = count($pieces) >= 2 ? $pieces[count($pieces) - 2] : '';

    // An album page (".php") is mapped to its "All.ram" list, and the
    // album is then named after the page itself.
    if (isValidURL($url) and substr($url, -4) === ".php") {
        $url = str_replace(".php", "/All.ram", $url);
        $url = str_replace("/home", "", $url);
        $album = $pieces[count($pieces) - 1];
        $album = str_replace(".php", "", $album);
    }

    if (!isValidURL($url) or substr($url, -4) != ".ram") {
        echo "<p>Error! Please check the URL guru!</p>";
    } else {
        // Fetch the list once and reuse it.
        $returned_content = get_data($url);
        if ($returned_content === false) {
            echo "<p>Error! Please check the URL guru!</p>";
        } else {
            foreach (explode("\n", $returned_content) as $dload) {
                $dload = trim($dload);

                // end() takes its argument by reference, so it needs a real variable.
                $parts = explode("/", $dload);
                $song = rtrim(end($parts));
                if ($song === '') {
                    continue;
                }

                // Only files that were actually downloaded go into the archive.
                $data = get_data($dload);
                if ($data !== false) {
                    file_put_contents($song, $data);
                    array_push($songs, $song);
                }
            }
            $result = create_zip($songs, $album . '.zip');
        }
    }
}
?>
The average runtime ( 3 runs ) on a 3MB/s line for a file size of 13MB was :
Language | Time in secs |
PHP | 11.49 |
Perl | 9.0 |
Ruby | 17.22 |
Python | 0.24 |
All said and done, a simple wget -i took :
real 0m5.231s
user 0m0.012s
sys 0m0.131s
P.S : This was a very specific case and can't be taken as a generic benchmark; there are always better ways of tuning the code above!

