require_once "multiCurl.class";
$mc= new multiCurl(10); // max 10 simultaneous downloads
$mc->addURL("http://www.google.com/search?q=php"); // start 1
$mc->addURL("http://www.google.com/search?q=web"); // start 2
$mc->endWait(); // wait for completion
print_r($mc->getResults()); // print results
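Each result returned by getResults() is an array; the examples below read the keys 'url', 'seq', 'error', 'header', 'result', 'start' and 'done' from it. A minimal sketch of inspecting the results, assuming exactly those keys:
foreach($mc->getResults() as $i => $r) {
    if ($r['error'])
        echo "request $i ({$r['url']}) failed: {$r['error']}\n";
    else
        echo "request $i ({$r['url']}) returned ".strlen($r['result'])." bytes\n";
}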
To download using POST with a timeout of 10 seconds:
$mc->addURL("http://someserver.com/","dat1=somedata&dat2=moredata",10);
To download 4 URLs, only 2 concurrently:
$mc->addURL("http://www.google.com/search?q=php"); // it starts downloading
$mc->addURL("http://www.google.com/search?q=php","",0,-2); // -2 => waits for host=www.google.com
$mc->addURL("http://es.php.net/curl"); // it starts inmediately
$mc->addURL("http://www.imdb.com/","",0,-1); // -1 => waits for previous request
$mc->endWait();
To download N sets of URLs, each set downloaded sequentially, with timeouts of 5 seconds:
$mc= new multiCurl(some number greater than N);
foreach(array(array("url11","url12", ...),
              array("url21","url22", ...),
              .......................) as $set) {
    $sq=0;
    foreach($set as $url){
        $mc->addURL($url,"",5,$sq);
        $sq=-1;
    }
}
$mc->endWait();
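As a concrete (hypothetical) instance of the pattern above, two sets of two URLs each: the sets run in parallel, while the URLs inside each set run one after another:
$mc= new multiCurl(5); // greater than the number of sets (2)
foreach(array(array("http://www.google.com/search?q=php",
                    "http://www.google.com/search?q=curl"),
              array("http://es.php.net/curl",
                    "http://www.imdb.com/")) as $set) {
    $sq=0;              // the first URL of each set starts immediately
    foreach($set as $url){
        $mc->addURL($url,"",5,$sq);
        $sq=-1;         // the following URLs wait for the previous request
    }
}
$mc->endWait();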
// creates object - 10 max simultaneous downloads
$mc=new multiCurl(10);
// set a result handler that simply writes info
// args are the result info (array) and the request order
function Hand1(&$r,$q) {
    echo "title {$r['url']} proc {$r['seq']} req $q",
        strftime(" %c ",$r['start']),strftime("%c ",$r['done']),
        "err={$r['error']} sz=",strlen($r['result'])."\n";
    $r=array(1); // frees memory, array cannot be empty
}
// As of PHP 5.3 you can also pass an anonymous function
$mc->resultHandler('Hand1');
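// The same handler written as an anonymous function (PHP 5.3+); this assumes
// resultHandler() accepts any PHP callable, as the comment above suggests:
// $mc->resultHandler(function(&$r,$q) {
//     echo "req $q url {$r['url']} err={$r['error']} sz=".strlen($r['result'])."\n";
//     $r=array(1); // frees memory, array cannot be empty
// });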
// generates 20 random IMDB urls and schedules them
for($i=0; $i <20; $i++)
$mc->addURL("http://www.imdb.com/title/tt".rand(0,10000));
// wait for completion
$mc->endWait();
This example shows how to sequence some downloads.
$mc=new multiCurl(10);
// set a result handler just to show progress while waiting
function Hand2($dum,$i) {
    echo "$i ";
}
$mc->resultHandler('Hand2');
echo "Completed ";
// very slow url?
$mc->addURL('http://www.iana.org/assignments/enterprise-numbers');
// here we can perform some computation while the download is in progress
// blah blah
// queries google
$mc->addURL('http://www.google.com/search?q=php');
// next url will be downloaded after previous completes
$mc->addURL('http://www.google.com/search?q=curl','',0,-1);
// same as above, but stores the sequence id in $id
$id=$mc->addURL('http://www.google.com/search?q=html','',0,-1);
// here we perform more computation while two of the four URLs are downloaded in parallel
// blah blah
// downloads in parallel
$mc->addURL('http://www.php.net/');
// chains another Google query after the request stored in $id
$mc->addURL('http://www.google.com/search?q=web','',0,$id);
// more sequential Google queries, serialized by host (-2)
$mc->addURL('http://www.google.com/search?q=js','',0,-2);
$mc->addURL('http://www.google.com/search?q=ajax','',0,-2);
$mc->endWait();
echo "\n";
// Three orderings are reported:
// - the order in which the URLs were requested
// - the order in which they started processing
// - the order in which they finished
// results are presented in completion order, indexed by the program request order
// 'seq' represents the order of processing start
// 'start' and 'done' are the times when processing started and finished
foreach($mc->getResults() as $i => $one)
    echo "req $i proc {$one['seq']} err {$one['error']} url {$one['url']}",
        " times {$one['start']} {$one['done']} len ".strlen($one['result'])."\n";
This example is a very simple spider. When a URL completes, the downloaded content is (roughly) parsed for its href attributes, and every URL found is scheduled for download. Because this example is recursive, multiCurl limits the recursion level to avoid too many nested calls (while still downloading all the requested URLs).
// a simple link spider
$mc=new multiCurl(50);
$howmany=$argv[2] or $howmany=300;
// recursive handler
function Hand3(&$rs,$q) {
    global $mc,$howmany;
    echo $rs['url']." $q\n";
    if (preg_match('%^Content-type:\s+text/html%mi',$rs['header'])) {
        preg_match_all('%href="?(http:[^" ]*)%',$rs['result'],$m);
        $rs=array(1); // free results to avoid memory exhaustion, array cannot be empty
        foreach($m[1] as $u)
            if ($howmany) {
                $howmany--;
                $mc->addURL($u);
            }
    }
}
$mc->resultHandler('Hand3');
$mc->addURL('http://www.google.com/search?q=php');
$mc->endWait();
In this example, a URL that downloads about 1 MByte is scheduled concurrently with 1 second of computation, and this is repeated 10 times. With the default values no download occurs during the computation; all the downloading is done at endWait. If the class is instantiated with msschedule=1000, it schedules the downloads right after they are requested, so better performance is obtained. The best results are achieved by calling wait during the computation. This holds in this example, but it is not true when the computation is light, as frequent calls to wait cause delays in execution.
function Hand4(&$r,$i) { echo "."; $r=array(1); }
$sz=$argv[3] or $sz=1000000;
foreach(array(
        array('msg'=>"No tuning",                 'ms'=>1,    'wait'=>false),
        array('msg'=>"Should be a better tuning", 'ms'=>1000, 'wait'=>false),
        array('msg'=>"Should be the best tuning", 'ms'=>1,    'wait'=>true),
    ) as $sm) {
    echo "**** {$sm['msg']}\n";
    $mc=new multiCurl(20,$sm['ms']);
    $mc->resultHandler('Hand4');
    $in=microtime(true);
    for ($i=0; $i<10; $i++) {
        $mc->addURL("http://multicurl.nisu.org/downl?b=$sz"); // downloads $sz bytes
        // simulates 1 second of computation
        echo "$i "; sleep(1);
        if ($sm['wait'])
            // give multiCurl 100 milliseconds to schedule downloads
            $mc->wait(100);
    }
    $mc->endWait();
    echo "Total time ".(microtime(true)-$in)."\n";
}
This example shows how to download pages and store them in a directory tree that mirrors their URLs.
// Example by Marius van Rijnsoever < mariusvr - AT - gmail.com >
$mc= new multiCurl(10); // max 10 simultaneous downloads
// result handler that saves each downloaded page to a file
function SaveFile(&$rs,$q)
{
    //remove the starting protocol
    $rs['url'] = str_replace(array('http://','https://'), '', $rs['url']);
    //parse the url into an array
    $urlparts = explode('/', $rs['url']);
    //loop through url parts and create directories as required
    $currentdir = getcwd();
    $numItems = count($urlparts);
    $i = 0;
    foreach($urlparts as $urlpart) {
        //check whether this is the last part; directory URLs (empty last part) are saved as index.html
        if ($i+1 == $numItems) {
            if (empty($urlpart)) {
                $currentdir .= '/index.html';
            } else {
                $currentdir .= '/' .$urlpart;
            }
        } else {
            //create the directory if it does not exist
            $currentdir .= '/' . $urlpart;
            if (!is_dir($currentdir)) {
                //create directory
                mkdir($currentdir);
            }
        }
        $i++;
    }
    //save the actual file
    file_put_contents($currentdir, $rs['result']);
    //empty the memory
    $rs=array(1);
}
$mc->resultHandler('SaveFile');
$mc->addURL("http://www.google.com/");
$mc->addURL("http://www.google.com/search?q=web");
$mc->endWait(); // wait for completion