php ch curl_init curl_setopt_array ch array CURLOPT_RETURNTRANSF ER tr

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
<?php
// One cURL handle is reused for every request made by this script. The session
// is identified by the hard-coded "sid" cookie configured below.
$ch = curl_init();
curl_setopt_array($ch, array(
    // Make curl_exec() return the response body instead of printing it.
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36',
    CURLOPT_COOKIE => 'sid=1',
    CURLOPT_ENCODING => "deflate"
));

// Rate-limiter state and settings. request() reads and updates these through
// `global`, so the variable names must stay as they are.
$hits = 0;              // requests counted in the current burst
$last_request_time = 0; // microtime(true) taken right after the latest request
$hits_per_req = 3;      // burst size after which request() pauses
$req_period = 1000;     // throttling window, in milliseconds

// Repeatedly fetch the contact list page and request the spam/archive link of
// every listed contact until the page no longer lists any contact.
while (true) {
    $res = request($ch, 'http://spaces.ru/mail2/');
    if ($res === false) {
        // A transport failure must not be mistaken for "nothing left to do".
        echo "Request failed: " . curl_error($ch) . "\n";
        exit(1);
    }

    // Each contact is embedded in the page as a JS object literal following "this,".
    preg_match_all("/this,(\{.*?\})/si", $res, $jsons);
    if (!$jsons[1])
        exit("DONE!\n");

    $handled = 0;
    foreach ($jsons[1] as $json) {
        // The literal uses single quotes; swap them so json_decode() accepts it.
        $contact = json_decode(str_replace("'", '"', $json), true);
        if (!is_array($contact)) {
            // Not valid JSON even after quote replacement: nothing usable here.
            continue;
        }

        echo (isset($contact['userName']) ? $contact['userName'] : '') . "\n";

        // Prefer the spam link; fall back to the archive link. Presumably these
        // URLs remove the contact from the list (inferred from the key names).
        if (!empty($contact['spamLink'])) {
            request($ch, $contact['spamLink']);
            ++$handled;
        } elseif (!empty($contact['archiveLink'])) {
            request($ch, $contact['archiveLink']);
            ++$handled;
        }
    }

    if ($handled === 0) {
        // Contacts were listed but none could be acted on, so the next fetch
        // would return the same page again: stop instead of looping forever.
        echo "No spamLink/archiveLink found for any listed contact; stopping.\n";
        exit(1);
    }
}
/**
 * Fetches $url with the given cURL handle, throttled by the global rate limiter.
 *
 * Reads the globals $hits_per_req and $req_period (milliseconds) and updates
 * $hits and $last_request_time. When $hits_per_req requests have already been
 * counted and the previous request finished less than $req_period ms ago, the
 * call sleeps for the remainder of the period before sending the request.
 *
 * @param resource|CurlHandle $ch  Initialised cURL handle; its CURLOPT_URL is overwritten.
 * @param string              $url URL to request.
 * @return string|bool Whatever curl_exec() returns: the response body when
 *                     CURLOPT_RETURNTRANSFER is set on the handle, false on failure.
 */
function request($ch, $url) {
    global $last_request_time, $hits_per_req, $req_period, $hits;

    // Milliseconds elapsed since the previous request completed.
    $diff = (microtime(true) - $last_request_time) * 1000;

    if ($diff >= $req_period) {
        // The previous request is outside the window: start a new burst.
        $hits = 0;
    } elseif ($hits >= $hits_per_req) {
        // Burst exhausted inside the window: wait out the rest of the period.
        // usleep() expects integer microseconds; cast explicitly because passing
        // a fractional float is deprecated since PHP 8.1 (same truncation as before).
        usleep((int) (($req_period - $diff) * 1000));
        $hits = 0;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    $res = curl_exec($ch);

    // The timestamp is taken after the transfer, so the window is measured from
    // the end of the most recent request.
    $last_request_time = microtime(true);
    ++$hits;

    return $res;
}