Working multi-account html grabber

This commit is contained in:
magnolia-fan
2011-04-02 20:44:45 +04:00
parent 1a8baf57a5
commit d54b66a317
7 changed files with 110 additions and 53 deletions
+57 -15
View File
@@ -4,30 +4,72 @@
// Queue worker for a named bot: fetches pending rows from beathaven.queue, requests each
// track through the Vkontakte helper, saves the returned HTML to a "good" or "bad"
// directory, and writes per-bot run statistics to a JSON file after every track.
// NOTE(review): this text appears to be a rendered diff hunk with the +/- markers removed,
// so several statements show up twice (old and new variants next to each other) and the
// braces do not balance. Confirm against the real file before editing any logic.
chdir('../..');
require_once 'common.php';
// NOTE(review): $db and $vk are assigned again further down (with a bot name passed to
// Vkontakte); these two lines are presumably the removed side of the diff -- confirm.
$db = Db::getInstance();
$vk = new Vkontakte();
// Minimum interval between requests, in seconds (compared with a microtime(true) delta below)
define('VKTIMEOUT', 10);
// Maximum number of queue rows fetched per pass (appended to the SQL LIMIT clause below)
define('QUEUE_PACK', 30);
// Seconds to sleep when the queue query returns no rows
define('EMPTY_QUEUE_TIMEOUT', 60);
// NOTE(review): a second loop header (while (true)) and a second queue query follow below;
// this $continue-driven loop with a fixed LIMIT 10 is presumably the removed variant -- confirm.
$continue = true;
while ($continue) {
$queue = $db->getRows($db->q("SELECT * FROM beathaven.queue WHERE status=0 OR status=2 ORDER BY priority DESC, times_failed ASC LIMIT 10"));
// Get the bot name: the first CLI argument must be set and must have a truthy
// 'bot:<name>' entry in Config, otherwise the script terminates
if (!isset($argv[1]) || !Config::get('bot:'. $argv[1])) {
die('Wrong bot name: '. @$argv[1]);
}
$bot_name = ucfirst($argv[1]);
// Initialize the bot by name
$vk = new Vkontakte($bot_name);
// Bot run statistics; this array is serialized to the bot's JSON stats file after each track.
// NOTE(review): the key is spelled 'eneded_job' both here and where it is updated below;
// anything reading the JSON presumably depends on that spelling -- confirm before renaming.
$stats = array(
'started_job' => time(),
'eneded_job' => time(),
'pid' => getmypid(),
'good_results' => 0,
'bad_results' => 0,
'queue_size' => 0,
'last_request' => ''
);
// Establish the DB connection
$db = Db::getInstance();
// The bot runs continuously
while (true) {
// Rows with status 0 or 2, highest priority first, fewest failures first, at most QUEUE_PACK rows.
// Status 1 is written on success and status 2 on failure below; status 0 is presumably
// "not yet processed" -- confirm against the schema.
$queue = $db->getRows($db->q("SELECT * FROM beathaven.queue WHERE status=0 OR status=2 ORDER BY priority DESC, times_failed ASC LIMIT ". QUEUE_PACK));
if (!$queue || count($queue) == 0) {
// NOTE(review): clearing $continue only affects the while ($continue) header above,
// presumably the removed variant; the sleep is what idles the while (true) loop.
$continue = false;
sleep(EMPTY_QUEUE_TIMEOUT);
} else {
$stats['queue_size'] = count($queue);
foreach ($queue as $t) {
// NOTE(review): two variants of the progress echo appear (newline-terminated and
// " -- "-terminated); presumably old and new diff lines -- confirm which is live.
echo "#{$t['track_id']} {$t['track_title']}\n";
// Start of the per-track timer used for throttling at the end of the iteration
$t1 = microtime(true);
echo "#{$t['track_id']} {$t['track_title']} -- ";
// The result of getTracks() decides between the success and failure branches below
$ok = $vk->getTracks($t['track_title']);
// NOTE(review): the next three statements (parse, raw HTML echo, unconditional status=1)
// look like the removed side of the diff; as written the row would be marked done
// before $ok is checked -- confirm.
$vk->parse($t['track_title']);
echo $vk->getHtml();
$db->q("UPDATE beathaven.queue SET status=1 WHERE track_id=". $t['track_id']);
if ($ok) {
// Success: mark the row as status 1 and target the "good" HTML directory
echo "OK\n";
$db->q("UPDATE beathaven.queue SET status=1 WHERE track_id=". $t['track_id']);
$file_name = Config::get('app:Parser:good_html_dir'). $t['track_id'] .'.html';
$stats['good_results']++;
} else {
// Failure: mark the row as status 2, bump its failure counter, target the "bad" HTML directory.
// NOTE(review): "FAILED" is echoed twice here (with "\n\n" and with "\n"); presumably
// old and new diff lines -- confirm.
echo "FAILED\n\n";
echo "FAILED\n";
$db->q("UPDATE beathaven.queue SET status = 2, times_failed = times_failed + 1 WHERE track_id=". $t['track_id']);
$file_name = Config::get('app:Parser:bad_html_dir'). $t['track_id'] .'.html';
$stats['bad_results']++;
}
// Save the fetched HTML as <track_id>.html in the chosen directory and make it world-accessible
file_put_contents($file_name, $vk->getHtml());
chmod($file_name, 0777);
// Refresh the statistics and rewrite the bot's JSON stats file after every track
$stats['last_request'] = $t['track_title'];
$stats['queue_size']--;
$stats['eneded_job'] = time();
$bot_stats_file_name = Config::get('app:Parser:bot_stats_dir'). $bot_name .'.json';
file_put_contents($bot_stats_file_name, json_encode($stats));
chmod($bot_stats_file_name, 0777);
// Throttle: if this iteration took less than the minimum interval, sleep for the remainder
// (rounded up to whole seconds).
// NOTE(review): both a hard-coded 5-second check and a VKTIMEOUT check appear; the 5-second
// one is presumably the removed variant -- confirm.
$t2 = microtime(true);
if ($t2 - $t1 < 5) {
sleep(ceil(5 - ($t2 - $t1)));
if ($t2 - $t1 < VKTIMEOUT) {
sleep(ceil(VKTIMEOUT - ($t2 - $t1)));
}
}
}
}
}