Working multi-account html grabber
This commit is contained in:
parent
1a8baf57a5
commit
d54b66a317
|
@ -1,5 +0,0 @@
|
||||||
D
|
|
||||||
RewriteRule ^(artist|album|loadtrack)/([a-z]+)/$ ror_proxy.php?controller=$1&action=$2
|
|
||||||
RewriteRule ^(artist|album|loadtrack)/([a-z]+)/([a-z0-9-\+]+)(/|\.html)?$ ror_proxy.php?controller=$1&action=$2&id=$3
|
|
||||||
|
|
||||||
RewriteRule ^$ ror_proxy.php
|
|
|
@ -4,30 +4,72 @@
|
||||||
chdir('../..');
|
chdir('../..');
|
||||||
require_once 'common.php';
|
require_once 'common.php';
|
||||||
|
|
||||||
$db = Db::getInstance();
|
// Минимальный интервал между запросами
|
||||||
$vk = new Vkontakte();
|
define('VKTIMEOUT', 10);
|
||||||
|
define('QUEUE_PACK', 30);
|
||||||
|
define('EMPTY_QUEUE_TIMEOUT', 60);
|
||||||
|
|
||||||
$continue = true;
|
// Получаем имя бота
|
||||||
while ($continue) {
|
if (!isset($argv[1]) || !Config::get('bot:'. $argv[1])) {
|
||||||
$queue = $db->getRows($db->q("SELECT * FROM beathaven.queue WHERE status=0 OR status=2 ORDER BY priority DESC, times_failed ASC LIMIT 10"));
|
die('Wrong bot name: '. @$argv[1]);
|
||||||
|
}
|
||||||
|
$bot_name = ucfirst($argv[1]);
|
||||||
|
|
||||||
|
// Инициализация бота по имени
|
||||||
|
$vk = new Vkontakte($bot_name);
|
||||||
|
|
||||||
|
// Данные о работе бота
|
||||||
|
$stats = array(
|
||||||
|
'started_job' => time(),
|
||||||
|
'eneded_job' => time(),
|
||||||
|
'pid' => getmypid(),
|
||||||
|
'good_results' => 0,
|
||||||
|
'bad_results' => 0,
|
||||||
|
'queue_size' => 0,
|
||||||
|
'last_request' => ''
|
||||||
|
);
|
||||||
|
|
||||||
|
// Устанавливаем коннект с БД
|
||||||
|
$db = Db::getInstance();
|
||||||
|
|
||||||
|
// Бот работает все время
|
||||||
|
while (true) {
|
||||||
|
$queue = $db->getRows($db->q("SELECT * FROM beathaven.queue WHERE status=0 OR status=2 ORDER BY priority DESC, times_failed ASC LIMIT ". QUEUE_PACK));
|
||||||
if (!$queue || count($queue) == 0) {
|
if (!$queue || count($queue) == 0) {
|
||||||
$continue = false;
|
sleep(EMPTY_QUEUE_TIMEOUT);
|
||||||
} else {
|
} else {
|
||||||
|
$stats['queue_size'] = count($queue);
|
||||||
foreach ($queue as $t) {
|
foreach ($queue as $t) {
|
||||||
echo "#{$t['track_id']} {$t['track_title']}\n";
|
$t1 = microtime(true);
|
||||||
|
echo "#{$t['track_id']} {$t['track_title']} -- ";
|
||||||
|
$ok = $vk->getTracks($t['track_title']);
|
||||||
|
|
||||||
$vk->parse($t['track_title']);
|
if ($ok) {
|
||||||
echo $vk->getHtml();
|
echo "OK\n";
|
||||||
|
$db->q("UPDATE beathaven.queue SET status=1 WHERE track_id=". $t['track_id']);
|
||||||
$db->q("UPDATE beathaven.queue SET status=1 WHERE track_id=". $t['track_id']);
|
$file_name = Config::get('app:Parser:good_html_dir'). $t['track_id'] .'.html';
|
||||||
|
$stats['good_results']++;
|
||||||
} else {
|
} else {
|
||||||
echo "FAILED\n\n";
|
echo "FAILED\n";
|
||||||
$db->q("UPDATE beathaven.queue SET status = 2, times_failed = times_failed + 1 WHERE track_id=". $t['track_id']);
|
$db->q("UPDATE beathaven.queue SET status = 2, times_failed = times_failed + 1 WHERE track_id=". $t['track_id']);
|
||||||
|
$file_name = Config::get('app:Parser:bad_html_dir'). $t['track_id'] .'.html';
|
||||||
|
$stats['bad_results']++;
|
||||||
}
|
}
|
||||||
|
file_put_contents($file_name, $vk->getHtml());
|
||||||
|
chmod($file_name, 0777);
|
||||||
|
|
||||||
|
$stats['last_request'] = $t['track_title'];
|
||||||
|
$stats['queue_size']--;
|
||||||
|
$stats['eneded_job'] = time();
|
||||||
|
|
||||||
|
$bot_stats_file_name = Config::get('app:Parser:bot_stats_dir'). $bot_name .'.json';
|
||||||
|
file_put_contents($bot_stats_file_name, json_encode($stats));
|
||||||
|
chmod($bot_stats_file_name, 0777);
|
||||||
|
|
||||||
$t2 = microtime(true);
|
$t2 = microtime(true);
|
||||||
if ($t2 - $t1 < 5) {
|
if ($t2 - $t1 < VKTIMEOUT) {
|
||||||
sleep(ceil(5 - ($t2 - $t1)));
|
sleep(ceil(VKTIMEOUT - ($t2 - $t1)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -3,4 +3,9 @@
|
||||||
define('ROOT_DIR', getcwd());
|
define('ROOT_DIR', getcwd());
|
||||||
|
|
||||||
require_once ROOT_DIR .'/autoload.php';
|
require_once ROOT_DIR .'/autoload.php';
|
||||||
|
|
||||||
|
// Конфиг приложения
|
||||||
Config::loadFile('app', 'config/app.ini');
|
Config::loadFile('app', 'config/app.ini');
|
||||||
|
|
||||||
|
// Конфиг ботов
|
||||||
|
Config::loadFile('bot', 'config/accounts.ini');
|
||||||
|
|
|
@ -1,23 +1,23 @@
|
||||||
[Bach]
|
[Bach]
|
||||||
user_id = 5728795
|
user_id = "5728795"
|
||||||
email = chezzzy@yandex.ru
|
email = "chezzzy@yandex.ru"
|
||||||
password = yabach!
|
password = "yabach!"
|
||||||
remixsid = 47c2f5501b22a3e3aa6947e5e74d1a72381267df2502570eb75c94481ade
|
remixsid = "47c2f5501b22a3e3aa6947e5e74d1a72381267df2502570eb75c94481ade"
|
||||||
remixchk = 5
|
remixchk = "5"
|
||||||
user_agent = Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13
|
user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13"
|
||||||
|
|
||||||
[Green]
|
[Green]
|
||||||
user_id = 69139853
|
user_id = "69139853"
|
||||||
email = alexgreen1978@gmail.com
|
email = "alexgreen1978@gmail.com"
|
||||||
password = fbcn136
|
password = "fbcn136"
|
||||||
remixsid = bc5386a4f49f8bf7df20e11bdd311a7120818d83c23d93cd08177d5d3674
|
remixsid = "bc5386a4f49f8bf7df20e11bdd311a7120818d83c23d93cd08177d5d3674"
|
||||||
remixchk = 5
|
remixchk = "5"
|
||||||
user_agent = Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.307.9 Safari/532.9
|
user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.307.9 Safari/532.9"
|
||||||
|
|
||||||
[Chez]
|
[Chez]
|
||||||
user_id = 1217744
|
user_id = "1217744"
|
||||||
email = phpdev.ru@gmail.com
|
email = "phpdev.ru@gmail.com"
|
||||||
password = yanebach!
|
password = "yanebach!"
|
||||||
remixsid = fc27c3a7874bc0b84477015e187e5e0bd3a71bdca02d98327595ef255773
|
remixsid = "fc27c3a7874bc0b84477015e187e5e0bd3a71bdca02d98327595ef255773"
|
||||||
remixchk = 5
|
remixchk = "5"
|
||||||
user_agent = Mozilla/5.0 (Macintosh; U; PPC Max OS X Mach-O; en-US; rv:1.8.0.7) Gecko/200609211 Camino/1.0.3
|
user_agent = "Mozilla/5.0 (Macintosh; U; PPC Max OS X Mach-O; en-US; rv:1.8.0.7) Gecko/200609211 Camino/1.0.3"
|
|
@ -7,4 +7,9 @@ host = localhost
|
||||||
port = 5432
|
port = 5432
|
||||||
dbname = beathaven
|
dbname = beathaven
|
||||||
login = postgres
|
login = postgres
|
||||||
pass = password
|
pass = password
|
||||||
|
|
||||||
|
[Parser]
|
||||||
|
good_html_dir = "/www/parser_data/html/good/"
|
||||||
|
bad_html_dir = "/www/parser_data/html/bad/"
|
||||||
|
bot_stats_dir = "/www/parser_data/stats/"
|
|
@ -3,7 +3,7 @@
|
||||||
/*****************************************************************
|
/*****************************************************************
|
||||||
Пример использования:
|
Пример использования:
|
||||||
|
|
||||||
$vk_parser = new Vkontakte();
|
$vk_parser = new Vkontakte($bot_name);
|
||||||
$vk_parser->parse('Blondie - Call Me');
|
$vk_parser->parse('Blondie - Call Me');
|
||||||
$files = $vk_parser->getFiles();
|
$files = $vk_parser->getFiles();
|
||||||
*****************************************************************/
|
*****************************************************************/
|
||||||
|
@ -22,6 +22,12 @@ class Vkontakte {
|
||||||
private $_html; // HTML, полученый от вконтактика
|
private $_html; // HTML, полученый от вконтактика
|
||||||
private $_files; // Распарсеные массивы с информацией о файле
|
private $_files; // Распарсеные массивы с информацией о файле
|
||||||
|
|
||||||
|
private $_bot_info; // Информация о боте
|
||||||
|
|
||||||
|
public function __construct($bot_name) {
|
||||||
|
$this->_bot_info = Config::get('bot:'. $bot_name);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Оболочка парсера
|
* Оболочка парсера
|
||||||
*
|
*
|
||||||
|
@ -29,7 +35,7 @@ class Vkontakte {
|
||||||
* @return array Массив с файлами
|
* @return array Массив с файлами
|
||||||
* @author chez
|
* @author chez
|
||||||
**/
|
**/
|
||||||
public function parse($q) {
|
public function getTracks($q) {
|
||||||
$this->_query = $q;
|
$this->_query = $q;
|
||||||
$this->auth();
|
$this->auth();
|
||||||
$cookie = array();
|
$cookie = array();
|
||||||
|
@ -42,18 +48,20 @@ class Vkontakte {
|
||||||
'X-Requested-With: XMLHttpRequest',
|
'X-Requested-With: XMLHttpRequest',
|
||||||
'Origin: http://vkontakte.ru',
|
'Origin: http://vkontakte.ru',
|
||||||
'Content-Type: application/x-www-form-urlencoded',
|
'Content-Type: application/x-www-form-urlencoded',
|
||||||
'User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.16',
|
'User-Agent: '. $this->_bot_info['user_agent'],
|
||||||
'Connection: close'
|
'Connection: close'
|
||||||
));
|
));
|
||||||
$this->setHtml(RemoteFile::getData('http://vkontakte.ru/audio', array(
|
$html = RemoteFile::getData('http://vkontakte.ru/audio', array(
|
||||||
'act' =>'search',
|
'act' => 'search',
|
||||||
'al' =>'1',
|
'al' => '1',
|
||||||
'gid' =>'0',
|
'gid' => '0',
|
||||||
'id' =>'5728795',
|
'id' => $this->_bot_info['user_id'],
|
||||||
'offset' =>'0',
|
'offset' => '0',
|
||||||
'q' => urlencode($this->_query),
|
'q' => urlencode($this->_query),
|
||||||
'sort' =>'2'
|
'sort' => '2'
|
||||||
)));
|
));
|
||||||
|
$this->setHtml($html);
|
||||||
|
return (strlen($html) > 150);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -96,8 +104,8 @@ class Vkontakte {
|
||||||
**/
|
**/
|
||||||
private function auth() {
|
private function auth() {
|
||||||
$this->_cookies = array(
|
$this->_cookies = array(
|
||||||
'remixchk' => 5,
|
'remixchk' => $this->_bot_info['remixchk'],
|
||||||
'remixsid' => 'cf8bdd79d451422c1d484532a58205d92fc46b79caab663a40624c812e01',
|
'remixsid' => $this->_bot_info['remixsid'],
|
||||||
'remixlang' => 777
|
'remixlang' => 777
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,9 @@ class BeatDB {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (bool) file_put_contents($part . $path[$i], json_encode($data));
|
file_put_contents($part . $path[$i], json_encode($data));
|
||||||
|
chmod($part . $path[$i], 0777);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static function delete($key) {
|
public static function delete($key) {
|
||||||
|
|
Loading…
Reference in New Issue