diff --git a/php/.htaccess.save b/php/.htaccess.save deleted file mode 100644 index 3db8990..0000000 --- a/php/.htaccess.save +++ /dev/null @@ -1,5 +0,0 @@ -D -RewriteRule ^(artist|album|loadtrack)/([a-z]+)/$ ror_proxy.php?controller=$1&action=$2 -RewriteRule ^(artist|album|loadtrack)/([a-z]+)/([a-z0-9-\+]+)(/|\.html)?$ ror_proxy.php?controller=$1&action=$2&id=$3 - -RewriteRule ^$ ror_proxy.php diff --git a/php/bin/parser/worker_html_grabber.php b/php/bin/parser/worker_html_grabber.php old mode 100644 new mode 100755 index 8265d15..f65c79c --- a/php/bin/parser/worker_html_grabber.php +++ b/php/bin/parser/worker_html_grabber.php @@ -4,30 +4,72 @@ chdir('../..'); require_once 'common.php'; -$db = Db::getInstance(); -$vk = new Vkontakte(); +// Минимальный интервал между запросами +define('VKTIMEOUT', 10); +define('QUEUE_PACK', 30); +define('EMPTY_QUEUE_TIMEOUT', 60); -$continue = true; -while ($continue) { - $queue = $db->getRows($db->q("SELECT * FROM beathaven.queue WHERE status=0 OR status=2 ORDER BY priority DESC, times_failed ASC LIMIT 10")); +// Получаем имя бота +if (!isset($argv[1]) || !Config::get('bot:'. $argv[1])) { + die('Wrong bot name: '. @$argv[1]); +} +$bot_name = ucfirst($argv[1]); + +// Инициализация бота по имени +$vk = new Vkontakte($bot_name); + +// Данные о работе бота +$stats = array( + 'started_job' => time(), + 'eneded_job' => time(), + 'pid' => getmypid(), + 'good_results' => 0, + 'bad_results' => 0, + 'queue_size' => 0, + 'last_request' => '' +); + +// Устанавливаем коннект с БД +$db = Db::getInstance(); + +// Бот работает все время +while (true) { + $queue = $db->getRows($db->q("SELECT * FROM beathaven.queue WHERE status=0 OR status=2 ORDER BY priority DESC, times_failed ASC LIMIT ". QUEUE_PACK)); if (!$queue || count($queue) == 0) { - $continue = false; + sleep(EMPTY_QUEUE_TIMEOUT); } else { + $stats['queue_size'] = count($queue); foreach ($queue as $t) { - echo "#{$t['track_id']} {$t['track_title']}\n"; + $t1 = microtime(true); + echo "#{$t['track_id']} {$t['track_title']} -- "; + $ok = $vk->getTracks($t['track_title']); - $vk->parse($t['track_title']); - echo $vk->getHtml(); - - $db->q("UPDATE beathaven.queue SET status=1 WHERE track_id=". $t['track_id']); + if ($ok) { + echo "OK\n"; + $db->q("UPDATE beathaven.queue SET status=1 WHERE track_id=". $t['track_id']); + $file_name = Config::get('app:Parser:good_html_dir'). $t['track_id'] .'.html'; + $stats['good_results']++; } else { - echo "FAILED\n\n"; + echo "FAILED\n"; $db->q("UPDATE beathaven.queue SET status = 2, times_failed = times_failed + 1 WHERE track_id=". $t['track_id']); + $file_name = Config::get('app:Parser:bad_html_dir'). $t['track_id'] .'.html'; + $stats['bad_results']++; } + file_put_contents($file_name, $vk->getHtml()); + chmod($file_name, 0777); + + $stats['last_request'] = $t['track_title']; + $stats['queue_size']--; + $stats['eneded_job'] = time(); + + $bot_stats_file_name = Config::get('app:Parser:bot_stats_dir'). $bot_name .'.json'; + file_put_contents($bot_stats_file_name, json_encode($stats)); + chmod($bot_stats_file_name, 0777); + $t2 = microtime(true); - if ($t2 - $t1 < 5) { - sleep(ceil(5 - ($t2 - $t1))); + if ($t2 - $t1 < VKTIMEOUT) { + sleep(ceil(VKTIMEOUT - ($t2 - $t1))); } } } -} +} \ No newline at end of file diff --git a/php/common.php b/php/common.php index 403f4fb..1329f8a 100644 --- a/php/common.php +++ b/php/common.php @@ -3,4 +3,9 @@ define('ROOT_DIR', getcwd()); require_once ROOT_DIR .'/autoload.php'; + +// Конфиг приложения Config::loadFile('app', 'config/app.ini'); + +// Конфиг ботов +Config::loadFile('bot', 'config/accounts.ini'); diff --git a/php/config/accounts.ini b/php/config/accounts.ini index e7444c3..cf04b6e 100644 --- a/php/config/accounts.ini +++ b/php/config/accounts.ini @@ -1,23 +1,23 @@ [Bach] -user_id = 5728795 -email = chezzzy@yandex.ru -password = yabach! -remixsid = 47c2f5501b22a3e3aa6947e5e74d1a72381267df2502570eb75c94481ade -remixchk = 5 -user_agent = Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13 +user_id = "5728795" +email = "chezzzy@yandex.ru" +password = "yabach!" +remixsid = "47c2f5501b22a3e3aa6947e5e74d1a72381267df2502570eb75c94481ade" +remixchk = "5" +user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru; rv:1.9.2.13) Gecko/20101203 Firefox/3.6.13" [Green] -user_id = 69139853 -email = alexgreen1978@gmail.com -password = fbcn136 -remixsid = bc5386a4f49f8bf7df20e11bdd311a7120818d83c23d93cd08177d5d3674 -remixchk = 5 -user_agent = Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.307.9 Safari/532.9 +user_id = "69139853" +email = "alexgreen1978@gmail.com" +password = "fbcn136" +remixsid = "bc5386a4f49f8bf7df20e11bdd311a7120818d83c23d93cd08177d5d3674" +remixchk = "5" +user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.307.9 Safari/532.9" [Chez] -user_id = 1217744 -email = phpdev.ru@gmail.com -password = yanebach! -remixsid = fc27c3a7874bc0b84477015e187e5e0bd3a71bdca02d98327595ef255773 -remixchk = 5 -user_agent = Mozilla/5.0 (Macintosh; U; PPC Max OS X Mach-O; en-US; rv:1.8.0.7) Gecko/200609211 Camino/1.0.3 \ No newline at end of file +user_id = "1217744" +email = "phpdev.ru@gmail.com" +password = "yanebach!" +remixsid = "fc27c3a7874bc0b84477015e187e5e0bd3a71bdca02d98327595ef255773" +remixchk = "5" +user_agent = "Mozilla/5.0 (Macintosh; U; PPC Max OS X Mach-O; en-US; rv:1.8.0.7) Gecko/200609211 Camino/1.0.3" \ No newline at end of file diff --git a/php/config/app.ini b/php/config/app.ini index 92517ad..0385fbe 100644 --- a/php/config/app.ini +++ b/php/config/app.ini @@ -7,4 +7,9 @@ host = localhost port = 5432 dbname = beathaven login = postgres -pass = password \ No newline at end of file +pass = password + +[Parser] +good_html_dir = "/www/parser_data/html/good/" +bad_html_dir = "/www/parser_data/html/bad/" +bot_stats_dir = "/www/parser_data/stats/" \ No newline at end of file diff --git a/php/core/classes/Vkontakte.class.php b/php/core/classes/Vkontakte.class.php index 823dea1..2c6fbbe 100644 --- a/php/core/classes/Vkontakte.class.php +++ b/php/core/classes/Vkontakte.class.php @@ -3,7 +3,7 @@ /***************************************************************** Пример использования: -$vk_parser = new Vkontakte(); +$vk_parser = new Vkontakte($bot_name); $vk_parser->parse('Blondie - Call Me'); $files = $vk_parser->getFiles(); *****************************************************************/ @@ -22,6 +22,12 @@ class Vkontakte { private $_html; // HTML, полученый от вконтактика private $_files; // Распарсеные массивы с информацией о файле + private $_bot_info; // Информация о боте + + public function __construct($bot_name) { + $this->_bot_info = Config::get('bot:'. $bot_name); + } + /** * Оболочка парсера * @@ -29,7 +35,7 @@ class Vkontakte { * @return array Массив с файлами * @author chez **/ - public function parse($q) { + public function getTracks($q) { $this->_query = $q; $this->auth(); $cookie = array(); @@ -42,18 +48,20 @@ class Vkontakte { 'X-Requested-With: XMLHttpRequest', 'Origin: http://vkontakte.ru', 'Content-Type: application/x-www-form-urlencoded', - 'User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.151 Safari/534.16', + 'User-Agent: '. $this->_bot_info['user_agent'], 'Connection: close' )); - $this->setHtml(RemoteFile::getData('http://vkontakte.ru/audio', array( - 'act' =>'search', - 'al' =>'1', - 'gid' =>'0', - 'id' =>'5728795', - 'offset' =>'0', + $html = RemoteFile::getData('http://vkontakte.ru/audio', array( + 'act' => 'search', + 'al' => '1', + 'gid' => '0', + 'id' => $this->_bot_info['user_id'], + 'offset' => '0', 'q' => urlencode($this->_query), - 'sort' =>'2' - ))); + 'sort' => '2' + )); + $this->setHtml($html); + return (strlen($html) > 150); } /** @@ -96,8 +104,8 @@ class Vkontakte { **/ private function auth() { $this->_cookies = array( - 'remixchk' => 5, - 'remixsid' => 'cf8bdd79d451422c1d484532a58205d92fc46b79caab663a40624c812e01', + 'remixchk' => $this->_bot_info['remixchk'], + 'remixsid' => $this->_bot_info['remixsid'], 'remixlang' => 777 ); } diff --git a/php/core/classes/db/BeatDB.class.php b/php/core/classes/db/BeatDB.class.php index 2238467..a697657 100644 --- a/php/core/classes/db/BeatDB.class.php +++ b/php/core/classes/db/BeatDB.class.php @@ -29,7 +29,9 @@ class BeatDB { return false; } } - return (bool) file_put_contents($part . $path[$i], json_encode($data)); + file_put_contents($part . $path[$i], json_encode($data)); + chmod($part . $path[$i], 0777); + return true; } public static function delete($key) {