<?php
/**
* Created by PhpStorm.
* User: koder_alex
* Date: 26.04.2018
* Time: 14:11
*/
/**
 * Fetches audio listing pages from the mobile VK site, parses the track
 * entries out of the HTML and caches the parsed result on disk.
 *
 * The parsed result is stored as a PHP file (`response/<md5>.data.r`) next to
 * this class and reused until it expires. Constructing the object also runs a
 * cleanup pass over the audio/cover directories located under the
 * `filesPathAudio` constant, which must be defined by the including code.
 */
class vkParserNew {
    /** Seconds: audio files older than this are deleted; also the minimum interval between audio cleanups. */
    const cleanDirFile = 6000;
    /** Seconds: cover files older than this are deleted; also the minimum interval between cover cleanups. */
    const cleanDirCover = 86400;
    /** Seconds a freshly parsed response stays valid in the cache. */
    const timeResponseCache = 21600;
    /** Base URL every requested path/query is appended to. */
    const urlSite = 'https://m.vk.com/audio';
    /** User-Agent header sent with every request. */
    const httpUserAgent = 'Mozilla/5.0 Windows NT 6.1; Win64; x64 AppleWebKit/537.36 KHTML, like Gecko Chrome/61.0.3163.91 Safari/537.36';
    /** Number of pages (50-item offsets) requested per fetch, unless the URL contains "popular". */
    const countRequestPage = 4;

    private $urlReferer;
    private $html, $doc;
    private $nameFileCache = '';
    private $response = array();

    /**
     * vkParserNew constructor.
     *
     * Creates the cache directory (protected by an .htaccess file) when it is
     * missing, prepares the DOM document and runs the file cleanup.
     */
    public function __construct()
    {
        $cacheDir = __DIR__ . '/response';
        if (is_dir($cacheDir) === false) {
            // A concurrent request may have created the directory in the meantime.
            if (mkdir($cacheDir) || is_dir($cacheDir)) {
                file_put_contents($cacheDir . '/.htaccess', 'Deny From All');
            }
        }
        $this->doc = new DOMDocument();
        $this->response['mtRand'] = 0;
        $this->cleanDirFile();
    }

    /**
     * vkParserNew destruct.
     */
    public function __destruct()
    {
        $this->doc = null;
    }

    /**
     * Returns the parsed audio list for the given path/query, from cache when
     * the cached copy has not expired, otherwise by downloading and parsing
     * the pages.
     *
     * The result of a download is cached for self::timeResponseCache seconds
     * even when no item could be parsed.
     *
     * @param string $strUrl Part of the URL appended to self::urlSite.
     * @return array Keys: 'timeUpdate' (expiry timestamp), 'item' (parsed
     *               tracks keyed by a random id), 'nameCache', 'referer' and,
     *               when loaded from the cache file, 'mtRand'.
     * @throws Exception When the cache name or referer could not be set.
     */
    public function fetch($strUrl)
    {
        $this->setUrlRef($strUrl);
        $this->setNameFile($strUrl);
        // Reset the marker so getDataResponse() re-reads the cache for this URL.
        $this->response['mtRand'] = 0;
        $this->response = $this->getDataResponse();
        if ($this->response['timeUpdate'] > time()) {
            return $this->response;
        }
        $this->response['item'] = array();

        // Initialise the cURL multi session.
        $multi = curl_multi_init();
        $handles = array();
        $pageCount = (strpos($strUrl, 'popular') === false ? self::countRequestPage : 1);
        // Start the query string when $strUrl does not carry one yet.
        $offsetGlue = (strpos($strUrl, '?') === false ? '?' : '&');
        for ($i = 0; $i < $pageCount; $i++) {
            $offset = $i * 50;
            $pageUrl = $strUrl . ($offset > 0 ? $offsetGlue . 'offset=' . $offset : '');
            $handle = curl_init();
            if ($handle === false) {
                continue;
            }
            // The referer used here is the URL remembered by the previous
            // setUrlRef() call, so it must be applied before updating it.
            $this->addSetOpt($handle, array(CURLOPT_URL => self::urlSite . $pageUrl));
            $this->setUrlRef($pageUrl);
            if ($i === 0) {
                $this->response['referer'] = $this->getUrlRef();
            }
            curl_multi_add_handle($multi, $handle);
            $handles['offset_' . $offset] = $handle;
        }
        if (count($handles) <= 0) {
            curl_multi_close($multi);
            return $this->response;
        }

        // Drive all transfers; wait for socket activity instead of spinning,
        // and stop when the multi stack itself reports an error.
        do {
            $status = curl_multi_exec($multi, $active);
            if ($status === CURLM_OK && $active > 0 && curl_multi_select($multi, 1.0) === -1) {
                usleep(1000);
            }
        } while ($status === CURLM_CALL_MULTI_PERFORM || ($status === CURLM_OK && $active > 0));

        // Collect the outcome of every finished transfer.
        $completed = array();
        while (($info = curl_multi_info_read($multi)) !== false) {
            if ($info['msg'] !== CURLMSG_DONE || $info['result'] !== CURLE_OK) {
                continue;
            }
            $key = array_search($info['handle'], $handles, true);
            if ($key !== false) {
                $completed[$key] = true;
            }
        }

        foreach ($handles as $key => $handle) {
            if (isset($completed[$key]) === true) {
                $this->html = curl_multi_getcontent($handle);
                if ($this->_newHtmlDoc() === true) {
                    $this->parseLoadedDocument();
                }
            }
            curl_multi_remove_handle($multi, $handle);
        }
        $handles = null;
        curl_multi_close($multi);

        $this->response['timeUpdate'] = time() + self::timeResponseCache;
        $this->writeDataResponse();
        return $this->response;
    }

    /**
     * Locates the audio list container in the currently loaded document and
     * hands its child nodes to parserAudio().
     */
    private function parseLoadedDocument()
    {
        $domXPath = new DOMXPath($this->doc);
        $containers = $domXPath->query("//div[@class='audios_block audios_list _si_container']");
        if ($containers === false || $containers->length <= 0) {
            return;
        }
        $children = $containers->item(0)->childNodes;
        if ($children->length <= 0) {
            return;
        }
        $this->parserAudio($children);
    }

    /**
     * Extracts track data from the child nodes of the audio list container
     * and appends it to $this->response['item'].
     *
     * Entries whose markup does not have the expected structure, or that have
     * no file URL, are skipped.
     *
     * @param DOMNodeList $elements
     */
    private function parserAudio(DOMNodeList $elements)
    {
        $parsed = array();
        foreach ($elements as $item) {
            if (($item instanceof DOMElement) === false) {
                continue;
            }
            $info = $item->childNodes->item(1); // div[class="ai_info"]
            if (($info instanceof DOMElement) === false) {
                continue;
            }
            $body = $info->childNodes->item(5); // div[class="al_body"]
            if (($body instanceof DOMElement) === false) {
                continue;
            }

            $inputs = $body->getElementsByTagName('input');
            $url = ($inputs->length > 0 ? $inputs->item(0)->getAttribute('value') : '');
            if (empty($url)) {
                continue;
            }

            $divs = $body->getElementsByTagName('div');
            $durNode = $divs->item(0);   // div[class="ai_dur"]
            $labelNode = $divs->item(1); // div[class="ai_label"]
            if ($durNode === null || $labelNode === null) {
                continue;
            }
            $spans = $labelNode->getElementsByTagName('span');
            $titleNode = $spans->item(0);
            $artistNode = $spans->item(2);
            if ($titleNode === null || $artistNode === null) {
                continue;
            }

            // Capture only the URL inside url(...), so trailing style text
            // such as ";" does not end up in the cover address.
            $cover = '';
            $coverNode = $info->getElementsByTagName('div')->item(0);
            if ($coverNode !== null) {
                $style = $coverNode->getAttribute('style');
                if (preg_match('/background-image\s*:\s*url\(\s*[\'"]?([^\'")]+)/i', $style, $match) === 1) {
                    $cover = trim($match[1]);
                }
            }

            $id = mt_rand() . '_' . mt_rand() . '_' . mt_rand(); // data-id
            $parsed[$id] = array(
                'dur' => $durNode->getAttribute('data-dur'),
                'cDur' => $durNode->textContent,
                'title' => $titleNode->textContent,
                'artist' => $artistNode->textContent,
                'url' => $url,
                'file' => $id . '.mp3',
            );

            $coverPath = parse_url($cover, PHP_URL_PATH);
            $coverExt = (is_string($coverPath) ? pathinfo($coverPath, PATHINFO_EXTENSION) : '');
            if (empty($coverExt) === false) {
                $parsed[$id]['cover'] = $id . '.' . $coverExt;
                $parsed[$id]['coverServer'] = $cover;
            }
        }
        if (isset($this->response['item']) === false || is_array($this->response['item']) === false) {
            $this->response['item'] = array();
        }
        $this->response['item'] = array_merge($this->response['item'], $parsed);
    }

    /**
     * Returns the in-memory response when it was already loaded and has
     * items; otherwise resets it to defaults and, when a readable and valid
     * cache file exists, replaces it with the cached data.
     *
     * @return array
     * @throws Exception When the cache name has not been set.
     */
    private function getDataResponse()
    {
        $cacheFile = $this->cacheFilePath();
        if (isset($this->response['mtRand'], $this->response['item'])
            && $this->response['mtRand'] > 0
            && count($this->response['item']) > 0
        ) {
            return $this->response;
        }
        $this->response = array();
        $this->response['timeUpdate'] = 0;
        $this->response['item'] = array();
        $this->response['nameCache'] = $this->getNameFile();
        $this->response['referer'] = 'https://m.vk.com/audio';
        if (is_readable($cacheFile) === true) {
            // Plain include: include_once returns true (not the data) when
            // the same file was already loaded earlier in this process.
            try {
                $cached = include $cacheFile;
            } catch (ParseError $e) {
                // A damaged cache file is treated as a cache miss.
                $cached = null;
            }
            if (is_array($cached) && isset($cached['timeUpdate'], $cached['item']) && is_array($cached['item'])) {
                $this->response = $cached;
                $this->response['mtRand'] = mt_rand();
            }
        }
        return $this->response;
    }

    /**
     * Stores the current response as an includable PHP file.
     *
     * The data is written to a temporary file and renamed into place so a
     * concurrent reader never includes a half-written file.
     *
     * @return bool True when the cache file was written completely.
     * @throws Exception When the cache name has not been set.
     */
    private function writeDataResponse()
    {
        $cacheFile = $this->cacheFilePath();
        $payload = '<?php return ' . var_export($this->response, true) . ';';
        $tmpFile = $cacheFile . '.' . mt_rand() . '.tmp';
        $fp = fopen($tmpFile, 'wb');
        if ($fp === false) {
            return false;
        }
        $written = $this->fwrite_stream($fp, $payload);
        fclose($fp);
        if ($written !== strlen($payload) || rename($tmpFile, $cacheFile) === false) {
            if (is_file($tmpFile)) {
                unlink($tmpFile);
            }
            return false;
        }
        // Make sure a bytecode cache does not keep serving the old contents.
        if (function_exists('opcache_invalidate')) {
            opcache_invalidate($cacheFile, true);
        }
        return true;
    }

    /**
     * Builds the path of the cache file for the current cache name.
     *
     * @return string
     * @throws Exception When the cache name has not been set.
     */
    private function cacheFilePath()
    {
        return __DIR__ . '/response/' . $this->getNameFile() . '.data.r';
    }

    /**
     * Writes the whole string to the stream, retrying after partial writes.
     *
     * @param resource $fp
     * @param string $string
     * @return int|false Number of bytes written (less than the string length
     *                   when writing stopped early), or false when $fp is not
     *                   a resource.
     */
    private function fwrite_stream($fp, $string)
    {
        if (is_resource($fp) === false) {
            return false;
        }
        $length = strlen($string);
        for ($written = 0; $written < $length; $written += $fwrite) {
            $fwrite = fwrite($fp, substr($string, $written));
            // A zero-byte write would otherwise repeat forever.
            if ($fwrite === false || $fwrite === 0) {
                return $written;
            }
        }
        return $written;
    }

    /**
     * Derives the cache name from the lower-cased URL.
     *
     * @param string $strUrl
     */
    private function setNameFile($strUrl)
    {
        $this->nameFileCache = md5(strtolower($strUrl));
    }

    /**
     * @return string
     * @throws Exception When no cache name has been set.
     */
    private function getNameFile()
    {
        if (empty($this->nameFileCache) === true) {
            throw new Exception("Error set name file cache");
        }
        return $this->nameFileCache;
    }

    /**
     * Remembers the full URL (site base + $strUrl) as the referer.
     *
     * @param string $strUrl
     */
    private function setUrlRef($strUrl)
    {
        $this->urlReferer = self::urlSite . $strUrl;
    }

    /**
     * @return string
     * @throws Exception When no referer has been set.
     */
    private function getUrlRef()
    {
        if (empty($this->urlReferer) === true) {
            throw new Exception("Error set url referer");
        }
        return $this->urlReferer;
    }

    /**
     * Loads $this->html into the shared DOM document with libxml warnings
     * suppressed.
     *
     * @return bool False when there is no HTML to load or loading failed; the
     *              document then must not be used for the current page.
     */
    private function _newHtmlDoc()
    {
        // DOMDocument::loadHTML() rejects an empty string on PHP 8.
        if (is_string($this->html) === false || trim($this->html) === '') {
            return false;
        }
        $previous = libxml_use_internal_errors(true);
        $loaded = $this->doc->loadHTML($this->html);
        if ($previous === false) {
            // Drop the errors collected here; nobody else is reading them.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);
        return $loaded === true;
    }

    /**
     * Returns the effective URL reported by the handle, or the remembered
     * referer when the handle reports none.
     *
     * @param $ch
     * @return mixed|string
     * @throws Exception
     */
    private function lastUrl(&$ch)
    {
        $referer = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        if (empty($referer) === true) {
            $referer = $this->getUrlRef();
        }
        return $referer;
    }

    /**
     * Applies the default request options to a cURL handle, followed by the
     * caller supplied options (which therefore win on conflict).
     *
     * @param resource|CurlHandle $ch
     * @param array $opt Extra CURLOPT_* => value pairs.
     * @throws Exception When no referer has been set.
     */
    private function addSetOpt(&$ch, array $opt = array())
    {
        // curl_init() returns a resource before PHP 8 and a CurlHandle object since.
        if (is_resource($ch) === false && ($ch instanceof CurlHandle) === false) {
            return;
        }
        curl_setopt($ch, CURLOPT_USERAGENT, self::httpUserAgent);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        // NOTE(review): TLS certificate verification is switched off, which
        // allows man-in-the-middle tampering with the parsed pages. Kept as-is
        // to preserve behaviour; enable once a CA bundle is confirmed available.
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_TIMEOUT, 15);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        curl_setopt($ch, CURLOPT_REFERER, $this->lastUrl($ch));
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
        curl_setopt($ch, CURLOPT_COOKIEFILE, __DIR__ . '/cookie/cookieUserVk.cook');
        if (empty($opt) === false) {
            curl_setopt_array($ch, $opt);
        }
    }

    /**
     * Tells whether the HTTP_X_REQUESTED_WITH environment value equals
     * "xmlhttprequest" (case-insensitively).
     *
     * @return bool
     */
    private function isAjax()
    {
        $server = getenv('HTTP_X_REQUESTED_WITH');
        $server = ($server !== false ? strtolower($server) : null);
        return $server === 'xmlhttprequest';
    }

    /**
     * Clear dir
     *
     * On non-AJAX requests removes the mp3 files whose name starts with the
     * current session's 'audioSession' value, then purges expired audio and
     * cover files. Relies on the filesPathAudio constant.
     */
    private function cleanDirFile()
    {
        $audioDir = filesPathAudio . 'file/';
        // Without a session prefix the pattern would match every user's files.
        if ($this->isAjax() === false
            && isset($_SESSION['audioSession'])
            && is_scalar($_SESSION['audioSession'])
            && (string)$_SESSION['audioSession'] !== ''
        ) {
            $sessionFiles = glob($audioDir . $_SESSION['audioSession'] . '*.mp3');
            if (is_array($sessionFiles)) {
                foreach ($sessionFiles as $file) {
                    unlink($file);
                }
            }
        }
        $this->purgeExpired($audioDir, '*.mp3', 'lastCleanTimeAudio.touch', self::cleanDirFile);
        $this->purgeExpired(filesPathAudio . 'cover/', '*', 'lastCleanTimeCover.touch', self::cleanDirCover);
    }

    /**
     * Deletes files matching $pattern in $dir that are older than $maxAge
     * seconds, at most once per $maxAge seconds (tracked through the
     * modification time of the stamp file).
     *
     * @param string $dir Directory path with trailing slash.
     * @param string $pattern Glob pattern relative to $dir.
     * @param string $stampName Name of the stamp file inside $dir.
     * @param int $maxAge Age limit and cleanup interval in seconds.
     */
    private function purgeExpired($dir, $pattern, $stampName, $maxAge)
    {
        $stamp = $dir . $stampName;
        if (is_readable($stamp) === false) {
            touch($stamp);
        }
        $threshold = time() - $maxAge;
        $lastRun = filemtime($stamp);
        if ($lastRun !== false && $lastRun >= $threshold) {
            return;
        }
        $files = glob($dir . $pattern);
        if (is_array($files)) {
            foreach ($files as $file) {
                // Never remove the stamp itself or try to unlink a directory.
                if ($file === $stamp || is_file($file) === false) {
                    continue;
                }
                $modified = filemtime($file);
                if ($modified !== false && $modified < $threshold) {
                    unlink($file);
                }
            }
        }
        touch($stamp);
    }
}