Commit f8cd1a77 authored by hj

更新提交

parent bf17422f
...@@ -50,10 +50,16 @@ class CheMenuService ...@@ -50,10 +50,16 @@ class CheMenuService
{ {
try{ try{
$pageUrl = $url['url']; $pageUrl = $url['url'];
SimpleLogs::writeLog('开始抓取'.$pageUrl.'分类页面数据', 'chemenu info', 'info');
while(true){ while(true){
$responseHtml = $this->apiService->getPage($pageUrl); $responseHtml = $this->apiService->getPage($pageUrl);
//file_put_contents('./test2.txt', $responseHtml); //file_put_contents('./test2.txt', $responseHtml);
if($responseHtml){ if($responseHtml){
if(preg_match('/<span[\s]*?class=[\\\'|\"]caption-subject font-blue-sharp bold[\\\'|\"]>Sorry ! You visit too frequently, please complete validation[\s\S]*?<\/span><\/h2>/i', $responseHtml)){
SimpleLogs::writeLog('遇到反爬系统', 'chemenu info', 'info');
return true;
}
SimpleLogs::writeLog('开始分析分类页面数据', 'chemenu info', 'info');
$titleDivRegx = '/<div[\s]*?class=[\\\'|\"]cate_desc[\\\'|\"]>[\S\s]*<h2[\s]*?class=[\\\'|\"]desc_title[\\\'|\"]>(?<title>[\s\S]+?)<\/h2>[\s\S]*?<p[\s]*?class=[\\\'|\"]desc_content[\\\'|\"]>(?<content>[\s\S]*?)<\/p>/i'; $titleDivRegx = '/<div[\s]*?class=[\\\'|\"]cate_desc[\\\'|\"]>[\S\s]*<h2[\s]*?class=[\\\'|\"]desc_title[\\\'|\"]>(?<title>[\s\S]+?)<\/h2>[\s\S]*?<p[\s]*?class=[\\\'|\"]desc_content[\\\'|\"]>(?<content>[\s\S]*?)<\/p>/i';
preg_match($titleDivRegx, $responseHtml,$titleHtml); preg_match($titleDivRegx, $responseHtml,$titleHtml);
...@@ -76,9 +82,12 @@ class CheMenuService ...@@ -76,9 +82,12 @@ class CheMenuService
} }
preg_match('/<li[\s]*?id=[\\\'|\"]pagination_next[\\\'|\"]>[\s]*?<a[\S\s]*?href=[\\\'|\"](?<nextHref>[\s\S]+?)[\\\'|\"]/i', $responseHtml, $nextHref); preg_match('/<li[\s]*?id=[\\\'|\"]pagination_next[\\\'|\"]>[\s]*?<a[\S\s]*?href=[\\\'|\"](?<nextHref>[\s\S]+?)[\\\'|\"]/i', $responseHtml, $nextHref);
$pageUrl = str_replace($this->apiService->getDomain(), '', $nextHref['nextHref']); $pageUrl = str_replace($this->apiService->getDomain(), '', $nextHref['nextHref']);
SimpleLogs::writeLog('开始下一页抓取,链接:'.$pageUrl, 'chemenu info', 'info');
} }
} }
usleep(rand(50000,150000)); $randomFloat = 3 + mt_rand(0, 3) + (mt_rand(0, 999) / 1000);
usleep($randomFloat * 1000000); // 微秒级延迟
} }
} }
}catch(\Throwable $exception){ }catch(\Throwable $exception){
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment