·设为首页收藏本站📧邮箱修改🎁免费下载专区💎积分✅卡密📒收藏夹👽聊天室
12下一页
返回列表 发布新帖

纯php采集周松松最新文章

马上注册,免费下载更多dz插件网资源。

您需要 登录 才可以下载或查看,没有账号?立即注册

×
纯php采集周松松最新文章与邮件订阅发送
  1. <?php
/**
 * Scrapes the zhousongsong.com home page and prints a single JSON document
 * containing: the site name and description, the article list (every article
 * page is downloaded for its body), the newest article, the site statistics
 * and the latest comments.
 *
 * Every element id, class name and label regex used below mirrors the target
 * site's current templates and has to be updated when those templates change.
 *
 * NOTE(review): the X-FORWARDED-FOR / CLIENT-IP headers carry a random value;
 * they do not change the real source address of the request. Make sure the
 * crawl respects the target site's terms before running this.
 */

if (!extension_loaded('dom')) {
    die('DOMDocument扩展未加载,请检查PHP配置文件。');
}

/**
 * Thin cURL wrapper: performs one HTTP request and returns the response body.
 *
 * @param string $url      Absolute URL to request.
 * @param array  $headers  Request headers, either a list of "Name: value"
 *                         lines or a name => value map (both are accepted).
 * @param mixed  $postData Body for POST requests (anything CURLOPT_POSTFIELDS accepts).
 * @param string $method   'GET' (default) or 'POST'.
 *
 * @return string The response body (already decompressed).
 *
 * @throws Exception When cURL reports a transport error.
 */
function fetchurl($url, $headers, $postData = null, $method = 'GET')
{
    // CURLOPT_HTTPHEADER expects a list of "Name: value" strings. Passing a
    // name => value map would send the bare values as (invalid) header lines.
    $headerLines = [];
    foreach ((array) $headers as $name => $value) {
        $headerLines[] = is_int($name) ? (string) $value : $name . ': ' . $value;
    }

    $ch = curl_init();
    try {
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // Keep TLS verification on; disabling it allows silent man-in-the-middle tampering.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
        // Empty string = accept every encoding cURL supports AND decode the body.
        // Without this an "Accept-Encoding: gzip" request returns raw gzip bytes.
        curl_setopt($ch, CURLOPT_ENCODING, '');
        // Never hang forever on an unresponsive host.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_HTTPHEADER, $headerLines);

        if ($method === 'POST') {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postData !== null ? $postData : '');
        }

        $result = curl_exec($ch);
        if ($result === false || curl_errno($ch)) {
            throw new Exception("cURL Error: " . curl_error($ch));
        }

        return $result;
    } finally {
        curl_close($ch);
    }
}

/**
 * Same as fetchurl() for GET requests, but downloads every URL at most once
 * per run (the newest article is needed both by the list and the "latest" section).
 *
 * @param string $url
 * @param array  $headers
 *
 * @return string
 *
 * @throws Exception When the download fails.
 */
function fetchPageCached($url, $headers)
{
    static $cache = [];

    if (!isset($cache[$url])) {
        $cache[$url] = fetchurl($url, $headers);
    }

    return $cache[$url];
}

/**
 * Parses an HTML string into a DOMDocument while collecting (instead of
 * emitting) libxml warnings about sloppy real-world markup.
 *
 * @param string $html      Markup to parse; an empty string yields an empty document.
 * @param bool   $forceUtf8 Prefix an XML encoding hint so a fragment without
 *                          its own charset declaration is read as UTF-8.
 *
 * @return DOMDocument
 */
function loadHtmlDocument($html, $forceUtf8 = false)
{
    $doc = new DOMDocument();
    $html = (string) $html;
    if ($html === '') {
        // loadHTML() rejects empty input, so hand back the empty document.
        return $doc;
    }

    $previous = libxml_use_internal_errors(true);
    $doc->loadHTML(($forceUtf8 ? '<?xml encoding="UTF-8">' : '') . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $doc;
}

/**
 * Returns one capture group of the first match of $pattern, or $default when
 * the pattern does not match.
 *
 * @param string $pattern
 * @param string $subject
 * @param int    $group   Capture group index (0 = whole match).
 * @param mixed  $default
 *
 * @return mixed
 */
function firstMatch($pattern, $subject, $group = 1, $default = '')
{
    if (preg_match($pattern, (string) $subject, $found) === 1 && isset($found[$group])) {
        return $found[$group];
    }

    return $default;
}

/**
 * Returns the next sibling that is an element, skipping whitespace text
 * nodes and comments, or null when there is none.
 *
 * @param DOMNode $node
 *
 * @return DOMElement|null
 */
function followingElement($node)
{
    $sibling = $node->nextSibling;
    while ($sibling !== null && !($sibling instanceof DOMElement)) {
        $sibling = $sibling->nextSibling;
    }

    return $sibling;
}

/**
 * Extracts the article body from an article page.
 *
 * Concatenates the <p> elements inside the first <dd class="con"> (inline
 * style attributes removed), optionally strips disallowed tags, cuts the text
 * at the '来源:' marker and unwraps <a> elements (keeping their text).
 *
 * @param DOMDocument $doc         Parsed article page.
 * @param string|null $allowedTags strip_tags() allow-list, or null to keep all tags.
 *
 * @return string Body HTML, '' when the container is not found.
 */
function extractArticleBody(DOMDocument $doc, $allowedTags = null)
{
    $xpath = new DOMXPath($doc);
    $containers = $xpath->query('//dd[@class="con"]'); // follows the article template markup
    if ($containers === false || $containers->length === 0) {
        return '';
    }

    $html = '';
    foreach ($containers->item(0)->getElementsByTagName('p') as $paragraph) {
        if ($paragraph->hasAttribute('style')) {
            $paragraph->removeAttribute('style');
        }
        $html .= $doc->saveHTML($paragraph);
    }

    if ($allowedTags !== null) {
        $html = strip_tags($html, $allowedTags);
    }

    $sourceIndex = strpos($html, '来源:');
    if ($sourceIndex !== false) {
        $html = substr($html, 0, $sourceIndex);
    }

    return (string) preg_replace('/<a[^>]*>(.*?)<\/a>/', '$1', $html);
}

$jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

$url = 'https://zhousongsong.com/';
$ip = rand(0, 255) . '.' . rand(0, 255) . '.' . rand(0, 255) . '.' . rand(0, 255); // random IP
$uaagent = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko)",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; +http://url-classification.io/wiki/index.php?title=URL_server_crawler) KStandBot/1.0",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    "Openwave/ UCWEB7.0.2.37/28/999",
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
    "Mozilla/5.0 (Linux; Android 6.0; 1503-M02 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/37.0.0.0 Mobile MQQBrowser/6.2 TBS/036558 Safari/537.36 MicroMessenger/6.3.25.861 NetType/WIFI Language/zh_CN",
];
$randomKey = array_rand($uaagent);
$randomUserAgent = $uaagent[$randomKey];

$headers = [
    'X-FORWARDED-FOR' => $ip,
    'CLIENT-IP' => $ip,
    'Referer' => 'https://www.qq.com/', // was misspelled "Refererr", so it was never a Referer header
    'Accept-Encoding' => 'gzip, deflate',
    'User-Agent' => $randomUserAgent,
];

try {
    $result = fetchurl($url, $headers);
} catch (Exception $e) {
    // Without the home page nothing can be extracted; report the failure as JSON.
    echo json_encode(['code' => 500, 'msg' => $e->getMessage()], $jsonFlags);
    exit(1);
}

// Site name = <title> text up to the first '-' (the whole title when there is no '-').
$webname = firstMatch('/<title>(.*?)<\/title>/', $result);
$dashPos = strpos($webname, '-');
if ($dashPos !== false) {
    $webname = substr($webname, 0, $dashPos);
}
$description = firstMatch('/name="description" content="(.*?)"/', $result);

$homeDom = loadHtmlDocument($result);

// Flatten the "latest articles" container to headings/paragraphs/links only, so
// each article becomes a run of sibling <h2>, <h6>, <p> nodes.
$listHtml = '';
$listContainer = $homeDom->getElementById('con_one_1'); // follows the home page "latest list" template
if ($listContainer !== null) {
    foreach ($listContainer->childNodes as $child) {
        $listHtml .= $homeDom->saveHTML($child);
    }
}
$listHtml = strip_tags($listHtml, "<h1><h2><h3><h5><h6><br><p><a>");
$listDom = loadHtmlDocument($listHtml, true);
$listXpath = new DOMXPath($listDom);

$allinfo = [];        // article list
$webinfo = [];        // site information
$comment = [];        // latest comments
$newarticleinfo = []; // newest article
// NOTE(review): $emailmsg was never assigned in the original script (it was
// emitted as null). Presumably it should carry the mail-sending result — confirm.
$emailmsg = null;

/**
 * Article list: for every <h2> heading collect title, link, date, view and
 * comment counters, the summary paragraph and the full article body.
 */
$h2Nodes = $listXpath->query('//h2');
foreach ($h2Nodes as $h2) {
    $anchor = $h2->getElementsByTagName('a')->item(0);
    if ($anchor === null) {
        continue; // a heading without a link is not an article entry
    }
    $title = $h2->textContent;
    $link = $anchor->getAttribute('href');

    // Walk the elements following the heading, but never past the next <h2>:
    // the first <h6> holds the meta line, the first <p> after it the summary.
    $h6 = null;
    $pcontent = '';
    for ($node = followingElement($h2); $node !== null && $node->nodeName !== 'h2'; $node = followingElement($node)) {
        if ($h6 === null) {
            if ($node->nodeName === 'h6') {
                $h6 = $node;
            }
            continue;
        }
        if ($node->nodeName === 'p') {
            $pcontent = $node->textContent;
            break;
        }
    }

    // Full article body; a failed download leaves the body empty instead of aborting the run.
    $newstexts = '';
    try {
        $articleDom = loadHtmlDocument(fetchPageCached($link, $headers));
        $newstexts = extractArticleBody($articleDom, '<h1><h2><h3><h5><h6><br><p><a><img>');
    } catch (Exception $e) {
        $newstexts = '';
    }

    $date = '';
    $views = 0;
    $commentCount = 0;
    if ($h6 !== null) {
        $h6Content = $h6->nodeValue;
        $date = firstMatch('/\d{4}年\d{2}月\d{2}日/', $h6Content, 0);
        $views = (int) firstMatch('/浏览:(\d+)/', $h6Content, 1, 0);
        $commentCount = (int) firstMatch('/评论:(\d+)/', $h6Content, 1, 0);
    }

    $allinfo[] = [
        'title' => $title,
        'link' => $link,
        'date' => $date,
        'smalltext' => $pcontent,
        'onclick' => $views,
        'newstext' => $newstexts, // json_encode() does the escaping; no manual slashes
        'plnum' => $commentCount,
    ];
}

/**
 * Newest article: the first heading of the list, with body, view counter and
 * publication time read from the article page itself.
 */
$firstH2 = $h2Nodes->item(0);
$firstA = $firstH2 !== null ? $firstH2->getElementsByTagName('a')->item(0) : null;
if ($firstH2 !== null && $firstA !== null) {
    $firstH2Content = $firstH2->nodeValue;
    $linkHref = $firstA->getAttribute('href');
    // NOTE(review): $title / $text are not used by the visible code; presumably
    // they are the subject and body of the notification e-mail — confirm.
    $title = "有来自" . $webname . "的最新文章";
    $text = "" . $webname . "最新文章标题为《" . $firstH2Content . "》,地址:<a href='" . $linkHref . "' target='_blank'>" . $linkHref . "</a>";

    try {
        $resultt = fetchPageCached($linkHref, $headers);
        $latestDom = loadHtmlDocument($resultt);
        $newstext = extractArticleBody($latestDom);
        $viewNum = firstMatch('/<span class="commentViewNums">(.*?)<\/span>/', $resultt);
        $h6Tag = $latestDom->getElementsByTagName('h6')->item(0);
        $h6Text = $h6Tag !== null ? trim($h6Tag->textContent) : '';
        $datetime = firstMatch('/\d{4}年\d{2}月\d{2}日 \d{2}:\d{2}/', $h6Text, 0);

        $newarticleinfo[] = [
            'title' => $firstH2Content,
            'link' => $linkHref,
            'date' => $datetime,
            'onclick' => str_replace('浏览量: ', '', $viewNum),
            'newstext' => $newstext,
        ];
    } catch (Exception $e) {
        // The newest article could not be downloaded; leave $newarticleinfo empty.
    }
}

// Site statistics block (follows the home page "live statistics" template).
$totalarticle = ''; // total number of articles
$totalplnum = '';   // total number of comments
$totalonclick = ''; // total number of views
$divStatistics = $homeDom->getElementById('divStatistics');
$statsList = $divStatistics !== null ? $divStatistics->getElementsByTagName('ul')->item(0) : null;
if ($statsList !== null) {
    foreach ($statsList->getElementsByTagName('li') as $li) {
        $textContent = trim($li->textContent);
        // Each <li> carries one counter; the labels that do not match contribute ''.
        $totalarticle .= firstMatch('/文章总数:\s*(\d+)/', $textContent);
        $totalplnum .= firstMatch('/评论总数:\s*(\d+)/', $textContent);
        $totalonclick .= firstMatch('/浏览总数:\s*(\d+)/', $textContent);
    }
}

// Latest comments block (follows the home page "latest comments" template).
$divComments = $homeDom->getElementById('divComments');
$commentList = $divComments !== null ? $divComments->getElementsByTagName('ul')->item(0) : null;
if ($commentList !== null) {
    foreach ($commentList->getElementsByTagName('li') as $li) {
        $comment[] = trim($li->textContent);
    }
}

// Footer values: strip the markup first, then the label.
$beianhao = trim(strip_tags(firstMatch('/BA号:.*?<\/a>/i', $result, 0)));
$beianhao = str_replace('BA号:', '', $beianhao);
// The match still contains the <a> markup; without strip_tags() the (int)
// cast below always produced 0.
$QQhao = trim(strip_tags(firstMatch('/站长QQ:.*?<\/a>/i', $result, 0)));
$QQhao = trim(str_replace('站长QQ:', '', $QQhao));

$webinfo[] = [
    'totalarticle' => $totalarticle,
    'totalplnum' => $totalplnum,
    'beianhao' => $beianhao,
    'qq' => (int) $QQhao,
    'totalonclick' => $totalonclick,
];

$content = [
    'webname' => $webname,
    'newarticleinfo' => $newarticleinfo,
    'allinfo' => $allinfo,
    'webinfo' => $webinfo,
    'comment' => $comment,
    'emailmsg' => $emailmsg,
    'description' => $description,
    'code' => 200,
    'msg' => '获取成功',
];

// JSON_UNESCAPED_SLASHES keeps URLs readable. Running stripslashes() over the
// encoded text (as before) also removed the escaping of quotes and newlines
// and produced invalid JSON.
$Json = json_encode($content, $jsonFlags);
if ($Json === false) {
    $Json = json_encode(['code' => 500, 'msg' => json_last_error_msg()], $jsonFlags);
}
echo $Json;
复制代码
输出JSON如下:

纯php采集周松松最新文章
我要说一句 收起回复
创宇盾启航版免费网站防御网站加速服务

评论11

婷姐Lv.8 发表于 前天 16:30 | 查看全部
厉害
我要说一句 收起回复
TyCodingLv.8 发表于 前天 16:31 | 查看全部
你口味真重啊
我要说一句 收起回复
拾光Lv.8 发表于 前天 16:32 | 查看全部
这是抛砖引玉,稍微看懂流程的搬运下就能实现微信公众号采集了!正则+php解析DOM
我要说一句 收起回复
浅生Lv.8 发表于 前天 16:32 | 查看全部
微信公众号的地址怎么获取?
我要说一句 收起回复
IT618发布Lv.8 发表于 前天 16:33 | 查看全部
浏览器打开不就看到地址了吗
我要说一句 收起回复
浅生Lv.8 发表于 前天 16:34 | 查看全部
技术杠杠的,厉害。
我要说一句 收起回复
婷姐Lv.8 发表于 前天 16:34 | 查看全部
$ip = rand(0,255).'.'.rand(0,255).'.'.rand(0,255).'.'.rand(0,255) ;        //随机IP

我突然意识到了什么,最近我的网站貌似被采集了(疑似这种随机伪造IP采集)|
--------------------------------------------------------------------------------------------
难怪我站收录骤减,昨天还被采集,持续了3个多小时,10多万篇文章估计被撸了个遍~!
我要说一句 收起回复
独家记忆Lv.8 发表于 前天 16:35 | 查看全部
这随机ip能起作用吗
我要说一句 收起回复
浅生Lv.8 发表于 前天 16:36 | 查看全部
这是为了防止松松查看访问日志时看到真实IP,才伪造随机IP来访问。
我要说一句 收起回复

回复

 懒得打字嘛,点击右侧快捷回复【查看最新发布】   【应用商城享更多资源】
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

创宇盾启航版免费网站防御网站加速服务
投诉/建议联系

discuzaddons@vip.qq.com

未经授权禁止转载,复制和建立镜像,
如有违反,按照公告处理!!!
  • 联系QQ客服
  • 添加微信客服

联系DZ插件网微信客服|最近更新|Archiver|手机版|小黑屋|DZ插件网! ( 鄂ICP备20010621号-1 )|网站地图 知道创宇云防御

您的IP:3.129.42.59,GMT+8, 2025-1-6 19:03 , Processed in 0.402970 second(s), 129 queries , Gzip On, Redis On.

Powered by Discuz! X5.0 Licensed

© 2001-2025 Discuz! Team.

关灯 在本版发帖
扫一扫添加微信客服
QQ客服返回顶部
快速回复 返回顶部 返回列表