去评论
dz插件网

纯php采集周松松最新文章

𝕷𝖎𝖑𝖏𝖆𝖈𝖐
2025/01/04 16:30:23
纯php采集周松松最新文章与邮件订阅发送
  <?php
/**
 * Scrapes the home page of zhousongsong.com and prints one JSON document with:
 *   - webname / description : site name and meta description
 *   - allinfo               : every post in the "latest" list (title, link, date,
 *                             summary, view count, body, comment count)
 *   - newarticleinfo        : the newest post, fetched from its own page
 *   - webinfo               : site-wide counters, filing number and webmaster QQ
 *   - comment               : the latest comments shown on the home page
 *
 * The element ids, class names and text labels used below are tied to the
 * target site's templates and must be adjusted whenever those templates change.
 */

if (!extension_loaded('dom')) {
    die('DOMDocument扩展未加载,请检查PHP配置文件。');
}

/**
 * Performs an HTTP request with cURL and returns the response body.
 *
 * @param string            $url      Address to request.
 * @param array             $headers  Either ['Name' => 'value'] pairs or ready-made "Name: value" lines.
 * @param array|string|null $postData Request body; only used when $method is 'POST'.
 * @param string            $method   'POST' sends a POST request, anything else a GET request.
 *
 * @return string Response body (already decompressed).
 *
 * @throws Exception When cURL reports a transport error.
 */
function fetchurl($url, $headers, $postData = null, $method = 'GET')
{
    // CURLOPT_HTTPHEADER wants a list of "Name: value" strings. Given an
    // associative array it would send the bare values only, so named entries
    // are flattened here; numeric entries are assumed to be complete lines.
    $headerLines = [];
    foreach ($headers as $name => $value) {
        $headerLines[] = is_int($name) ? (string) $value : $name . ': ' . $value;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // NOTE(review): certificate and host verification are switched off, as in
    // the original script. This makes the request open to interception;
    // enable both once the runtime has a working CA bundle.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headerLines);
    // Advertise compression AND let cURL decode it; sending only an
    // Accept-Encoding header would hand back raw gzip bytes.
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip, deflate');
    // Never let a stalled server hang the whole run.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    if ($method === 'POST') {
        if ($postData !== null) {
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
        }
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
    } else {
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
    }

    $result = curl_exec($ch);
    if ($result === false || curl_errno($ch)) {
        $error_msg = curl_error($ch);
        curl_close($ch);
        throw new Exception("cURL Error: " . $error_msg);
    }
    curl_close($ch);

    return $result;
}

/**
 * Parses an HTML string as UTF-8 without emitting libxml warnings.
 *
 * @param string $html Markup to parse.
 *
 * @return DOMDocument
 */
function loadHtmlDocument($html)
{
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    // The XML declaration forces UTF-8 decoding, so node text matches the
    // UTF-8 labels used in the regular expressions of this script.
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $doc;
}

/**
 * Returns one capture group of the first regex match, or a default.
 *
 * @param string $pattern PCRE pattern.
 * @param string $subject Text to search.
 * @param int    $group   Capture group to return (0 = whole match).
 * @param mixed  $default Value returned when the pattern does not match.
 *
 * @return mixed
 */
function firstMatch($pattern, $subject, $group = 1, $default = '')
{
    if (preg_match($pattern, $subject, $found) === 1 && isset($found[$group])) {
        return $found[$group];
    }

    return $default;
}

/**
 * Finds the first following sibling with the given node name.
 * The search stops at the next <h2> so it never crosses into the next post.
 *
 * @param DOMNode $start Node whose following siblings are inspected.
 * @param string  $name  Node name to look for, e.g. 'h6' or 'p'.
 *
 * @return DOMNode|null
 */
function findFollowingSibling(DOMNode $start, $name)
{
    for ($node = $start->nextSibling; $node !== null; $node = $node->nextSibling) {
        if ($node->nodeName === $name) {
            return $node;
        }
        if ($node->nodeName === 'h2') {
            return null;
        }
    }

    return null;
}

/**
 * Extracts the post body from a parsed article page.
 *
 * Collects the <p> elements inside the first <dd class="con">, drops inline
 * styles, cuts everything from the "来源:" marker onwards and unwraps links.
 *
 * @param DOMDocument $doc         Parsed article page (style attributes are removed in place).
 * @param string|null $allowedTags When given, every tag not in this strip_tags() allow-list is removed.
 *
 * @return string Body HTML, or '' when the container is missing.
 */
function extractArticleBody(DOMDocument $doc, $allowedTags = null)
{
    $xpath = new DOMXPath($doc);
    $containers = $xpath->query('//dd[@class="con"]'); // adjust when the article template changes
    if ($containers->length === 0) {
        return '';
    }

    $html = '';
    foreach ($containers->item(0)->getElementsByTagName('p') as $pNode) {
        if ($pNode->hasAttribute('style')) {
            $pNode->removeAttribute('style');
        }
        $html .= $doc->saveHTML($pNode);
    }

    if ($allowedTags !== null) {
        $html = strip_tags($html, $allowedTags);
    }

    $sourceIndex = strpos($html, '来源:');
    if ($sourceIndex !== false) {
        $html = substr($html, 0, $sourceIndex);
    }

    return preg_replace('/<a[^>]*>(.*?)<\/a>/', '$1', $html);
}

$url = 'https://zhousongsong.com/';

// Random IP address used for the spoofed client-address headers.
$ip = rand(0, 255) . '.' . rand(0, 255) . '.' . rand(0, 255) . '.' . rand(0, 255);

$uaagent = [
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15) AppleWebKit/605.1.15 (KHTML, like Gecko)",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; +http://url-classification.io/wiki/index.php?title=URL_server_crawler) KStandBot/1.0",
    "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (iPad; U; CPU OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
    "Mozilla/5.0 (Linux; U; Android 2.3.7; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Opera/9.80 (Android 2.3.4; Linux; Opera Mobi/build-1107180945; U; en-GB) Presto/2.8.149 Version/11.10",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en) AppleWebKit/534.1+ (KHTML, like Gecko) Version/6.0.0.337 Mobile Safari/534.1+",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.0; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/233.70 Safari/534.6 TouchPad/1.0",
    "NOKIA5700/ UCWEB7.0.2.37/28/999",
    "Openwave/ UCWEB7.0.2.37/28/999",
    "Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999",
    "Mozilla/5.0 (Linux; Android 6.0; 1503-M02 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/37.0.0.0 Mobile MQQBrowser/6.2 TBS/036558 Safari/537.36 MicroMessenger/6.3.25.861 NetType/WIFI Language/zh_CN",
];
$randomKey = array_rand($uaagent);
$randomUserAgent = $uaagent[$randomKey];

// Accept-Encoding is negotiated inside fetchurl() via CURLOPT_ENCODING.
$headers = [
    'X-FORWARDED-FOR' => $ip,
    'CLIENT-IP'       => $ip,
    'Referer'         => 'https://www.qq.com/',
    'User-Agent'      => $randomUserAgent,
];

$result = fetchurl($url, $headers);

// Site name = page title up to the first '-'; the whole title when there is none.
$webname = firstMatch('/<title>(.*?)<\/title>/', $result);
$dashPos = strpos($webname, '-');
if ($dashPos !== false) {
    $webname = substr($webname, 0, $dashPos);
}
$description = firstMatch('/name="description" content="(.*?)"/', $result);

$homeDoc = loadHtmlDocument($result);

// Reduce the "latest posts" container to a flat run of headings, paragraphs
// and links so every post reads as <h2>, <h6>, <p> siblings.
$newstext = '';
$divtext = $homeDoc->getElementById('con_one_1'); // adjust when the home page list template changes
if ($divtext !== null) {
    foreach ($divtext->childNodes as $child) {
        $newstext .= $homeDoc->saveHTML($child);
    }
}
$newstext = strip_tags($newstext, "<h1><h2><h3><h5><h6><br><p><a>");
$listDoc = loadHtmlDocument($newstext);
$h2Tags = $listDoc->getElementsByTagName('h2');

$allinfo = [];        // posts from the home page list
$webinfo = [];        // site-wide information
$comment = [];        // latest comments
$newarticleinfo = []; // newest post
$pageCache = [];      // article URL => raw HTML, so no page is downloaded twice

/* Walk the list and collect title, link, date, summary, counters and body of every post. */
foreach ($h2Tags as $h2) {
    $anchor = $h2->getElementsByTagName('a')->item(0);
    if ($anchor === null) {
        continue; // a heading without a link is not a post entry
    }
    $postTitle = $h2->textContent;
    $link = $anchor->getAttribute('href');

    // The <h6> after the heading carries date / views / comments; the first
    // <p> after that is taken as the summary.
    $h6 = findFollowingSibling($h2, 'h6');
    $summaryNode = $h6 !== null ? findFollowingSibling($h6, 'p') : null;
    $pcontent = $summaryNode !== null ? $summaryNode->textContent : '';

    $newstexts = '';
    if ($link !== '') {
        try {
            if (!isset($pageCache[$link])) {
                $pageCache[$link] = fetchurl($link, $headers);
            }
            $newstexts = extractArticleBody(
                loadHtmlDocument($pageCache[$link]),
                '<h1><h2><h3><h5><h6><br><p><a><img>'
            );
        } catch (Exception $e) {
            // One unreachable article must not abort the whole list.
            error_log('Failed to fetch ' . $link . ': ' . $e->getMessage());
        }
    }

    if ($h6 instanceof DOMElement) {
        $h6Content = $h6->nodeValue;
        $date = firstMatch('/\d{4}年\d{2}月\d{2}日/', $h6Content, 0);
        $views = (int) firstMatch('/浏览:(\d+)/', $h6Content, 1, 0);
        $comments = (int) firstMatch('/评论:(\d+)/', $h6Content, 1, 0);
    } else {
        $date = '';
        $views = 0;
        $comments = 0;
    }

    $allinfo[] = [
        'title' => $postTitle,
        'link' => $link,
        'date' => $date,
        'smalltext' => $pcontent,
        'onclick' => $views,
        'newstext' => $newstexts, // json_encode() does the escaping
        'plnum' => $comments,
    ];
}

/* Newest post: the first heading of the list, read from its own page. */
$firstH2 = $h2Tags->length > 0 ? $h2Tags->item(0) : null;
$firstA = $firstH2 !== null ? $firstH2->getElementsByTagName('a')->item(0) : null;
if ($firstA !== null) {
    $firstH2Content = $firstH2->nodeValue;
    $linkHref = $firstA->getAttribute('href');

    // Subject and body of a notification mail. Nothing in the visible code
    // sends it; the variables are kept in case a later step does (TODO confirm).
    $title = "有来自".$webname."的最新文章";
    $text = "".$webname."最新文章标题为《".$firstH2Content."》,地址:<a href='".$linkHref."' target='_blank'>".$linkHref."</a>";

    $resultt = isset($pageCache[$linkHref]) ? $pageCache[$linkHref] : fetchurl($linkHref, $headers);
    $articleDoc = loadHtmlDocument($resultt);
    $newstext = extractArticleBody($articleDoc);

    $viewNum = firstMatch('/<span class="commentViewNums">(.*?)<\/span>/', $resultt);
    $h6Tag = $articleDoc->getElementsByTagName('h6')->item(0);
    $h6Text = $h6Tag !== null ? trim($h6Tag->textContent) : '';
    $datetime = firstMatch('/\d{4}年\d{2}月\d{2}日 \d{2}:\d{2}/', $h6Text, 0);

    $newarticleinfo[] = [
        'title' => $firstH2Content,
        'link' => $linkHref,
        'date' => $datetime,
        'onclick' => str_replace('浏览量: ', '', $viewNum),
        'newstext' => $newstext,
    ];
}

/* Site-wide counters from the statistics widget. */
$totalarticle = ''; // total number of posts
$totalplnum = '';   // total number of comments
$totalonclick = ''; // total number of views
$divStatistics = $homeDoc->getElementById('divStatistics'); // adjust when the statistics template changes
$ul = $divStatistics !== null ? $divStatistics->getElementsByTagName('ul')->item(0) : null;
if ($ul !== null) {
    $statsText = $ul->textContent;
    $totalarticle = firstMatch('/文章总数:\s*(\d+)/', $statsText);
    $totalplnum = firstMatch('/评论总数:\s*(\d+)/', $statsText);
    $totalonclick = firstMatch('/浏览总数:\s*(\d+)/', $statsText);
}

/* Latest comments widget. */
$divComments = $homeDoc->getElementById('divComments'); // adjust when the comments template changes
$commentList = $divComments !== null ? $divComments->getElementsByTagName('ul')->item(0) : null;
if ($commentList !== null) {
    foreach ($commentList->getElementsByTagName('li') as $li) {
        $comment[] = trim($li->textContent);
    }
}

// Both values are wrapped in a link on the page, so the markup has to be
// removed before the label is stripped (and before the QQ number is cast).
$beianhao = trim(strip_tags(firstMatch('/BA号:.*?<\/a>/i', $result, 0)));
$beianhao = str_replace('BA号:', '', $beianhao);
$QQhao = trim(strip_tags(firstMatch('/站长QQ:.*?<\/a>/i', $result, 0)));
$QQhao = str_replace('站长QQ:', '', $QQhao);

$webinfo[] = [
    'totalarticle' => $totalarticle,
    'totalplnum' => $totalplnum,
    'beianhao' => $beianhao,
    'qq' => (int) trim($QQhao),
    'totalonclick' => $totalonclick,
];

// $emailmsg is not produced anywhere in the visible code; keep the key in the
// output and fall back to null unless something earlier has set it.
if (!isset($emailmsg)) {
    $emailmsg = null;
}

$content = [
    'webname' => $webname,
    'newarticleinfo' => $newarticleinfo,
    'allinfo' => $allinfo,
    'webinfo' => $webinfo,
    'comment' => $comment,
    'emailmsg' => $emailmsg,
    'description' => $description,
    'code' => 200,
    'msg' => '获取成功',
];

// JSON_UNESCAPED_SLASHES keeps URLs readable. The output must not be passed
// through stripslashes(): that removes the escapes of quotes and newlines and
// leaves invalid JSON.
$Json = json_encode($content, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
if ($Json === false) {
    throw new RuntimeException('JSON encoding failed: ' . json_last_error_msg());
}
echo $Json;
输出JSON如下: