中文字幕在线观看,亚洲а∨天堂久久精品9966,亚洲成a人片在线观看你懂的,亚洲av成人片无码网站,亚洲国产精品无码久久久五月天

應用curl擴展抓取網頁

2018-07-20    來源:open-open

容器云強勢上線!快速搭建集群,上萬Linux鏡像隨意使用
    <?php  
    namespace Think;  
    // Declare the response as UTF-8 encoded HTML. header() must run before any
    // output is sent, which is why it sits ahead of everything else in the file.
    header("Content-Type: text/html;charset=utf-8");  
    /**
     * Minimal cURL-based page scraper.
     *
     * Constructing an instance downloads the page at the given URL right away
     * and stores the raw response body in $data. The remaining methods only
     * carve pieces out of that stored body; they perform no further requests.
     */
    class Mycurl
    {
        /** @var resource|\CurlHandle|null cURL handle; null when unavailable or already closed */
        public $ch = null;

        /** @var string|false|null response body as returned by curl_exec() (false on failure) */
        public $data = null;

        /**
         * Fetch $url and keep the response body in $this->data.
         *
         * @param string $url address of the page to download
         */
        public function __construct($url)
        {
            $handle = curl_init($url);
            if ($handle === false) {
                // Nothing to configure or execute; mark the fetch as failed.
                $this->data = false;
                return;
            }

            $this->ch = $handle;
            curl_setopt($this->ch, CURLOPT_HEADER, false);         // do not include response headers
            curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, true);  // return the body instead of echoing it
            $this->data = curl_exec($this->ch);
        }

        /**
         * Release the cURL handle, if one was ever opened.
         */
        public function __destruct()
        {
            if ($this->ch) {
                curl_close($this->ch);
                $this->ch = null;
            }
        }

        /**
         * Regex-based extraction of the article body.
         *
         * Captures everything between the opening
         * <div id="article_content" class="article_content"> tag and the first
         * following </div>.
         *
         * @return string|null captured markup, or null when the page has no such div
         */
        public function regmatch()
        {
            if (!is_string($this->data)) {
                return null;
            }

            // A lazy "(.*?)" with the "s" flag stops at the first closing </div>.
            // The previous "[^(?<\/div>)]*" was a negated character class, so it
            // refused any content containing the letters d, i, v or a slash.
            $reg = '/<div\sid="article_content"\sclass="article_content">(.*?)<\/div>/si';
            if (preg_match($reg, $this->data, $out) !== 1) {
                return null;
            }

            return $out[1];
        }

        /**
         * String-based extraction: the slice of the page that starts at the first
         * (case-insensitive) occurrence of $pos1 and ends just before the next
         * occurrence of $pos2. The start marker itself is part of the result.
         *
         * @param string $pos1 start marker (included in the returned slice)
         * @param string $pos2 end marker (excluded from the returned slice)
         * @return string the slice, or '' when the page is missing or either marker is not found
         */
        public function result($pos1, $pos2)
        {
            if (!is_string($this->data) || $this->data === '') {
                return '';
            }

            $begin = stripos($this->data, $pos1);
            if ($begin === false) {
                return '';
            }

            // Look for the end marker from the start marker onwards so an earlier
            // occurrence can never produce a negative length.
            $end = stripos($this->data, $pos2, $begin);
            if ($end === false) {
                return '';
            }

            return (string) substr($this->data, $begin, $end - $begin);
        }

        /**
         * Build the article record from the downloaded page.
         *
         * @return array map with the keys title, content, atime, author, sort and summary
         */
        public function exec()
        {
            $data = array();

            // result() keeps the start marker, so strip the leading "<title>".
            $titleOpen = '<title>';
            $rawTitle = $this->result($titleOpen, '-盧松松博客</title>');
            $data['title'] = (string) substr($rawTitle, strlen($titleOpen));

            $data['content'] = $this->result('<dd class="post-info">', '<center>');
            // Turn site-relative image paths into absolute URLs.
            $data['content'] = str_ireplace("/upload/", "http://lusongsong.com/upload/", $data['content']);
            // Undo the double host prefix the step above adds to URLs that were already absolute.
            $data['content'] = str_ireplace("http://lusongsong.comhttp://lusongsong.com", "http://lusongsong.com", $data['content']);
            // Undo the host that the first step injects after a "blog" path segment.
            $data['content'] = str_ireplace("bloghttp://lusongsong.com", "blog", $data['content']);

            $data['atime'] = time();
            $data['author'] = 'Internet';
            $data['sort'] = '精彩博文';

            // Summary: at most 180 bytes of tag-free text. mb_strcut() backs off to
            // a character boundary so a multi-byte character is never cut in half
            // (assumes the page is UTF-8 -- TODO confirm). Plain substr() remains
            // as the fallback when mbstring is not loaded.
            $plain = strip_tags($data['content']);
            $data['summary'] = function_exists('mb_strcut')
                ? mb_strcut($plain, 0, 180, 'UTF-8')
                : (string) substr($plain, 0, 180);

            return $data;
        }

    }
      
    // Test driver: fetch a run of numbered article pages and store each one
    // through the "article" model.
    $url = 'http://lusongsong.com/reed/';
    $num = 100;    // number of articles to fetch
    $start = 350;  // id of the first article to fetch

    $Art = M('article');

    for ($i = $start; $i < $start + $num; $i++)
    {
        $posurl = $url . $i . '.html';

        $curl = new Mycurl($posurl);
        $data = $curl->exec();
        $curl = null;  // drop the scraper (and its handle) as soon as the page is parsed

        $data['oldlink'] = $posurl;

        // Nothing usable was extracted (failed download or unexpected markup).
        if (!is_string($data['title']) || $data['title'] === '')
        {
            continue;
        }

        // Skip the site's 404 page. strpos() returns 0 for a match at the very
        // start of the title, so the result must be compared against false
        // rather than tested for truthiness.
        // NOTE(review): the marker literal contains parenthesised Latin fragments
        // ("(xiàn)", "(cuò)", "(yè)") that look like a text-conversion artifact --
        // confirm it against the real 404 page title.
        if (strpos($data['title'], "出現(xiàn)404錯(cuò)誤頁(yè)面了") !== false)
        {
            continue;
        }

        $Art->add($data);
    }
    // NOTE(review): $this is only defined if this file is included from inside an
    // object method (e.g. a controller action) -- confirm how the script is run.
    $this->success("執(zhí)行完成!","index");
      
      
      
    ?>  

標簽:

版權申明:本站文章部分自網絡,如有侵權,請聯系:west999com@outlook.com
特別注意:本站所有轉載文章言論不代表本站觀點!
本站所提供的圖片等素材,版權歸原作者所有,如需使用,請與原作者聯系。

上一篇:C++ 計算n天后的日期

下一篇:關鍵路徑算法C++實現代碼