中文字幕在线观看,亚洲а∨天堂久久精品9966,亚洲成a人片在线观看你懂的,亚洲av成人片无码网站,亚洲国产精品无码久久久五月天

PHP敏感詞過濾

2018-07-20    來源:open-open

容器云強勢上線!快速搭建集群,上萬Linux鏡像隨意使用
 
/**
 * Banned-word ("black word") filter.
 *
 * The word list is read from Model_BlackWord_BlackWord in id-range pages.
 * Each page is turned into a character trie, cached through Rds, and the
 * text is matched against every page in turn.
 *
 * Performance (figure given by the original author): about 0.05 seconds
 * per article.
 *
 * @author liuxu
 */
class Logic_BlackWord
{
    // Application identifiers. They are not referenced inside this class;
    // presumably they are used by callers -- confirm before changing them.
    const APP_FORUM = 1;
    const APP_BLOG  = 2;
    const APP_VOTE  = 3;

    // Width of the id range covered by one cached trie page.
    const PAGE_SIZE = 1000;
    // Lifetime, in seconds, of the cached max id and of each cached trie page.
    const CACHE_TTL = 300;
    // Longest candidate, in characters, examined from a single start position.
    const MAX_WORD_LENGTH = 50;

    /**
     * Find every banned word contained in a text, grouped by word type.
     *
     * The text is stripped of tags and of every character that is not an
     * ASCII letter, a digit or a CJK ideograph (U+4E00..U+9FA5) before
     * matching, so words separated by punctuation or markup are still found.
     *
     * @param string $txt Raw text (may contain HTML).
     * @return array Map of word type => list of matched words (strings).
     *               Empty when nothing matched.
     */
    public function getHitList($txt)
    {
        // Normalise and split once; every page is matched against the same list.
        $chars = $this->splitNormalizedText($txt);
        if (!$chars)
        {
            return array();
        }

        $hitList = array();

        // Match the text against the word list one page at a time.
        $max = $this->getMax();
        if ($max > 0)
        {
            $size = self::PAGE_SIZE;
            $last = (int) ceil($max / $size);
            for ($page = 1; $page <= $last; $page++)
            {
                $result = $this->getHitListByPage($chars, $page, $size);
                if ($result)
                {
                    // Not array_merge(): the matched words are array keys, and
                    // PHP stores an all-digit word such as "123" as an int key,
                    // which array_merge() would renumber to 0, 1, ... and so
                    // lose the word. array_replace() keeps keys untouched.
                    $hitList = array_replace($hitList, $result);
                }
            }
        }

        $grouped = array();
        foreach ($hitList as $hit => $type)
        {
            // Cast back to string: all-digit words come out of the key as ints.
            $grouped[$type][] = (string) $hit;
        }

        return $grouped;
    }

    /**
     * Get the highest id in the banned-word table, cached for CACHE_TTL seconds.
     *
     * @return int Highest id, or 0 when the query returns no usable row.
     */
    private function getMax()
    {
        $redis = Rds::factory();
        $memKey = 'blackWord_max';
        $max = $redis->get($memKey);
        if ($max === false)
        {
            $max = 0;
            $blackWord = new Model_BlackWord_BlackWord();
            $para = array('field' => 'MAX(id) AS max');
            $result = $blackWord->search($para);
            if (isset($result[0]['max']))
            {
                $max = $result[0]['max'];
            }

            $redis->setex($memKey, self::CACHE_TTL, $max);
        }

        return (int) $max;
    }

    /**
     * Strip markup and non-word characters, then split into UTF-8 characters.
     *
     * @param string $txt Raw text.
     * @return array List of single-character strings; empty when nothing is
     *               left after filtering or when the text is not valid UTF-8.
     */
    private function splitNormalizedText($txt)
    {
        $txt = strip_tags((string) $txt);
        $txt = preg_replace('/[^a-zA-Z0-9\\x{4e00}-\\x{9fa5}]/iu', '', $txt);

        // preg_replace() yields null when the subject is not valid UTF-8.
        if ($txt === null || $txt === '')
        {
            return array();
        }

        $chars = preg_split('//u', $txt, -1, PREG_SPLIT_NO_EMPTY);

        return $chars === false ? array() : $chars;
    }

    /**
     * Match the text against one page of the banned-word list.
     *
     * @param array $chars Normalised text as a list of characters.
     * @param int   $page  1-based page number.
     * @param int   $size  Width of the id range covered by a page.
     * @return array Map of matched word => word type.
     */
    private function getHitListByPage(array $chars, $page = 1, $size = self::PAGE_SIZE)
    {
        $hitList = array();

        $wordTree = $this->getWordTreeByPage($page, $size);
        if (!$wordTree)
        {
            return $hitList;
        }

        $total = count($chars);
        for ($i = 0; $i < $total; $i++)
        {
            // Only walk the trie from positions whose character starts a word.
            if (!isset($wordTree[$chars[$i]]))
            {
                continue;
            }

            foreach ($this->getHitListByTree($chars, $i, $wordTree) as $hit => $type)
            {
                $hitList[$hit] = $type;
            }
        }

        return $hitList;
    }

    /**
     * Walk the trie from one start position and collect every word that
     * begins there (a shorter word and a longer word sharing the same prefix
     * are both reported).
     *
     * @param array $chars    Normalised text as a list of characters.
     * @param int   $start    Index in $chars at which matching begins.
     * @param array $wordTree Trie built by getWordTreeByPage().
     * @return array Map of matched word => word type.
     */
    private function getHitListByTree(array $chars, $start, $wordTree)
    {
        $hitList = array();
        $hit = '';
        $node = $wordTree;
        $end = min(count($chars), $start + self::MAX_WORD_LENGTH);

        for ($i = $start; $i < $end; $i++)
        {
            $char = $chars[$i];
            if (!isset($node[$char]))
            {
                break;
            }

            $hit .= $char;
            $node = $node[$char];

            // A 'type' entry marks the end of a complete word.
            if (isset($node['type']))
            {
                $hitList[$hit] = $node['type'];
            }
        }

        return $hitList;
    }

    /**
     * Build (or fetch from cache) the trie for one page of banned words.
     *
     * A page covers rows with status=1 and ($page-1)*$size < id <= $page*$size.
     * Each trie node is an array keyed by the next character; a node that ends
     * a word additionally carries a 'type' entry holding that word's type.
     *
     * NOTE(review): the trie is handed to Rds::setex() as a PHP array, so this
     * assumes Rds serialises values -- confirm. Any cached value that is not an
     * array is treated as a miss and rebuilt.
     *
     * @param int $page 1-based page number.
     * @param int $size Width of the id range covered by a page.
     * @return array The trie; empty when the page holds no active words.
     */
    private function getWordTreeByPage($page = 1, $size = self::PAGE_SIZE)
    {
        $redis = Rds::factory();
        $memKey = 'blackWord_tree_' . $page . '_' . $size;
        $wordTree = $redis->get($memKey);
        if (is_array($wordTree))
        {
            return $wordTree;
        }

        $wordTree = array();
        $start = ((int) $page - 1) * (int) $size;
        $end = $start + (int) $size;

        $blackWord = new Model_BlackWord_BlackWord();
        $para = array('where' => 'status=1 AND id>' . $start . ' AND id<=' . $end);
        $result = $blackWord->search($para);
        if ($result)
        {
            foreach ($result as $value)
            {
                if (empty($value['word']))
                {
                    continue;
                }

                $wordChars = preg_split('//u', $value['word'], -1, PREG_SPLIT_NO_EMPTY);
                if (!$wordChars)
                {
                    // Not valid UTF-8 (or nothing to index): skip this row.
                    continue;
                }

                // Descend by reference, creating child nodes on the way down.
                $point = &$wordTree;
                foreach ($wordChars as $char)
                {
                    $point = &$point[$char];
                }

                $point['type'] = $value['type'];
                unset($point);
            }
        }

        $redis->setex($memKey, self::CACHE_TTL, $wordTree);

        return $wordTree;
    }

}
 

標簽:

版權申明:本站文章部分來自網絡,如有侵權,請聯繫:west999com@outlook.com
特別注意:本站所有轉載文章言論不代表本站觀點!
本站所提供的圖片等素材,版權歸原作者所有,如需使用,請與原作者聯繫。

上一篇:PHP無限極分類相關代碼

下一篇:14行PHP代碼獲取客戶端IP地址經緯度及所在城市