联系方式识别(PHP 版本)

 <?php
/**
 * Contact-number recognizer.
 *
 * Scans a text character by character and extracts runs of digits that look
 * like contact numbers (phone, QQ, ...). Disguised digits such as circled
 * numbers, Roman numerals, Chinese numerals and look-alike letters are mapped
 * to their numeric value. A run is reported when it contains at least
 * MAX_NUMBER_LENGTH digits; runs are capped at MAX_GROUP_LENGTH digits.
 * Every qualifying run in the text is reported.
 *
 * @author Rohn(253133755@qq.com)
 * @date   2018/4/25
 */
class ContactRecognize
{

    // Input text, split into one array element per character.
    private $_arr = array();
    // Digit groups that qualified as contact numbers.
    private $_box = array();
    // Stack holding the digits of the group currently being collected.
    private $_shed = array();

    // Disguised digit => numeric value.
    private $_variation_num = array(
        '①' => 1,
        '②' => 2,
        '③' => 3,
        '④' => 4,
        '⑤' => 5,
        '⑥' => 6,
        '⑦' => 7,
        '⑧' => 8,
        '⑨' => 9,
        '㈠' => 1,
        '㈡' => 2,
        '㈢' => 3,
        '㈣' => 4,
        '㈤' => 5,
        '㈥' => 6,
        '㈦' => 7,
        '㈧' => 8,
        '㈨' => 9,
        '⑴' => 1,
        '⑵' => 2,
        '⑶' => 3,
        '⑷' => 4,
        '⑸' => 5,
        '⑹' => 6,
        '⑺' => 7,
        '⑻' => 8,
        '⑼' => 9,
        'Ⅰ' => 1,
        'Ⅱ' => 2,
        'Ⅲ' => 3,
        'Ⅳ' => 4,
        'Ⅴ' => 5,
        'Ⅵ' => 6,
        'Ⅶ' => 7,
        'Ⅷ' => 8,
        'Ⅸ' => 9,
        // Everyday Chinese numerals (plus the homophone of nine)
        '一' => 1,
        '二' => 2,
        '三' => 3,
        '四' => 4,
        '五' => 5,
        '六' => 6,
        '七' => 7,
        '八' => 8,
        '九' => 9,
        '久' => 9,
        // Formal (financial) Chinese numerals
        '零' => 0,
        '壹' => 1,
        '貳' => 2,
        '叁' => 3,
        '肆' => 4,
        '伍' => 5,
        '陸' => 6,
        '柒' => 7,
        '捌' => 8,
        '玖' => 9,
        // Look-alike Latin letters
        'o' => 0,
        'O' => 0,
        'l' => 1,
        'I' => 1,
    );

    // Minimum number of digits a group needs in order to be reported. It is
    // also the size of the trailing window inspected by _isAllRepeat().
    // (The name is historical and kept for backward compatibility.)
    const MAX_NUMBER_LENGTH = 6;
    // Maximum number of digits collected into a single group.
    const MAX_GROUP_LENGTH = 20;
    // Marker returned by _formatChar() for characters that end a digit group.
    const FLAG_RESET = 'reset';

    /**
     * ContactRecognize constructor.
     * @param string $str Text to scan; non-string values are cast to string.
     */
    public function __construct($str){

        // Cast first so null / numeric input never reaches mb_* as a non-string.
        $this->_arr = $this->_ch2arr((string)$str);
    }

    /**
     * Run the recognition over the text given to the constructor.
     *
     * A digit run longer than MAX_GROUP_LENGTH is cut at the limit; the
     * remaining digits start a new group.
     *
     * @return string|false JSON-encoded list of digit groups (each group is a
     *                      list of digits), or false when nothing qualified.
     */
    public function recognize(){

        // Start from a clean slate so repeated calls do not accumulate results.
        $this->_box = array();
        $curState   = $this->_resetState();

        foreach($this->_arr as $char){

            $number = $this->_formatChar($char);
            // Noise character: skip it without breaking the current group.
            if($number === false){
                continue;
            }
            $curState = $this->_setState($number, $curState);
            // Group reached the length cap: judge it now and start over, so it
            // is stored at most once and later numbers are still detected.
            if($curState >= self::MAX_GROUP_LENGTH){
                $this->_intoBox($curState);
                $curState = $this->_resetState();
            }
        }
        // The text may end in the middle of a group; judge that last group too.
        $this->_intoBox($curState);

        if(count($this->_box) > 0){
            return json_encode($this->_box);
        }

        return false;
    }

    /**
     * Whether the current group is exempt from being reported.
     * a) repeated digits such as 555555 or 6666666666 are exempt
     * @return bool
     */
    private function _isExempt(){

        return $this->_isAllRepeat();
    }

    /**
     * Whether the last MAX_NUMBER_LENGTH digits of the current group are all
     * the same digit (e.g. 555555, 6666666666).
     * @return bool
     */
    private function _isAllRepeat(){

        $tail = array_slice($this->_shed, -self::MAX_NUMBER_LENGTH);

        return count(array_count_values($tail)) === 1;
    }

    /**
     * Advance the state machine by one formatted character.
     * @param int|string $number Digit, or FLAG_RESET
     * @param int $curState Number of digits collected so far
     * @return int New state
     */
    private function _setState($number, $curState){

        // Strict comparison: with "==" the integer 0 equals 'reset' on PHP < 8,
        // which turned mapped zeros (e.g. 'o', '零') into separators.
        if($number === self::FLAG_RESET){
            $this->_intoBox($curState);

            return $this->_resetState();
        }

        return $this->_moveState($number, $curState);
    }

    /**
     * Store the current group in the result box when it is long enough and
     * not exempt.
     * @param int $curState Number of digits collected so far
     */
    private function _intoBox($curState){

        if($curState >= self::MAX_NUMBER_LENGTH && !$this->_isExempt()){
            $this->_box[] = $this->_shed;
        }
    }

    /**
     * Push a digit onto the current group and move one state forward.
     * @param int|string $number
     * @param int $curState
     * @return int
     */
    private function _moveState($number, $curState){

        $this->_shed[] = $number;

        return $curState + 1;
    }

    /**
     * Drop the current group and return to the initial state.
     * @return int Always 0
     */
    private function _resetState(){

        $this->_shed = array();

        return 0;
    }

    /**
     * Classify a single character.
     * @param string $char
     * @return int|string|false
     *  digit       the character itself for ASCII digits (string), or the
     *              mapped value for a disguised digit (int)
     *  FLAG_RESET  the character ends the current group
     *  false       noise character, to be ignored
     */
    private function _formatChar($char){

        // Plain digit
        if(is_numeric($char)){
            return $char;
        }
        // Disguised digit; checked before the reset test because several map
        // keys are themselves letters or Chinese characters.
        $variation = $this->_isVariation($char);
        if($variation !== false){
            return $variation;
        }
        // Group separator
        if($this->_isRest($char)){
            return self::FLAG_RESET;
        }

        return false;
    }

    /**
     * Whether the character ends a digit group: ASCII letters, whitespace and
     * Chinese characters (U+4E00 - U+9FA5) do.
     * @param string $char
     * @return bool
     */
    private function _isRest($char){

        // ASCII letters and whitespace
        if(preg_match('/[a-zA-Z\s]/', $char)){
            return true;
        }
        // Chinese characters
        if(preg_match('/[\x{4e00}-\x{9fa5}]/u', $char) > 0){
            return true;
        }

        return false;
    }

    /**
     * Look the character up in the disguised-digit map.
     * @param string $char
     * @return int|false Mapped digit, or false when the character is not in the map
     */
    private function _isVariation($char){

        return isset($this->_variation_num[$char]) ? $this->_variation_num[$char] : false;
    }

    /**
     * Split a multibyte string into an array of single characters.
     * @param string $str
     * @param string $charset
     * @return array
     */
    private function _ch2arr($str, $charset = 'utf-8'){

        $length = mb_strlen($str, $charset);
        $array  = array();
        for($i = 0; $i < $length; $i++){
            $array[] = mb_substr($str, $i, 1, $charset);
        }

        return $array;
    }
}

// Demo: run the recognizer over a sample text and print the JSON result.
$sample     = '12資源12 零3456哈哈12Ⅶ 34567@a1234567890O00o001';
$recognizer = new ContactRecognize($sample);
print_r($recognizer->recognize());
?>
著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容