<?php
/**
 * Contact-number recognizer.
 *
 * Walks a text one character at a time and collects runs of digits that may
 * form a (possibly disguised) phone/QQ number. Besides ASCII digits, a set
 * of look-alike "variant" digits (①, ㈠, ⑴, Ⅶ, 一, 壹, o, l, ...) is counted
 * as digits. English letters, whitespace and Chinese characters end the
 * current run; any other character is ignored and does not break the run.
 *
 * A run is reported when it has at least MAX_NUMBER_LENGTH digits and its
 * last MAX_NUMBER_LENGTH digits are not all identical. A run is cut after
 * GROUP_LENGTH_LIMIT digits; every qualifying run in the text is reported.
 *
 * @author Rohn(253133755@qq.com)
 * @date 2018/4/25
 */
class ContactRecognize
{
    // Input text, split into single multibyte characters.
    private $_arr = array();
    // Digit groups that satisfied the reporting rules.
    private $_box = array();
    // Stack holding the digits of the run currently being collected.
    private $_shed = array();
    // Characters that are treated as digits although they are not ASCII digits.
    private $_variation_num = array(
        '①' => 1,
        '②' => 2,
        '③' => 3,
        '④' => 4,
        '⑤' => 5,
        '⑥' => 6,
        '⑦' => 7,
        '⑧' => 8,
        '⑨' => 9,
        '㈠' => 1,
        '㈡' => 2,
        '㈢' => 3,
        '㈣' => 4,
        '㈤' => 5,
        '㈥' => 6,
        '㈦' => 7,
        '㈧' => 8,
        '㈨' => 9,
        '⑴' => 1,
        '⑵' => 2,
        '⑶' => 3,
        '⑷' => 4,
        '⑸' => 5,
        '⑹' => 6,
        '⑺' => 7,
        '⑻' => 8,
        '⑼' => 9,
        'Ⅰ' => 1,
        'Ⅱ' => 2,
        'Ⅲ' => 3,
        'Ⅳ' => 4,
        'Ⅴ' => 5,
        'Ⅵ' => 6,
        'Ⅶ' => 7,
        'Ⅷ' => 8,
        'Ⅸ' => 9,
        // Chinese numerals (plus the homophone 久 for 9)
        '一' => 1,
        '二' => 2,
        '三' => 3,
        '四' => 4,
        '五' => 5,
        '六' => 6,
        '七' => 7,
        '八' => 8,
        '九' => 9,
        '久' => 9,
        // Chinese financial (capital) numerals
        '零' => 0,
        '壹' => 1,
        '貳' => 2,
        '叁' => 3,
        '肆' => 4,
        '伍' => 5,
        '陸' => 6,
        '柒' => 7,
        '捌' => 8,
        '玖' => 9,
        // Simplified forms of 貳 / 陸
        '贰' => 2,
        '陆' => 6,
        // Latin letters that look like digits
        'o' => 0,
        'O' => 0,
        'l' => 1,
        'I' => 1,
    );
    // Despite its name this is the MINIMUM number of digits a run needs to be
    // reported; it is also the window size of the "all repeated" check.
    const MAX_NUMBER_LENGTH = 6;
    // Upper bound on the number of digits collected into a single group.
    const GROUP_LENGTH_LIMIT = 20;
    // Marker returned by _formatChar() for characters that end a run.
    const FLAG_RESET = 'reset';

    /**
     * ContactRecognize constructor.
     *
     * @param string $str Text to scan (interpreted as UTF-8).
     */
    public function __construct($str)
    {
        $this->_arr = $this->_ch2arr((string)$str);
    }

    /**
     * Scan the text and return every qualifying digit group.
     *
     * Each group is an array of single digits. ASCII digits are kept as
     * one-character strings, variant digits are the integers from the
     * variant table, so a group may contain both.
     *
     * @return string|false JSON-encoded list of digit groups, or false when
     *                      no group qualified.
     */
    public function recognize()
    {
        // Start from a clean slate so repeated calls give the same answer.
        $this->_box = array();
        $this->_shed = array();
        $curState = 0;

        foreach ($this->_arr as $char) {
            $number = $this->_formatChar($char);
            // Noise character: ignore it, it does not break the current run.
            if ($number === false) {
                continue;
            }
            // The group is full: judge it exactly once, then start over so the
            // current character is handled as the beginning of a new run.
            if ($curState >= self::GROUP_LENGTH_LIMIT) {
                $this->_intoBox($curState);
                $curState = $this->_resetState();
            }
            $curState = $this->_setState($number, $curState);
        }

        // The text may end in the middle of a run; judge that last run too.
        $this->_intoBox($curState);

        if (count($this->_box) > 0) {
            return json_encode($this->_box);
        }
        return false;
    }

    /**
     * Whether the current run is exempt from being reported.
     * Currently the only exemption is a run of repeated digits,
     * e.g. 555555 or 6666666666.
     *
     * @return bool
     */
    private function _isExempt()
    {
        return $this->_isAllRepeat();
    }

    /**
     * Whether the last MAX_NUMBER_LENGTH digits of the current run are all
     * the same digit. Only that trailing window is inspected, not the whole
     * run.
     *
     * @return bool
     */
    private function _isAllRepeat()
    {
        $tail = array_slice($this->_shed, -self::MAX_NUMBER_LENGTH);
        return count(array_count_values($tail)) === 1;
    }

    /**
     * Feed one formatted character into the state machine.
     *
     * @param int|string $number   A digit, or FLAG_RESET.
     * @param int        $curState Number of digits collected so far.
     * @return int The new state.
     */
    private function _setState($number, $curState)
    {
        // Strict comparison: on PHP < 8 the loose form treats int 0 as equal
        // to the string 'reset', which turned variant zeros into separators.
        if ($number === self::FLAG_RESET) {
            $this->_intoBox($curState);
            return $this->_resetState();
        }
        return $this->_moveState($number, $curState);
    }

    /**
     * Store the current run in the result box when it is long enough and
     * not exempt.
     *
     * @param int $curState Number of digits in the current run.
     */
    private function _intoBox($curState)
    {
        if ($curState >= self::MAX_NUMBER_LENGTH && !$this->_isExempt()) {
            array_push($this->_box, $this->_shed);
        }
    }

    /**
     * Append a digit to the current run and advance the state by one.
     *
     * @param int|string $number
     * @param int        $curState
     * @return int
     */
    private function _moveState($number, $curState)
    {
        array_push($this->_shed, $number);
        return $curState + 1;
    }

    /**
     * Drop the current run and return the initial state.
     *
     * @return int Always 0.
     */
    private function _resetState()
    {
        $this->_shed = array();
        return 0;
    }

    /**
     * Classify a single character.
     *
     * @param string $char
     * @return int|string|false The digit (one-character string for ASCII
     *                          digits, int for variant digits), FLAG_RESET
     *                          for a run-ending character, or false for a
     *                          character that is to be ignored.
     */
    private function _formatChar($char)
    {
        // Plain digit
        if (is_numeric($char)) {
            return $char;
        }
        // Variant digit; checked before the reset test so that o/O/l/I and
        // the Chinese numerals count as digits rather than as separators.
        $rs = $this->_isVariation($char);
        if ($rs !== false) {
            return $rs;
        }
        // Run-ending character
        if ($this->_isRest($char)) {
            return self::FLAG_RESET;
        }
        return false;
    }

    /**
     * Whether the character ends the current run: an English letter,
     * whitespace, or a Chinese character in U+4E00..U+9FA5.
     *
     * @param string $char
     * @return bool
     */
    private function _isRest($char)
    {
        // English letters and whitespace
        if (preg_match('/[a-zA-Z\s]/', $char)) {
            return true;
        }
        // Chinese characters
        if (preg_match('/[\x{4e00}-\x{9fa5}]/u', $char) > 0) {
            return true;
        }
        return false;
    }

    /**
     * Look the character up in the variant-digit table.
     *
     * @param string $char
     * @return int|false The digit value, or false when it is not a variant.
     */
    private function _isVariation($char)
    {
        return isset($this->_variation_num[$char]) ? $this->_variation_num[$char] : false;
    }

    /**
     * Split a multibyte string into an array of single characters.
     *
     * @param string $str
     * @param string $charset
     * @return array
     */
    private function _ch2arr($str, $charset = 'utf-8')
    {
        $length = mb_strlen($str, $charset);
        $array = array();
        for ($i = 0; $i < $length; $i++) {
            $array[] = mb_substr($str, $i, 1, $charset);
        }
        return $array;
    }
}
// Demo: run the recognizer on a sample string and print the JSON result.
$sampleText = '12資源12 零3456哈哈12Ⅶ 34567@a1234567890O00o001';
$recognizer = new ContactRecognize($sampleText);
print_r($recognizer->recognize());
// 聯系方式識別(php版本)
// ©著作權歸作者所有,轉載或內容合作請聯系作者
// 【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
// 平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
// 【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
// 平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。
// 相關閱讀更多精彩內容
// - 一、我存在的價值 當看到這句:“世界的真相是【萬事萬物是按照宇宙法則運行的,與個人努力無關】,但你認為人不是存在本...