程式師世界 >> 編程語言 >> 網頁編程 >> PHP編程 >> PHP綜合 >> PHP 字符串編碼截取函數（兼容utf-8和gb2312）

PHP 字符串編碼截取函數（兼容utf-8和gb2312）

編輯：PHP綜合

代碼如下：
/**
 * Cut a run of characters out of a string, counting in characters rather than bytes.
 *
 * The input is converted to UTF-8 when $code is anything other than 'utf-8',
 * the slice is taken on the UTF-8 text, and the result (slice + $rear) is
 * converted back to $code before being returned.
 *
 * @param string $str    Text to cut, encoded in $code.
 * @param int    $from   1-based character position to start at. Values below 1
 *                       are treated as 1. If it lies beyond the end of the
 *                       string, the last $length characters are returned instead.
 * @param int    $length Maximum number of characters to keep. Values below 0
 *                       are treated as 0.
 * @param string $code   Encoding name understood by iconv (e.g. 'utf-8', 'gb2312').
 * @param string $rear   Suffix appended to the slice. It is always appended,
 *                       whether or not anything was actually cut off, and it is
 *                       passed through the same UTF-8 -> $code conversion as the
 *                       slice, so it must be given in UTF-8.
 *
 * @return string|false The slice followed by $rear, encoded in $code; false if
 *                      the final iconv conversion fails.
 */
function cut_string($str, $from = 1, $length = 10, $code = 'utf-8', $rear = '...')
{
    if ($code !== 'utf-8') {
        // Always work on a UTF-8 copy of the text.
        $str = iconv($code, 'utf-8', $str);
        if ($str === false) {
            // Conversion failed: fall back to an empty string so that only
            // $rear is returned, instead of feeding false to the mb_* calls.
            $str = '';
        }
    }
    $str = (string) $str;

    // Normalise the numeric arguments. A start position below 1 used to make
    // the byte-scanning loop spin forever, and a non-positive length used to
    // return one character instead of none.
    $from = (int) $from;
    $length = max(0, (int) $length);

    $str_len = mb_strlen($str, 'utf-8');
    if ($from > $str_len) {
        // Start is past the end: return the trailing $length characters.
        $from = $str_len - $length + 1;
    }
    if ($from < 1) {
        $from = 1;
    }

    // mb_substr counts whole characters, so no manual lead-byte decoding is
    // needed; its start offset is 0-based, hence the -1.
    $piece = mb_substr($str, $from - 1, $length, 'utf-8');

    return iconv('utf-8', $code, $piece . $rear);
}