程式師世界 >> 編程語言 >> 網頁編程 >> PHP編程 >> 關於PHP編程 >> 從源碼去理解PHP的explode()函數

從源碼去理解PHP的explode()函數

編輯：關於PHP編程

當我們需要將一個字符串根據某個字符或字串分割成數組的時候，explode()函數很好用，但是你知道explode()是怎麼工作的麼？凡是截取字串的操作，都避免不了重新分配空間的消耗，explode也不例外，毫無疑問它也是會分配空間的。

/* File 1: ext/standard/string.c */
/*
 * Userland explode(delimiter, string [, limit]).
 *
 * Parses the arguments, validates the delimiter, and then dispatches on
 * limit:
 *   limit > 1        -> php_explode()
 *   limit < 0        -> php_explode_negative_limit()
 *   limit == 0 or 1  -> the whole input becomes the single element at index 0
 * When limit is omitted it stays at LONG_MAX, i.e. the "limit > 1" case.
 */
PHP_FUNCTION(explode)
{
	char *input, *separator;
	int input_len = 0, separator_len = 0;
	long limit = LONG_MAX; /* no limit unless the caller supplies one */
	zval zsep, zinput;

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &separator, &separator_len, &input, &input_len, &limit) == FAILURE) {
		return;
	}

	/* An empty delimiter raises a warning and yields FALSE. */
	if (!separator_len) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
		RETURN_FALSE;
	}

	/* The result is an array that collects the pieces. */
	array_init(return_value);

	/*
	 * Empty input is accepted, which is why explode('|', '') is legal:
	 * it yields one empty string for limit >= 0 and an empty array for a
	 * negative limit.
	 */
	if (!input_len) {
		if (limit >= 0) {
			add_next_index_stringl(return_value, "", sizeof("") - 1, 1);
		}
		return;
	}

	/*
	 * Wrap both buffers in stack zvals so the helpers can take them.
	 * The duplicate flag is 0, so ZVAL_STRINGL only stores the pointer and
	 * length here; no copy is made at this point.
	 */
	ZVAL_STRINGL(&zinput, input, input_len, 0);
	ZVAL_STRINGL(&zsep, separator, separator_len, 0);

	/* limit is the optional third argument and may be positive or negative. */
	if (limit < 0) {
		php_explode_negative_limit(&zsep, &zinput, return_value, limit);
	} else if (limit > 1) {
		php_explode(&zsep, &zinput, return_value, limit);
	} else {
		/* limit is 0 or 1: return the untouched input as the only element. */
		add_index_stringl(return_value, 0, input, input_len, 1);
	}
}

再來看一段：

/* Source of ZVAL_STRINGL: */
/* File 2: zend/zend_API.c (as cited by the article).
 * NOTE(review): a macro like this would normally live in the header
 * zend/zend_API.h -- confirm the file name. */
/*
 * ZVAL_STRINGL(z, s, l, duplicate)
 *   Turns the zval pointed to by z into a string value of length l.
 *   - duplicate != 0: the string pointer is set to estrndup(s, l), a copy.
 *   - duplicate == 0: the string pointer is set to s itself (cast to char *);
 *     nothing is copied.
 *   The type tag is set to IS_STRING in both cases.
 *   s and l are evaluated once into the locals __s and __l.
 */
#define ZVAL_STRINGL(z, s, l, duplicate) {    \
	const char *__s=(s); int __l=l;        \
	Z_STRLEN_P(z) = __l;                \
	Z_STRVAL_P(z) = (duplicate?estrndup(__s, __l):(char*)__s);\
	Z_TYPE_P(z) = IS_STRING;            \
}
	....
/* estrndup is where the real work happens: */
/* File 3: zend/zend_alloc.h */
/* Forwards (s, length) to _estrndup(), appending the ZEND_FILE_LINE_CC and
 * ZEND_FILE_LINE_EMPTY_CC arguments. */
#define estrndup(s, length)    _estrndup((s), (length) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
	....
/* Implementation of _estrndup: zend/zend_alloc.c */
/*
 * Returns a freshly allocated, NUL-terminated copy of the first `length`
 * bytes of s, or NULL if _emalloc() returned NULL.
 */
ZEND_API char *_estrndup(const char *s, uint length ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
{
	/* This is the allocation: length bytes plus one for the terminator. */
	char *copy = (char *) _emalloc(length+1 ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);

	if (UNEXPECTED(copy == NULL)) {
		return NULL;
	}

	/* Copy the bytes (memcpy copies, it does not allocate), then terminate. */
	memcpy(copy, s, length);
	copy[length] = '\0';

	return copy;
}
/* According to the article, the ZVAL_STRING macro used by substr, strrchr and
 * strstr relies on the same implementation described above. */

下面根據explode的第三個參數limit來分析調用：三種情況分別對應explode函數末尾的三個條件分支。注：limit在缺省的時候（沒有傳遞），它的默認值是LONG_MAX，也就是屬於分支1的情況。

1、limit > 1 :

調用php_explode方法，該方法也可以在ext/standard/string.c中找到，並且是緊接著explode實現的上面出現（所以在查找本函數中調用來自本文件的方法的時候很方便，幾乎無一例外都是在該函數的緊接著的上面^_^）。

/*
 * Splits str on delim and appends the pieces to return_value.
 * Used by explode() when limit > 1: once the limit is reached, the rest of
 * the string (delimiters included) becomes the last element.
 */
PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, long limit) 
{
	/* cursor: start of the current piece; end: one past the last byte. */
	char *cursor = Z_STRVAL_P(str);
	char *end = Z_STRVAL_P(str) + Z_STRLEN_P(str);
	/* php_memnstr finds the next delimiter; the article notes that strpos
	 * and strrpos locate their matches with the same helper. */
	char *hit = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), end);

	if (hit == NULL) {
		/* No delimiter at all: explode('|', 'abc') is legal and gives
		 * array(0 => 'abc'). */
		add_next_index_stringl(return_value, cursor, Z_STRLEN_P(str), 1);
		return;
	}

	/* Walk from delimiter to delimiter until none is left or the limit is
	 * used up. */
	do {
		/* Emit the piece between the previous position and this match. */
		add_next_index_stringl(return_value, cursor, hit - cursor, 1);

		/* Skip the delimiter, e.g. delim = '|', str = 'ab|c':
		 * match at offset 2, so the next piece starts at 2 + 1 = 3. */
		cursor = hit + Z_STRLEN_P(delim);

		hit = php_memnstr(cursor, Z_STRVAL_P(delim), Z_STRLEN_P(delim), end);
	} while (hit != NULL && --limit > 1);

	/* Whatever follows the last consumed delimiter is the final element:
	 * explode('|', 'avc|sdf') => array(0 => 'avc', 1 => 'sdf'). */
	if (cursor <= end) {
		add_next_index_stringl(return_value, cursor, end - cursor, 1);
	}
}

2、limit < 0 :

調用php_explode_negative_limit方法：

/*
 * Splits str on delim for a negative limit and appends the result to
 * return_value.
 *
 * The start position of every piece is recorded first; then only the first
 * (found + limit) pieces are emitted, so the last -limit pieces are dropped.
 * If the delimiter does not occur at all, nothing is added and the caller's
 * array stays empty.
 */
PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, long limit) 
{
	/* Growth step (in entries) for the positions array below. */
	#define EXPLODE_ALLOC_STEP 64
	char *p1, *p2, *endp;
	
	/* endp points one past the last byte of the input. */
	endp = Z_STRVAL_P(str) + Z_STRLEN_P(str);
	
	p1 = Z_STRVAL_P(str);
	p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp);
	
	if (p2 == NULL) {
	/* Nothing is added in this branch, so explode('|', 'abc', -1) is not an
	 * error: it simply produces an empty array, as the note below explains. */
		/*
		do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0
		by doing nothing we return empty array
	*/
	} else {
		int allocated = EXPLODE_ALLOC_STEP, found = 0;
        long i, to_return;
		char **positions = emalloc(allocated * sizeof(char *));
		/* positions[] holds the start address of every piece of the input. */
		positions[found++] = p1;   /* the first piece starts at the beginning of the string */
		/* First loop: visit every delimiter occurrence and record where the
		 * piece following it starts, growing the array when it is full. */
		do {
			if (found >= allocated) {
				allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */
				positions = erealloc(positions, allocated*sizeof(char *));
			}
			positions[found++] = p1 = p2 + Z_STRLEN_P(delim);
		} while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL);
		/* Number of pieces to emit: all pieces found minus the last -limit ones. */
		to_return = limit + found;
			/* limit is at least -1 therefore no need of bounds checking : i will be always less than found */
			/* Second loop: piece i spans from positions[i] up to the delimiter
			 * that precedes positions[i+1]. */
			for (i = 0;i < to_return;i++) { /* this checks also for to_return > 0 */
				add_next_index_stringl(return_value, positions[i], 
					(positions[i+1] - Z_STRLEN_P(delim)) - positions[i],
						1
					);
			}
			efree(positions);/* important: release the positions array */
		}
	#undef EXPLODE_ALLOC_STEP
}

3、limit = 1 or limit = 0 :

當第一和第二個條件都不滿足的時候，就進入這個分支，這個分支很簡單，就是將源字串原樣放到輸出數組中，explode('|', 'avc|sd', 1) or explode('|', 'avc|sd', 0) 都將返回array(0 => 'avc|sd');

/* Source of add_next_index_stringl.
 * NOTE(review): the article's heading names add_index_stringl (the function
 * called by the limit 0/1 branch), but the listing is add_next_index_stringl. */
/* File 4: zend/zend_API.c */
/*
 * Appends a string of the given length to the array zval `arg` at the next
 * free index. `duplicate` is passed through to ZVAL_STRINGL and decides
 * whether the bytes are copied or str is stored directly.
 * Returns the result of zend_hash_next_index_insert().
 */
ZEND_API int add_next_index_stringl(zval *arg, const char *str, uint length, int duplicate) /* {{{ */
{
	zval *entry;
	int status;

	/* Create the element zval and fill it with the string. */
	MAKE_STD_ZVAL(entry);
	ZVAL_STRINGL(entry, str, length, duplicate);

	/* The hash table stores the zval pointer, hence sizeof(zval *). */
	status = zend_hash_next_index_insert(Z_ARRVAL_P(arg), &entry, sizeof(zval *), NULL);
	return status;
}
/* zend_hash_next_index_insert */
/* zend/zend_hash.h */
/* Forwards to _zend_hash_index_update_or_next_insert() with index 0 and the
 * HASH_NEXT_INSERT flag, appending ZEND_FILE_LINE_CC. */
#define zend_hash_next_index_insert(ht, pData, nDataSize, pDest) \
         _zend_hash_index_update_or_next_insert(ht, 0, pData, nDataSize, pDest, HASH_NEXT_INSERT ZEND_FILE_LINE_CC)
/* zend/zend_hash.c */
/* (The implementation is too long to reproduce here.) */

可見（不包含分配空間這些），當limit>1的時候，效率是O（N）【N為limit值】，當limit < 0的時候，效率是O（N+M）【N為limit值, M 為分割符出現次數】，當limit=1 or limit=0 的時候，效率是O（1）。（注：這裡統計的只是循環次數的上界，php_memnstr查找分割符時掃描字串本身的開銷另計。）