DELIMITER $$ USE `xxx`$$ DROP FUNCTION IF EXISTS `F_getBuluDates`$$ CREATE DEFINER=`root`@`localhost` FUNCTION `F_getBuluDates`(`PuserId` INT(11)) RETURNS VARCHAR(3000) CHARSET gbk BEGIN DECLARE buluDate,buluDateTime,LiWaibuluDates,nowdate,bldate VARCHAR(3000) DEFAULT ""; DECLARE rtMsg,RuserName,maxRiZhiDate,btjrqs,blyqrq,tmpdate,yxblsj VARCHAR(255) DEFAULT ""; DECLARE Ra51count INT(30); DECLARE RbeginDate,RendDate,RtmpDate VARCHAR(2550); DECLARE u,gqcount,a21Count INT(30) DEFAULT 0; DECLARE allReadRows,readRows,mySYL,blCount,recordCount,btjryCount,btjrqCount,yxblts,i INT(11) DEFAULT 0; SELECT a5204,a5205 INTO yxblts,yxblsj FROM a52 WHERE a5218="xxxxxxx" LIMIT 0,1; SELECT username INTO RuserName FROM USER WHERE id=PuserId; SELECT COUNT(id) INTO btjryCount FROM a52 WHERE a5214=PuserId AND a5218='xxxxx'; IF btjryCount=0 THEN SELECT GROUP_CONCAT(a5215) INTO btjrqs FROM a52 WHERE a5218='btjrq'; SET tmpdate=CURDATE(); SET nowdate=DATE_SUB(CURDATE(),INTERVAL yxblts DAY); REPEAT SELECT MAX(a2104) INTO maxRiZhiDate FROM a21 WHERE issubmit='y' AND a2104<tmpdate AND FIND_IN_SET(a2104,bldate)=0; SET tmpdate=maxRiZhiDate; IF tmpdate>=nowdate THEN IF F_isWorkDay(maxRiZhiDate)=1 OR ISNULL(maxRiZhiDate) THEN IF NOT ISNULL(maxRiZhiDate) AND LENGTH(maxRiZhiDate)>0 THEN IF FIND_IN_SET(maxRiZhiDate,btjrqs)=0 THEN SELECT COUNT(id) INTO recordCount FROM a21 WHERE a2104=maxRiZhiDate AND creatorid=PuserId; IF recordCount=0 THEN IF DAYOFWEEK(maxRiZhiDate)=6 THEN SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+2+yxblts)," ",yxblsj); ELSEIF DAYOFWEEK(maxRiZhiDate)=7 THEN SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+1+yxblts)," ",yxblsj); ELSE SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+yxblts)," ",yxblsj); END IF; IF(CURRENT_TIMESTAMP()<buluDateTime) THEN SET buluDate=maxRiZhiDate; END IF; END IF; END IF; END IF; END IF; IF CHAR_LENGTH(buluDate)>0 THEN IF CHAR_LENGTH(bldate)>0 THEN SET bldate=CONCAT(buluDate,",",bldate); ELSE SET bldate=buluDate; END IF; END IF; END IF; SET buluDate=''; UNTIL tmpdate<=nowdate END REPEAT; SELECT COUNT(id) INTO gqcount FROM a52 WHERE a5216=CURDATE() AND a5218='xxxxxx'; IF gqcount>0 THEN SELECT a5215 INTO blyqrq FROM a52 WHERE a5216=CURDATE() AND a5218='xxxxxx'; SELECT COUNT(id) INTO a21Count FROM a21 WHERE a2104=blyqrq AND creatorid=PuserId; IF a21Count=0 THEN IF FIND_IN_SET(blyqrq,bldate)=0 THEN IF CHAR_LENGTH(bldate)>0 THEN SET bldate=CONCAT(blyqrq,",",bldate); ELSE SET bldate=blyqrq; END IF; END IF; END IF; END IF; IF ISNULL(btjrqs) THEN SET btjrqs=""; END IF; SELECT GROUP_CONCAT(a5108) INTO LiWaibuluDates FROM a51 WHERE a5110='y' AND F_ifInSet(a5108,btjrqs)=0 AND a5106 LIKE RuserName AND a5112='否' AND CONCAT(a5108," ",a5113)<CURRENT_TIMESTAMP() AND CURRENT_TIMESTAMP()<CONCAT(a5114," ",a5113); IF NOT ISNULL(LiWaibuluDates) AND LENGTH(TRIM(LiWaibuluDates))>0 THEN IF CHAR_LENGTH(bldate)>0 THEN SET bldate=CONCAT(LiWaibuluDates,",",bldate); ELSE SET bldate=LiWaibuluDates; END IF; END IF; SELECT COUNT(id) INTO RA51count FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId; IF RA51count>0 THEN SET u=0; WHILE u< RA51count DO SELECT a5108,a5109 INTO RbeginDate,RendDate FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId ORDER BY id LIMIT u,1; SET RtmpDate=RbeginDate; WHILE RtmpDate<=RendDate DO IF FIND_IN_SET(RtmpDate,bldate)>0 THEN SET bldate=F_removeFromSet(RtmpDate,bldate); END IF; SET RtmpDate=DATE_ADD(RtmpDate, INTERVAL 1 DAY); END WHILE; SET u=u+1; END WHILE; END IF; END IF; RETURN bldate; END$$ DELIMITER ;
在老系統中該函數調用一次需要話20多秒到30秒左右。
拿到sql之後,首先要確定思路。不能著急這下手。
1. 首先查看各個表的數據量:
select count(*) from xxx;
發現只有 a21 的數據量達到了十幾萬,其他表數據量都比較小。所以重點是 a21表,仔細閱讀了一遍函數的定義,發現涉及 到 a21 的有一處循環:
REPEAT SELECT MAX(a2104) INTO maxRiZhiDate FROM a21 WHERE issubmit='y' AND a2104<tmpdate AND FIND_IN_SET(a2104,bldate)=0;
很顯然使用了 find_in_set 函數,所以改語句無法使用索引。所以想要簡單的通過增加索引來解決問題,應該是行不通的。
這樣就定位到了函數運行慢的問題。結合業務理解改函數的含義。
為了使用索引,我們需要去掉 find_in_set 函數,理解了業務和原函數的功能之後,對改函數的 repeat 循環部分進行了重寫:
DELIMITER $$ USE `oa`$$ DROP FUNCTION IF EXISTS `F_getBuluDates_inner`$$ CREATE DEFINER=`root`@`localhost` FUNCTION `F_getBuluDates_inner`(`PuserId` INT(11), tmpdate VARCHAR(32), nowdate VARCHAR(32)) RETURNS VARCHAR(3000) CHARSET gbk BEGIN DECLARE buluDate,buluDateTime,LiWaibuluDates,bldate VARCHAR(3000) DEFAULT ""; DECLARE rtMsg,RuserName,maxRiZhiDate,btjrqs,blyqrq,yxblsj VARCHAR(255) DEFAULT ""; DECLARE Ra51count INT(30); DECLARE RbeginDate,RendDate,RtmpDate VARCHAR(2550); DECLARE u,gqcount,a21Count INT(30) DEFAULT 0; DECLARE allReadRows,readRows,mySYL,blCount,recordCount,btjryCount,btjrqCount,yxblts,i INT(11) DEFAULT 0; DECLARE no_more_data INT DEFAULT 0; DECLARE my_cursor CURSOR FOR SELECT DISTINCT a2104 FROM a21 WHERE issubmit='y' AND a2104 < tmpdate AND a2104 >= nowdate ORDER BY a2104 DESC; DECLARE CONTINUE HANDLER FOR NOT FOUND SET no_more_data = 1; SELECT a5204,a5205 INTO yxblts,yxblsj FROM a52 WHERE a5218="gzrz" LIMIT 0,1; SELECT username INTO RuserName FROM USER WHERE id=PuserId; SELECT COUNT(id) INTO btjryCount FROM a52 WHERE a5214=PuserId AND a5218='xxxxx'; IF btjryCount=0 THEN SELECT GROUP_CONCAT(a5215) INTO btjrqs FROM a52 WHERE a5218='xxx'; OPEN my_cursor; FETCH my_cursor INTO maxRiZhiDate; REPEAT IF F_isWorkDay(maxRiZhiDate)=1 OR ISNULL(maxRiZhiDate) THEN IF NOT ISNULL(maxRiZhiDate) AND LENGTH(maxRiZhiDate)>0 THEN IF FIND_IN_SET(maxRiZhiDate,btjrqs)=0 THEN SELECT COUNT(id) INTO recordCount FROM a21 WHERE a2104=maxRiZhiDate AND creatorid=PuserId; IF recordCount=0 THEN IF DAYOFWEEK(maxRiZhiDate)=6 THEN SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+2+yxblts)," ",yxblsj); ELSEIF DAYOFWEEK(maxRiZhiDate)=7 THEN SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+1+yxblts)," ",yxblsj); ELSE SET buluDateTime=CONCAT(FROM_DAYS(TO_DAYS(maxRiZhiDate)+yxblts)," ",yxblsj); END IF; IF(CURRENT_TIMESTAMP()<buluDateTime) THEN SET buluDate=maxRiZhiDate; END IF; END IF; END IF; END IF; END IF; IF CHAR_LENGTH(buluDate)>0 THEN IF CHAR_LENGTH(bldate)>0 THEN SET bldate=CONCAT(buluDate,",",bldate); ELSE SET bldate=buluDate; END IF; END IF; FETCH my_cursor INTO maxRiZhiDate; SET buluDate=''; UNTIL no_more_data = 1 END REPEAT; SELECT COUNT(id) INTO gqcount FROM a52 WHERE a5216=CURDATE() AND a5218='xxx'; IF gqcount>0 THEN SELECT a5215 INTO blyqrq FROM a52 WHERE a5216=CURDATE() AND a5218='xxxxxxxxxx'; SELECT COUNT(id) INTO a21Count FROM a21 WHERE a2104=blyqrq AND creatorid=PuserId; IF a21Count=0 THEN IF FIND_IN_SET(blyqrq,bldate)=0 THEN IF CHAR_LENGTH(bldate)>0 THEN SET bldate=CONCAT(blyqrq,",",bldate); ELSE SET bldate=blyqrq; END IF; END IF; END IF; END IF; IF ISNULL(btjrqs) THEN SET btjrqs=""; END IF; SELECT GROUP_CONCAT(a5108) INTO LiWaibuluDates FROM a51 WHERE a5110='y' AND F_ifInSet(a5108,btjrqs)=0 AND a5106 LIKE RuserName AND a5112='否' AND CONCAT(a5108," ",a5113)<CURRENT_TIMESTAMP() AND CURRENT_TIMESTAMP()<CONCAT(a5114," ",a5113); IF NOT ISNULL(LiWaibuluDates) AND LENGTH(TRIM(LiWaibuluDates))>0 THEN IF CHAR_LENGTH(bldate)>0 THEN SET bldate=CONCAT(LiWaibuluDates,",",bldate); ELSE SET bldate=LiWaibuluDates; END IF; END IF; SELECT COUNT(id) INTO RA51count FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId; IF RA51count>0 THEN SET u=0; WHILE u< RA51count DO SELECT a5108,a5109 INTO RbeginDate,RendDate FROM a51 WHERE a5110='n' AND F_getid1value2(a5105,1)=PuserId ORDER BY id LIMIT u,1; SET RtmpDate=RbeginDate; WHILE RtmpDate<=RendDate DO IF FIND_IN_SET(RtmpDate,bldate)>0 THEN SET bldate=F_removeFromSet(RtmpDate,bldate); END IF; SET RtmpDate=DATE_ADD(RtmpDate, INTERVAL 1 DAY); END WHILE; SET u=u+1; END WHILE; END IF; END IF; RETURN bldate; END$$ DELIMITER ;
然後為了讓新的 sql 使用索引:
DECLARE my_cursor CURSOR FOR
SELECT DISTINCT a2104 FROM a21 WHERE issubmit='y' AND a2104 < tmpdate AND a2104 >= nowdate ORDER BY a2104 DESC;
新增了索引:
add index issubmit_a2104(issubmit,a2104);
最終的提供給外部的調用函數:
DELIMITER $$ CREATE FUNCTION F_getBuluDates_new(`PuserId` INT(11)) RETURNS VARCHAR(3000) CHARSET gbk BEGIN DECLARE bldate VARCHAR(3000) DEFAULT ""; DECLARE tmpdate,nowdate VARCHAR(32) DEFAULT ""; DECLARE yxblts INT; SELECT a5204 INTO yxblts FROM a52 WHERE a5218="gzrz" LIMIT 0,1; SET tmpdate = CURDATE(); SET nowdate = DATE_SUB(tmpdate,INTERVAL yxblts DAY); SET bldate = F_getBuluDates_inner(PuserId,tmpdate,nowdate); RETURN bldate; END $$
測試效果:
mysql> select F_getBuluDates_new(10687); +---------------------------------------------------------------------------------------------------------------+ | F_getBuluDates_new(10687) | +---------------------------------------------------------------------------------------------------------------+ | 2016-08-04,2016-08-05,2016-08-08,2016-08-09,2016-08-10,2016-08-11,2016-08-12,2016-08-15,2016-08-16,2016-08-17 | +---------------------------------------------------------------------------------------------------------------+ 1 row in set (1.10 sec) mysql> select F_getBuluDates(10687); +---------------------------------------------------------------------------------------------------------------+ | F_getBuluDates(10687) | +---------------------------------------------------------------------------------------------------------------+ | 2016-08-04,2016-08-05,2016-08-08,2016-08-09,2016-08-10,2016-08-11,2016-08-12,2016-08-15,2016-08-16,2016-08-17 | +---------------------------------------------------------------------------------------------------------------+ 1 row in set (13.10 sec)
性能提升了 11 倍多。客戶對效果很滿意。
新的執行計劃:
mysql> explain SELECT DISTINCT a2104 FROM a21 WHERE issubmit='y' AND a2104 < '2016-08-16' AND a2104 >= '2016-08-10' ORDER BY a2104 DESC; +----+-------------+-------+-------+----------------------+----------------+---------+------+------+--------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+-------+-------+----------------------+----------------+---------+------+------+--------------------------+ | 1 | SIMPLE | a21 | range | a2104,issubmit_a2104 | issubmit_a2104 | 516 | NULL | 834 | Using where; Using index | +----+-------------+-------+-------+----------------------+----------------+---------+------+------+--------------------------+ 1 row in set (0.00 sec)
總結:
從原函數實現來看,在數量小時,還是可以的。但是一旦數據量大了,就會出現性能問題。主要問題是在 where 條件中使用了太多的自定義函數,而且這個函數的參數還是表的數據列。
導致無法使用索引。無法使用索引就會進行全表掃描,所以數據量大時會導致性能問題。