##================================================================##
先來個笑話:
剛交的女朋友讓我去他家玩,我覺得空手不好,於是告訴她我買點東西就過去,誰知道她突然來一句:不用買,家裡還有一盒!
##================================================================##
昨天在折騰一台磁盤出現性能問題的MySQL時,根據監控去慢日志裡找,發現短短10分鐘有好幾萬行慢日志,由於對慢日志結構不熟悉,打開一個600+MB的文件拖著滾動條看,好多慢SQL一條SQL就有幾百行,看來看去都沒找到問題最根本原因,總算功夫不負有心人,總算在不經意間找到,要不然讓我在無知中慢慢老去???
問題SQL如下:
造成的影響:
懷疑造成問題的原因:
由於子查詢中沒有任何過濾條件,進行全表掃描,而服務器內存不夠,導致掃描出來的數據有寫到臨時文件中,同時服務器上還有其他業務運行,服務器無法及時處理完所有請求,造成積壓,而積壓請求又增加服務器壓力,最終導致相互影響惡性循環。
也不知道研發為啥蛋疼地這麼套一下,反正Mysql語句優化處理能力這麼差,大家盡可能簡化SQL吧
##===================================================##
言歸正傳,由於這一個SQL執行頻率較低,使用pt工具分析不能准確定位到該SQL,而使用肉眼看又比較蛋疼,因此寫個小工具來處理,同時還可以解決慢日志較大閱讀不便的問題。
python腳本如下:
# coding: utf-8 import datetime from optparse import OptionParser MAX_QUERY_TIME_SECONDS = 10 START_DATETIME = datetime.datetime.min END_DATETIME = datetime.datetime.max SLOW_LOG_PATH = "" class EventInfo(object): def __init__(self): self.exec_time = None self.login_user = "" self.query_time_seconds = 0 self.lock_time_seconds = 0 self.rows_sent = 0 self.rows_examined = 0 self.command_text = "" self.file_line_num = 0 def get_datetime_by_time_line(time_line): if time_line.startswith("# Time:"): time_str = "20" + time_line.replace("# Time:", "").replace(" ", " ").strip() return datetime.datetime.strptime(time_str, "%Y%m%d %H:%M:%S") else: return None def set_query_info(event_info, query_info_line): line = query_info_line if line.startswith("# Query_time:"): tmp_line = line.replace("#", "").replace(": ", ":").replace(" ", " ").replace(" ", " ") tmp_items = tmp_line.split(" ") for tmp_item in tmp_items: # print(tmp_item) if tmp_item.strip().startswith("Query_time:"): event_info.query_time_seconds = float(tmp_item.replace("Query_time:", "")) if tmp_item.strip().startswith("Lock_time:"): event_info.lock_time_seconds = float(tmp_item.replace("Lock_time:", "")) if tmp_item.strip().startswith("Rows_sent:"): event_info.rows_sent = float(tmp_item.replace("Rows_sent:", "")) if tmp_item.strip().startswith("Rows_examined:"): event_info.rows_examined = float(tmp_item.replace("Rows_examined:", "")) def set_login_user(event_info, login_user_line): line = login_user_line if line.startswith("# User@Host:"): event_info.login_user = line.replace("# User@Host:", "").strip() def set_command_text(event_info, command_text_line): line = command_text_line event_info.command_text += chr(13) + line def print_event_info(event_info): print_info = """ file_line_num: {7} Time: {0} User@Host: {1} Query_time: {2:.5f} seconds Lock_time: {3:.5f} seconds Rows_sent: {4} Rows_examined: {5} command_text: {6} """.format(event_info.exec_time, event_info.login_user, event_info.query_time_seconds, event_info.lock_time_seconds, event_info.rows_sent, event_info.rows_examined, event_info.command_text, event_info.file_line_num) print("*" * 70) print(print_info) def get_user_paras(): try: opt = OptionParser() opt.add_option('--slow_log', dest='slow_log', type=str, help='the file path of the slow log ') opt.add_option('--long_query_seconds', dest='long_query_seconds', type=float, help='long query seconds') opt.add_option('--start_datetime', dest="start_datetime", type=str, help="start datetime, format is %Y-%m-%d %H:%M:%S") opt.add_option('--end_datetime', dest="end_datetime", type=str, help="end datetime, format is %Y-%m-%d %H:%M:%S") (options, args) = opt.parse_args() error_messages = [] slow_log = options.slow_log start_datetime = options.start_datetime long_query_seconds = options.long_query_seconds end_datetime = options.end_datetime if not slow_log: error_messages.append("slow_log must be set;") else: global SLOW_LOG_PATH SLOW_LOG_PATH = slow_log if not long_query_seconds: error_messages.append("long_query_seconds must be set;") else: global MAX_QUERY_TIME_SECONDS MAX_QUERY_TIME_SECONDS = long_query_seconds try: if start_datetime is not None: tmp_start_datetime = datetime.datetime.strptime(start_datetime, "%Y-%m-%d %H:%M:%S") global START_DATETIME START_DATETIME = tmp_start_datetime except: error_messages.append("the format of start_datetime was wrong ;") try: if end_datetime is not None: tmp_end_datetime = datetime.datetime.strptime(end_datetime, "%Y-%m-%d %H:%M:%S") global END_DATETIME END_DATETIME = tmp_end_datetime except: error_messages.append("the format of end_datetime was wrong ;") if len(error_messages) > 0: opt.print_help() for error_message in error_messages: print(error_message) return False else: return True except Exception as ex: print("exception :{0}".format(str(ex))) return False def read_log(): global MAX_QUERY_TIME_SECONDS file_path = SLOW_LOG_PATH file_handler = open(file_path) line = file_handler.readline() line_num = 1 find_event_count = 0 current_event_info = EventInfo() current_time = "" has_time = False while line: if line.startswith("# Time:"): current_time = get_datetime_by_time_line(line) if current_time < START_DATETIME: line = file_handler.readline() line_num += 1 continue if current_time > END_DATETIME: break line = file_handler.readline() line_num += 1 has_time = True continue if has_time: if line.startswith("# User@Host:"): if current_event_info.query_time_seconds > MAX_QUERY_TIME_SECONDS: print_event_info(event_info=current_event_info) find_event_count += 1 current_event_info = EventInfo() current_event_info.file_line_num = line_num current_event_info.exec_time = current_time set_login_user(event_info=current_event_info, login_user_line=line) elif line.startswith("# Query_time:"): set_query_info(event_info=current_event_info, query_info_line=line) else: set_command_text(event_info=current_event_info, command_text_line=line) line = file_handler.readline() line_num += 1 print("*" * 70) print("parse completely") print("find {0} events".format(find_event_count)) def main(): if get_user_paras(): read_log() if __name__ == '__main__': main()
腳本調用方式如:
C:\Python27\python.exe D:/GGA/Code/slow_query_find/find_slow_quer.py --slow_log="D:\slow.log.2016010900" --long_query_seconds=4 --start_datetime="2016-01-08 15:46:00" --end_datetime="2016-01-08 15:50:00"
顯示結果如:
##==================================================================##