已知限制:此詞法分析器目前無法正確切分八進制數,以及帶有前綴/後綴的數字字面量。
我用這個詞法分析器跑了一遍整個 Nginx 源碼,基本上都能正確地分詞;後面附有測試例子。
_STATES_H_ #include
_TOKEN_H_ #include <> #include NONE, KEYWORD, INT, FLOAT, SYMBOL, VARIABLE, token() :name(), line_no( std:: set_all( std::& _line, set_name( std::& str){ name = set_line( l){ line_no = set_attr( token_attr a){ attr =
_SCANNER_H_ #include <algorithm> #include <> #include <vector> #include #include scanner( std::& _code) :code(_code), index(), line_no( std::vector<token> inline is_num( & ch); inline is_alpha( & ch); inline is_symbol( & ch); inline is_variable( & ch); inline inline std:: std:: std::vector<token>
#include <cctype> #include <fstream> #include <iostream> #include <> #include #include #include #include std::vector<token> std:: line = _states state = ch = (isblank(ch) || isspace(ch) || ch == (ch == ++ ch = ( ch = (ch == && state != ++ (ch == EOF || index == IN_NONE: str = state = str = state = (ch == || ch == || ch == || ch == || ch == || ch == || ch == || ch == || ch == || ch == || ch == str = -> state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = (ch == str = state = str += state = (ch == || ch == str += state = (ch == str += state = (str == && (ch == || ch == str += state = -> state = (is_num(ch) || (ch >= && ch <= ) || (ch >= && ch <= str += state = -> state = (ch == str += state = (ch == str += state = str += state = (is_alpha(ch) || is_num(ch) || ch == str += state = (keywords.find(str) == -> state = (ch == { str += -> state = (ch == { str += -> state = (ch == { str += -> state = { str += state = { -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = (ch == { str += -> state = { str += state = { -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = (ch == { state = (ch == { state = { -> state = (ch == ch = (ch == state = state = state = (ch == state = state = (ch == { str += -> state = { -> state = (ch == { str += -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = { -> state = (ch == { str += -> state = (ch == { str += -> state = { -> state = str += state = -> state = (ch == { str += ch = (ch == str += state = (ch == str += -> state = str += 
state = (ch == str += -> state = -> state = (ch != str += state = str += -> state = } } scanner::is_alpha( & ((ch >= && ch <= ) || (ch >= && ch <= scanner::is_num( & (ch >= && ch <= scanner::is_variable( & (ch == || code[index++ -- __LOG_END_NAMESPACE
測試如下:
#include <iostream> #include <vector> #include <fstream> #include file_name( ofstream ofs( ifs.seekg( std::size_t len = ifs.seekg( _code.resize(len + ifs.read((*)&* _code[len + ] = vector<token> v = ( ofs << s.name << << s.line_no << system( }
用於分詞測試的代碼選自 Nginx 源碼中的一個函數(ngx_epoll_process_events):
ngx_epoll_process_events(ngx_cycle_t * ngx_event_t *rev, *wev, ** ngx_connection_t * i = , , ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, events = epoll_wait(ep, event_list, ( err = (events == -) ? ngx_errno : (flags & NGX_UPDATE_TIME || (err == ngx_event_timer_alarm = level = } level = ngx_log_error(level, cycle->log, err, (events == (timer != ngx_log_error(NGX_LOG_ALERT, cycle->log, (i = ; i < events; i++ c = instance = (uintptr_t) c & c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~ rev = c-> (c->fd == - || rev->instance != ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, revents = ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, c-> (revents & (EPOLLERR| ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, c-> 0 (revents & ~(EPOLLIN|EPOLLOUT|EPOLLERR| ngx_log_error(NGX_LOG_ALERT, cycle->log, c-> ((revents & (EPOLLERR| && (revents & (EPOLLIN|EPOLLOUT)) == revents |= EPOLLIN| ((revents & EPOLLIN) && rev-> (NGX_HAVE_EPOLLRDHUP) (revents & rev->pending_eof = ((flags & NGX_POST_THREAD_EVENTS) && !rev-> rev->posted_ready = } rev->ready = (flags & queue = (ngx_event_t **) (rev->accept ? &ngx_posted_accept_events : & } rev-> wev = c-> ((revents & EPOLLOUT) && wev-> (c->fd == - || wev->instance != ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, (flags & wev->posted_ready = } wev->ready = (flags & ngx_locked_post_event(wev, & } wev-> }
分詞結果如下:
ngx_int_t ngx_epoll_process_events ( ngx_cycle_t * cycle , ngx_msec_t timer , ngx_uint_t flags ) { events ; uint32_t revents ; ngx_int_t instance , i ; ngx_uint_t level ; ngx_err_t err ; ngx_event_t * rev , * wev , * * queue ; ngx_connection_t * c ; i = , , ; ngx_log_debug1 ( NGX_LOG_DEBUG_EVENT , cycle -> log , , , timer ) ; events = epoll_wait ( ep , event_list , ( ) nevents , timer ) ; err = ( events == - ) ? ngx_errno : ; ( flags & NGX_UPDATE_TIME || ngx_event_timer_alarm ) { ngx_time_update ( ) ; } ( err ) { ( err == NGX_EINTR ) { ( ngx_event_timer_alarm ) { ngx_event_timer_alarm = ; NGX_OK ; } level = NGX_LOG_INFO ; } { level = NGX_LOG_ALERT ; } ngx_log_error ( level , cycle -> log , err , ) ; NGX_ERROR ; } ( events == ) { ( timer != NGX_TIMER_INFINITE ) { NGX_OK ; } ngx_log_error ( NGX_LOG_ALERT , cycle -> log , , ) ; NGX_ERROR ; } ngx_mutex_lock ( ngx_posted_events_mutex ) ; ( i = ; i < events ; i ++ ) { c = event_list [ i ] . data . ptr ; instance = ( uintptr_t ) c & ; c = ( ngx_connection_t * ) ( ( uintptr_t ) c & ( uintptr_t ) ~ ) ; rev = c -> read ; ( c -> fd == - || rev -> instance != instance ) { ngx_log_debug1 ( NGX_LOG_DEBUG_EVENT , cycle -> log , , , c ) ; ; } revents = event_list [ i ] . events ; ngx_log_debug3 ( NGX_LOG_DEBUG_EVENT , cycle -> log , , , c -> fd , revents , event_list [ i ] . data . ptr ) ; ( revents & ( EPOLLERR | EPOLLHUP ) ) { ngx_log_debug2 ( NGX_LOG_DEBUG_EVENT , cycle -> log , , , c -> fd , revents ) ; } # ( revents & ~ ( EPOLLIN | EPOLLOUT | EPOLLERR | EPOLLHUP ) ) { ngx_log_error ( NGX_LOG_ALERT , cycle -> log , , , c -> fd , revents ) ; } # endif ( ( revents & ( EPOLLERR | EPOLLHUP ) ) && ( revents & ( EPOLLIN | EPOLLOUT ) ) == ) { revents |= EPOLLIN | EPOLLOUT ; } ( ( revents & EPOLLIN ) && rev -> active ) { # ( NGX_HAVE_EPOLLRDHUP ) ( revents & EPOLLRDHUP ) { rev -> pending_eof = ; } # endif ( ( flags & NGX_POST_THREAD_EVENTS ) && ! 
rev -> accept ) { rev -> posted_ready = ; } { rev -> ready = ; } ( flags & NGX_POST_EVENTS ) { queue = ( ngx_event_t * * ) ( rev -> accept ? & ngx_posted_accept_events : & ngx_posted_events ) ; ngx_locked_post_event ( rev , queue ) ; } { rev -> handler ( rev ) ; } } wev = c -> write ; ( ( revents & EPOLLOUT ) && wev -> active ) { ( c -> fd == - || wev -> instance != instance ) { ngx_log_debug1 ( NGX_LOG_DEBUG_EVENT , cycle -> log , , , c ) ; ; } ( flags & NGX_POST_THREAD_EVENTS ) { wev -> posted_ready = ; } { wev -> ready = ; } ( flags & NGX_POST_EVENTS ) { ngx_locked_post_event ( wev , & ngx_posted_events ) ; } { wev -> handler ( wev ) ; } } } ngx_mutex_unlock ( ngx_posted_events_mutex ) ; NGX_OK ; }