很久以前就想寫個C++ 的類解析器了,終於花了一晚上時間,寫了一個C++ 頭文件和CPP文件的解析器。其實跟我上篇“編譯原理”的文章說的一樣,詞法分析和語法分析最基本就是一堆if-else 或者switch-case。不要被什麼有窮自動機嚇倒了……
下面的代碼是一個基本的詞法和語法分析器,能工作,但現在它還只是個實驗品,肯定不會很完善,請各位看官見諒。
能解析出來所包含的頭文件、類名(輸入.h 文件可以解析出類名)、函數名等,需要的功能自己再加吧。
程序我直接一個main函數寫下來的,完全C風格,不用跟我講什麼分函數、分類之類的話,我也知道,後期我在代碼上會分好,不過不一定貼上來了,要分你可以自己分一下,如果能共享出來給我學習一下,那更感謝了。
我這個程序就是讓你看看一個詞法分析和一個語法分析的最簡單的寫法,同時也實現一些有點用的功能。
下面就是這個程序代碼,能直接拷貝編譯運行的。代碼也有注釋,邏輯比較清楚了。
// CodeParser.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <tchar.h>
#include <stdlib.h>
#include <conio.h>
#include <vector>
#include <string>
#include <map>
using namespace std;
// Symbol table: category stored for each entry of the g_mapSymbol lookup table.
enum eType { Symbol = 0 };
// Known identifiers keyed by spelling. _tmain registers "if" here, and the
// grammar pass looks up the word in front of a "(" so that registered words
// are not reported as function names.
map<string, eType> g_mapSymbol;
// State of the word currently being collected by the lexer.
enum eCharFlag
{
    noFlag  = 0,  // ordinary word: cut at the next blank
    explain = 1   // word started with '/': kept together until the line ends
};
// Current lexer state: switched to `explain` when a new word begins with '/',
// and put back to `noFlag` every time a word is emitted.
eCharFlag g_charFlag = noFlag;
// One detected call/definition: the word in front of "(" plus the collected
// text of the parenthesised list.
struct stFunc
{
    string funcName;
    string paramList;
};

// True for the characters that separate words.
static bool IsBlank(int ch)
{
    return ch == ' ' || ch == '\t' || ch == '\n';
}

// True for the brackets that are always emitted as one-character tokens.
static bool IsBracket(int ch)
{
    return ch == '(' || ch == ')' || ch == '{' || ch == '}';
}

// Emits the pending word (if there is one), empties it and leaves comment mode.
static void FlushWord(string& word, vector<string>& words)
{
    if (!word.empty())
    {
        words.push_back(word);
        word.clear();
    }
    g_charFlag = noFlag;
}

// Lexical pass: splits the stream into words.
//  - blanks (space, tab, newline) end the current word;
//  - ( ) { } end the current word and are emitted as tokens of their own;
//  - a word that starts with '/' is treated as a comment or path: it is only
//    ended by a newline (or a bracket), and blanks met meanwhile are dropped;
//  - every other character is appended to the current word.
// The word is a std::string, so its length is not limited by a fixed buffer.
static void Tokenize(FILE* fp, vector<string>& words)
{
    string word;
    int ch;  // int, not char: fgetc() must be able to return EOF distinctly from byte 0xFF
    while ((ch = fgetc(fp)) != EOF)
    {
        if (IsBracket(ch))
        {
            FlushWord(word, words);
            words.push_back(string(1, static_cast<char>(ch)));
        }
        else if (IsBlank(ch))
        {
            // Leading blanks are skipped; inside a '/' word only '\n' cuts.
            if (!word.empty() && (g_charFlag != explain || ch == '\n'))
            {
                FlushWord(word, words);
            }
        }
        else
        {
            if (word.empty() && ch == '/')
            {
                g_charFlag = explain;
            }
            word += static_cast<char>(ch);
        }
    }
    // Do not lose the last word when the file does not end with a blank.
    FlushWord(word, words);
}

// Builds the parameter text for the "(" located at words[pos].
// Tokens are joined with single spaces up to and including the first ")" seen
// by the outer scan. Nesting is not depth-tracked: a "(" found by the outer
// scan is appended without a leading space and its tokens are copied up to the
// next ")", after which the outer scan resumes.
// On return pos is one past the last token inspected; it never exceeds
// words.size(), so an unterminated list simply ends at the last token.
static string CollectParamList(const vector<string>& words, size_t& pos)
{
    const size_t count = words.size();
    string params = "(";
    for (;;)
    {
        if (words[pos++] == ")" || pos >= count)
        {
            break;
        }
        if (words[pos] == "(")
        {
            params += "(";
            for (;;)
            {
                if (words[pos++] == ")" || pos >= count)
                {
                    break;
                }
                params += " ";
                params += words[pos];
            }
            if (pos >= count)
            {
                break;
            }
        }
        params += " ";
        params += words[pos];
    }
    return params;
}

// Entry point: asks for a file name, tokenizes that file, writes the numbered
// word list to d:\words.txt, then extracts included headers, class names and
// function-like names and writes them to d:\grammar.txt.
// argc/argv are not used.
// Returns 0 on success and EXIT_FAILURE when a file cannot be opened.
int _tmain(int argc, _TCHAR* argv[])
{
    // Words that may be followed by "(" without being a function name.
    g_mapSymbol["if"] = Symbol;

    // Read the input file name; the field width keeps scanf inside fname.
    char fname[200] = {0};
    printf("請輸入要打開的文件名:\n");
    FILE* fp = (scanf("%199s", fname) == 1) ? fopen(fname, "r") : NULL;
    if (fp == NULL)
    {
        printf("Cannot open infile.\n");
        return EXIT_FAILURE;
    }

    // Lexical analysis.
    vector<string> vecWords;
    Tokenize(fp, vecWords);
    fclose(fp);

    // Dump the word list. fprintf writes straight to the file, so the length
    // of a word is not limited by an intermediate buffer.
    FILE* fpOut = fopen("d:\\words.txt", "w+");
    if (fpOut == NULL)
    {
        printf("Cannot open infile.\n");
        return EXIT_FAILURE;
    }
    for (size_t n = 0; n < vecWords.size(); ++n)
    {
        fprintf(fpOut, "%d. %s\n", static_cast<int>(n), vecWords[n].c_str());
    }
    fclose(fpOut);

    // Grammar analysis (no bracket matching or syntax validation is done).
    vector<string> vecClass;   // class names
    vector<string> vecHeader;  // included headers
    vector<stFunc> vecFunc;    // function-like names with their parameter text
    const size_t wordCount = vecWords.size();
    for (size_t i = 0; i < wordCount; i++)
    {
        const string& word = vecWords[i];
        if (word == "#include")
        {
            // The next word is the header; guard against a trailing #include.
            if (i + 1 < wordCount)
            {
                vecHeader.push_back(vecWords[i + 1]);
            }
        }
        else if (word == "(")
        {
            // The previous word is the name unless it is a registered symbol.
            if (i > 0 && g_mapSymbol.find(vecWords[i - 1]) == g_mapSymbol.end())
            {
                stFunc f;
                f.funcName = vecWords[i - 1];
                // Advances i past the list. Combined with the loop increment,
                // the token right after the closing ")" is not examined.
                f.paramList = CollectParamList(vecWords, i);
                vecFunc.push_back(f);
            }
        }
        else if (word == "()")
        {
            // Name followed by an empty list written as a single token.
            if (i > 0)
            {
                stFunc f;
                f.funcName = vecWords[i - 1];
                f.paramList = "";
                vecFunc.push_back(f);
            }
        }
        else if (word == "class")
        {
            // The next word is the class name; guard against a trailing "class".
            if (i + 1 < wordCount)
            {
                vecClass.push_back(vecWords[i + 1]);
            }
        }
    }

    // Write the grammar results: headers, classes, functions, with two
    // newlines between the sections.
    fpOut = fopen("d:\\grammar.txt", "w+");
    if (fpOut == NULL)
    {
        printf("Cannot open infile.\n");
        return EXIT_FAILURE;
    }
    for (size_t n = 0; n < vecHeader.size(); ++n)
    {
        fprintf(fpOut, "%d. %s \n", static_cast<int>(n), vecHeader[n].c_str());
    }
    fputs("\n\n", fpOut);
    for (size_t n = 0; n < vecClass.size(); ++n)
    {
        fprintf(fpOut, "%d. %s \n", static_cast<int>(n), vecClass[n].c_str());
    }
    fputs("\n\n", fpOut);
    for (size_t n = 0; n < vecFunc.size(); ++n)
    {
        fprintf(fpOut, "%d. %s %s \n", static_cast<int>(n),
                vecFunc[n].funcName.c_str(), vecFunc[n].paramList.c_str());
    }
    fclose(fpOut);

    // Done: report where the results are and wait for a key press.
    printf("解析完成...\n");
    printf("詞法分析結果: d:\\words.txt 語法分析結果: d:\\grammar.txt, 請輸入任何鍵結束...\n");
    getch();
    return 0;
}
自己執行就可以看到結果了,如果有什麼問題,可以在下面提問。
轉載請注明:出自 一雨田的專欄(http://blog.csdn.net/dylgsy)