最近經常需要模擬網頁提交、取回網頁源碼,再從中取得相應的元素,因此常常要解析 Html 中的各種元素。網絡是個好東西,搜索一番,就找到了好幾個 Delphi 版本的 HtmlParser 類庫;試用了幾個,發現解析起來都不完整,或多或少會出現一些問題!於是想到:如果界面上有一個瀏覽器,我們可以通過 WebBrowser 的 Document 接口對網頁元素進行操作,很是方便!但是模擬網頁提交時,界面上不一定要出現 WebBrowser,所以肯定有辦法不通過 WebBrowser 就直接解析 HTML——也就是不要 WebBrowser 這個外殼,只要它裡面的 Document 文檔接口對象,就能實現對 Html 的解析。查了一番 MSDN,再 Google 一下,果然可行,構建方法如下:
// Create the IHtmlDocument2 interface
CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
接口創建好了之後就能夠對文檔元素進行解析了,很是爽快!
結合了我自己的特有操作,我對Combobox,Table,Frame等一些網頁元素做了相應的封裝,實現了一個HtmlParser,大致代碼如下:
這裡只給出聲明,代碼請在最後下載
代碼
(******************************************************)
(* 得閒工作室 *)
(* 網頁元素操作類庫 *)
(* *)
(* DxHtmlElement Unit *)
(* Copyright(c) 2008-2010 不得閒 *)
(* email:[email protected] QQ:75492895 *)
(******************************************************)
unit DxHtmlElement;
interface
uses Windows,sysUtils,Clipbrd,MSHtml,ActiveX,OleCtrls,Graphics,TypInfo;
{Get EleMent Type}
// Element-kind predicates: each takes one IHtmlElement and returns a Boolean.
// NOTE(review): the bodies are not part of this listing; which tag/type each
// predicate tests is presumably the one named in its identifier -- confirm
// against the implementation.
function IsSelectElement(eleElement: IHtmlElement): Boolean;
function IsPwdElement(eleElement: IHtmlElement): Boolean;
function IsTextElement(element: IHtmlElement): boolean;
function IsTableElement(element: IHtmlElement): Boolean;
function IsElementCollection(element: IHtmlElement): Boolean;
function IsChkElement(element: IHtmlElement): boolean;
function IsRadioBtnElement(element: IHtmlElement): boolean;
function IsMemoElement(element: IHtmlElement): boolean;
function IsFormElement(element: IHtmlElement): boolean;
function IsIMGElement(element: IHtmlElement): boolean;
function IsInIMGElement(element: IHtmlElement): boolean;
function IsLabelElement(element: IHtmlElement): boolean;
function IsLinkElement(element: IHtmlElement): boolean;
function IsListElement(element: IHtmlElement): boolean;
function IsControlElement(element: IHtmlElement): boolean;
function IsObjectElement(element: IHtmlElement): boolean;
function IsFrameElement(element: IHtmlElement): boolean;
function IsInPutBtnElement(element: IHtmlElement): boolean;
function IsInHiddenElement(element: IHtmlElement): boolean;
function IsSubmitElement(element: IHtmlElement): boolean;
{Get ImgElement Data}
// Image helpers operating on an IHtmlDocument2.
// GetPicIndex returns an Integer and GetPicElement an IHtmlImgElement, both
// looked up from Src/Alt (and imgName for GetPicElement).
// GetRegCodePic is overloaded three ways: by (ImgName, Src, Alt), by Index,
// or by an IHtmlIMGElement; every overload returns a TPicture.
// NOTE(review): whether the caller owns (and must free) the returned TPicture
// is not visible in this listing -- confirm against the implementation.
function GetPicIndex(doc: IHtmlDocument2; Src: string; Alt: string): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string;src: string;Alt: string): IHtmlImgElement;
function GetRegCodePic(doc: IHtmlDocument2;ImgName: string; Src: string; Alt: string): TPicture; overload;
function GetRegCodePic(doc: IHtmlDocument2;Index: integer): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;element: IHtmlIMGElement): TPicture;overload;
type
// stdcall function-pointer type: takes an LRESULT, an interface IID and a
// WPARAM, and hands an object back through the untyped out parameter.
// NOTE(review): the shape matches oleacc's ObjectFromLresult, so this is
// presumably bound to that export at run time -- confirm in the implementation.
TObjectFromLResult = function(LRESULT: lResult;const IID: TIID; WPARAM: wParam;out pObject): HRESULT; stdcall;
// Kinds of HTML element distinguished by this unit; ELE_UNKNOW is the first
// enumerator. This is the result type of GetElementType.
TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Classification helpers: map an element to a TEleMentType value or to a string.
function GetElementType(element: IHtmlELEMENT): TEleMentType;
function GetElementTypeName(element: IHtmlELEMENT): string;
// Table helpers. The GetHtml* functions return an interface; the
// GetWebBrowserHtml* functions return a Boolean and deliver their text
// through the var parameter ResValue.
// NOTE(review): whether indices are 0- or 1-based, and what False means
// (out of range vs. other failure), is not visible here -- confirm.
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHtmlElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHtmlTable;
function GetWebBrowserHtmlTableCellText(Doc: IHtmlDocument2;
const TableIndex, RowIndex, ColIndex: Integer;var ResValue: string): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHtmlElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHtmlDocument2;
const TableIndex,RowIndex,ColIndex: Integer;var ResValue: string): Boolean;
// NOTE(review): "GeHtmlTableHtml" looks like a typo for "GetHtmlTableHtml".
// The name is part of the public interface and its implementation is not in
// this listing, so it is left unchanged.
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHtmlElement;
function GetWebBrowserHtmlTableHtml(Doc: IHtmlDocument2;
const TableIndex,RowIndex: Integer;var ResValue: string): Boolean;
type
// Forward declarations: TDxWebFrame refers to both collection classes, which
// are declared after it.
TDxWebFrameCollection = class;
TDxWebElementCollection = class;
// Load state reported by TDxWebFrame.ReadyState.
TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);
// Wrapper object around one IHtmlWINDOW2 frame interface (held in FFrame).
// Frame is the only writable property; every other property is read-only and
// served by a private getter.
// NOTE(review): the method bodies are not part of this listing, so only the
// declared shape is described here.
TDxWebFrame = class
private
// Frame interface exposed through the Frame property.
FFrame: IHtmlWINDOW2;
// NOTE(review): presumably back the ElementCollections and Frames
// properties respectively -- confirm against the getters.
FElementCollections: TDxWebElementCollection;
FWebFrameCollections: TDxWebFrameCollection;
function GetSrc: string;
function GetElementCount: integer;
function GetWebFrameCollections: TDxWebFrameCollection;
function GetElementCollections: TDxWebElementCollection;
function GetDocument: IHtmlDOCUMENT2;
function GetReadState: TLoadState;
function GetIsLoaded: boolean;
procedure SetFrame(const Value: IHtmlWINDOW2);
function GetName: string;
public
// IFrame: the frame window interface to wrap.
Constructor Create(IFrame: IHtmlWINDOW2);
Destructor Destroy;override;
property Frame: IHtmlWINDOW2 read FFrame write SetFrame;
property Src: string read GetSrc;
property Document: IHtmlDOCUMENT2 read GetDocument;
property Name: string read GetName;
// Nested frames of this frame, as a TDxWebFrameCollection.
property Frames: TDxWebFrameCollection read GetWebFrameCollections;
property ElementCount: integer read GetElementCount;
property ElementCollections: TDxWebElementCollection read GetElementCollections;
// ReadyState yields a TLoadState value; IsLoaded yields a Boolean.
property ReadyState: TLoadState read GetReadState;
property IsLoaded: boolean read GetIsLoaded;
end;
// Wrapper around an IHtmlFramesCollection2. Frames can be fetched by index
// or by name, either as the raw IHtmlWINDOW2 interface (FrameInterfaceBy*)
// or as a TDxWebFrame wrapper (FrameBy*).
TDxWebFrameCollection = Class
private
FFrameCollection: IHtmlFramesCollection2;
// NOTE(review): a single TDxWebFrame field; presumably one reusable wrapper
// that FrameByIndex/FrameByName re-point and return -- confirm, because
// callers would then have to avoid holding two results at once.
Frame: TDxWebFrame;
function GetCount: integer;
function GetFrameInterfaceByIndex(index: integer): IHtmlWINDOW2;
function GetFrameInterfaceByName(Name: string): IHtmlWINDOW2;
function GetFrameByIndex(index: integer): TDxWebFrame;
function GetFrameByName(Name: string): TDxWebFrame;
procedure SetFrameCollection(const Value: IHtmlFramesCollection2);
public
// ACollection: the frames collection interface to wrap.
Constructor Create(ACollection: IHtmlFramesCollection2);
Destructor Destroy;override;
property FrameCollection: IHtmlFramesCollection2 read FFrameCollection write SetFrameCollection;
property Count: integer read GetCount;
property FrameInterfaceByIndex[index: integer]: IHtmlWINDOW2 read GetFrameInterfaceByIndex;
property FrameInterfaceByName[Name: string]: IHtmlWINDOW2 read GetFrameInterfaceByName;
property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
property FrameByName[Name: string]: TDxWebFrame read GetFrameByName;
end;
// Wrapper around an IHtmlElementCollection. Elements can be fetched by name,
// by index, or by (name, index); every element accessor returns an
// IHtmlElement interface.
TDxWebElementCollection = class
private
// Wrapped collection; read directly and written via SetCollection through
// the Collection property.
FCollection: IHtmlElementCollection;
// NOTE(review): presumably the object handed out by ChildElementCollection
// -- confirm against GetCollection.
FChildCollection: TDxWebElementCollection;
function GetCollection(index: String): TDxWebElementCollection;
function GetCount: integer;
function GetElement(itemName: string; index: integer): IHtmlElement;
function GetElementByName(itemName: string): IHtmlELEMENT;
function GetElementByIndex(index: integer): IHtmlELEMENT;
procedure SetCollection(const Value: IHtmlElementCollection);
public
// ACollection: the element collection interface to wrap.
Constructor Create(ACollection: IHtmlElementCollection);
Destructor Destroy;override;
property Collection: IHtmlElementCollection read FCollection write SetCollection;
// Note that the "index" of ChildElementCollection is a String, not an Integer.
property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
property ElementCount: integer read GetCount;
property Element[itemName: string;index: integer]: IHtmlElement read GetElement;
property ElementByName[itemName: string]: IHtmlELEMENT read GetElementByName;
property ElementByIndex[index: integer]: IHtmlELEMENT read GetElementByIndex;
end;
// Descendant of TDxWebElementCollection that declares no members of its own.
TLinkCollection = class(TDxWebElementCollection)
end;
// Forward declaration: TDxTableCollection refers to TDxWebTable, which is
// declared after it.
TDxWebTable = class;
// Access to the tables of an IHtmlDOCUMENT2. Tables can be fetched by name
// or by index, either as the raw IHtmlTABLE interface (TableInterfaceBy*) or
// as a TDxWebTable wrapper (TableBy*).
TDxTableCollection = class
private
FTableCollection: IHtmlElementCollection;
// Document read directly and written via SetDocument through the Document property.
FDocument: IHtmlDOCUMENT2;
// NOTE(review): a single TDxWebTable field; presumably one reusable wrapper
// returned by TableByIndex/TableByName -- confirm against the getters.
FWebTable: TDxWebTable;
function GetTableInterfaceByName(AName: string): IHtmlTABLE;
procedure SetDocument(Value: IHtmlDOCUMENT2);
function GetTableInterfaceByIndex(index: integer): IHtmlTABLE;
function GetCount: integer;
function GetTableByIndex(index: integer): TDxWebTable;
function GetTableByName(AName: string): TDxWebTable;
public
// Doc: the document whose tables are exposed.
Constructor Create(Doc: IHtmlDOCUMENT2);
destructor Destroy;override;
property TableInterfaceByName[AName: string]: IHtmlTABLE read GetTableInterfaceByName;
property TableInterfaceByIndex[index: integer]: IHtmlTABLE read GetTableInterfaceByIndex;
property TableByName[AName: string]: TDxWebTable read GetTableByName;
property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
property Document: IHtmlDOCUMENT2 read FDocument write SetDocument;
property Count: integer read GetCount;
end;
// Wrapper around one IHtmlTABLE interface. It declares no destructor; the
// only field is the interface reference itself.
TDxWebTable = class
private
FTableInterface: IHtmlTABLE;
function GetRowCount: integer;
procedure SetTableInterface(const Value: IHtmlTABLE);
function GetCell(ACol, ARow: integer): string;
function GetRowColCount(RowIndex: integer): integer;
function GetInnerHtml: string;
function GetInnerText: string;
function GetCellElement(ACol, ARow: Integer): IHtmlTableCell;
public
// ATable: the table interface to wrap.
Constructor Create(ATable: IHtmlTABLE);
property TableInterface: IHtmlTABLE read FTableInterface write SetTableInterface;
property RowCount: integer read GetRowCount;
// Cell and CellElement take the column first, then the row. Cell yields a
// string, CellElement the IHtmlTableCell interface.
property Cell[ACol: integer;ARow: integer]: string read GetCell;
property CellElement[ACol: Integer;ARow: Integer]: IHtmlTableCell read GetCellElement;
// Column count is queried per row (RowIndex).
property RowColCount[RowIndex: integer]: integer read GetRowColCount;
property InnerHtml: string read GetInnerHtml;
property InnerText: string read GetInnerText;
end;
// Wrapper around an IHtmlSelectElement (an HTML <select>) with a
// combobox-like surface: Count, ItemIndex, item access, Add/Insert/Remove.
// NOTE(review): the method bodies are not part of this listing, so only the
// declared shape is described here.
TDxWebCombobox = class
private
// Wrapped select element; read directly and written via SetCombInterface
// through the CombInterface property.
FHtmlSelect: IHtmlSelectElement;
function GetCount: Integer;
procedure SetItemIndex(const Value: Integer);
function GetItemIndex: Integer;
function GetName: string;
procedure SetName(const Value: string);
function GetValue: string;
procedure SetValue(const Value: string);
procedure SetCombInterface(const Value: IHtmlSelectElement);
function GetItemByName(EleName: string): string;
function GetItemByIndex(index: integer): string;
function GetItemAttribute(index: Integer; AttribName: string): OleVariant;
public
// AWebCombo: the select element interface to wrap.
constructor Create(AWebCombo: IHtmlSelectElement);
// Add and Insert take the new item as an IHtmlElement; Remove takes an index.
procedure Add(Ele: IHtmlElement);
procedure Insert(Ele: IHtmlElement;Index: Integer);
procedure Remove(index: Integer);
property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
property Count: Integer read GetCount;
property ItemIndex: Integer read GetItemIndex write SetItemIndex;
// Item accessors yield strings; ItemAttribute yields an OleVariant for the
// attribute AttribName of the item at index.
property ItemByIndex[index: integer]: string read GetItemByIndex;
property ItemByName[EleName: string]: string read GetItemByName;
property ItemAttribute[index: Integer;AttribName: string]: OleVariant read GetItemAttribute;
property Name: string read GetName write SetName;
property value: string read GetValue write SetValue;
end;
// The implementation section is empty in this listing: only the interface
// declarations of DxHtmlElement are reproduced here.
implementation
end.
HtmlParser解析類的代碼實現單元
代碼
(******************************************************)
(* 得閒工作室 *)
(* Html解析單元庫 *)
(* *)
(* DxHtmlParser Unit *)
(* Copyright(c) 2008-2010 不得閒 *)
(* email:[email protected] QQ:75492895 *)
(******************************************************)
unit DxHtmlParser;
interface
uses Windows,MSHTML,ActiveX,ComObj,DxHtmlElement,Forms;
type
// Parses an HTML string with a stand-alone MSHTML document (no WebBrowser
// control) and exposes the result through helper collections.
// Assigning the HTML property loads the markup into the document body and
// rebinds WebElements to the document's "all" collection.
TDxHtmlParser = class
private
// The MSHTML document created in the constructor.
FHtmlDoc: IHtmlDocument2;
// Last HTML text assigned through the HTML property.
FHtml: string;
// Helper objects created in the constructor and freed in the destructor.
FWebTables: TDxTableCollection;
FWebElements: TDxWebElementCollection;
// Single shared combobox wrapper that GetWebCombobox re-points on every call.
FWebComb: TDxWebCombobox;
procedure SetHtml(const Value: string);
function GetWebCombobox(AName: string): TDxWebCombobox;
public
constructor Create;
destructor Destroy;override;
property HTML: string read FHTML write SetHtml;
property WebTables: TDxTableCollection read FWebTables;
property WebElements: TDxWebElementCollection read FWebElements;
// Returns the shared FWebComb instance bound to the element called Name, or
// nil while WebElements.Collection is nil. The returned object is owned by
// the parser and is rebound by the next access.
property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox;
end;
implementation
{ TDxHtmlParser }
{ Creates a stand-alone MSHTML document (no WebBrowser host), switches it to
  design mode, waits until it reports 'complete', and then creates the helper
  collections.
  Raises EOleSysError when the HTMLDocument COM object cannot be created. }
constructor TDxHtmlParser.Create;
begin
  inherited Create;
  // COM has to be initialised on the calling thread before the document can
  // be created; the matching CoUninitialize call is in Destroy.
  CoInitialize(nil);
  // Create the IHtmlDocument2 interface. OleCheck turns a failing HRESULT
  // into an exception, so the failure is still reported when assertions are
  // compiled out ($C-), instead of surfacing as an access violation below.
  OleCheck(CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc));
  Assert(FHtmlDoc<>nil,'構建HtmlDocument接口失敗');
  // Design mode: scripts are not executed.
  FHtmlDoc.Set_designMode('On');
  // Pump the message queue until the document has finished loading.
  // NOTE(review): there is no timeout; the loop runs until readyState
  // becomes 'complete'.
  while FHtmlDoc.readyState <> 'complete' do
  begin
    Sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  // The element collection and the combobox wrapper start unbound (nil);
  // SetHtml and GetWebCombobox bind them later.
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;
{ Frees the helper objects, releases the MSHTML document and only then shuts
  COM down for this thread. }
destructor TDxHtmlParser.Destroy;
begin
  // The helpers hold interface references of their own; free them first.
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Release the document explicitly. Interface fields are otherwise
  // finalised only after the destructor body has run, i.e. after
  // CoUninitialize, and releasing a COM object once COM has been torn down
  // can crash.
  FHtmlDoc := nil;
  CoUninitialize;
  inherited;
end;
{ Getter of the WebCombobox property.
  Returns nil while no element collection is bound; otherwise re-points the
  shared FWebComb wrapper at the element called AName and returns it. The
  "as" cast raises if that element does not support IHtmlSelectElement. }
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
var
  Found: IHtmlElement;
begin
  Result := nil;
  // Nothing has been parsed yet, so there is nothing to look up.
  if FWebElements.Collection = nil then
    Exit;
  Found := FWebElements.ElementByName[AName];
  FWebComb.CombInterface := Found as IHtmlSelectElement;
  Result := FWebComb;
end;
{ Setter of the HTML property.
  Does nothing when Value equals the text already stored; otherwise stores
  it, loads it into the document body and rebinds FWebElements to the
  document's "all" collection. }
procedure TDxHtmlParser.SetHtml(const Value: string);
begin
  // Unchanged text: skip the reparse.
  if FHtml = Value then
    Exit;
  FHtml := Value;
  FHtmlDoc.body.innerHTML := Value;
  FWebElements.Collection := FHtmlDoc.all;
end;
end.
本文示例源代碼或素材下載