怎么通过IHTMLDocument2提取网页中的Table表格数据
先下载一个mshtml.dll,并在项目中添加对它的引用(代码文件顶部需要 using mshtml;)
主要逻辑如下:
// Walks every <table> in the page currently loaded in webBrowser1 and reads the
// inner HTML of each cell. Tables found directly in the top-level document are
// used when there are any; otherwise every child frame's document is searched.
IHTMLDocument2 document = webBrowser1.Document.DomDocument as IHTMLDocument2;
IHTMLElementCollection tables = (IHTMLElementCollection)document.all.tags("TABLE");
if (tables.length == 0)
{
    // The top-level document has no table: look inside each frame instead.
    // frames.item takes its index as a boxed object passed by reference.
    object frameIndex;
    for (int i = 0; i < document.parentWindow.frames.length; i++)
    {
        frameIndex = i;
        HTMLWindow2 frame = document.parentWindow.frames.item(ref frameIndex) as HTMLWindow2;
        if (frame == null)
        {
            // The entry could not be viewed as a window object; nothing to read.
            continue;
        }
        // To restrict the search to a single frame, test frame.name here
        // (the original snippet targeted a frame named "frmRpt").
        IHTMLElementCollection frameTables = (IHTMLElementCollection)frame.document.all.tags("TABLE");
        // An empty collection simply yields no iterations, so no extra guard is needed.
        foreach (IHTMLTable table in frameTables)
        {
            // rows contains every row of the table, header rows included.
            IHTMLElementCollection rows = table.rows;
            foreach (IHTMLElement row in rows)
            {
                // children is exposed as a plain object by the interop assembly,
                // so it has to be cast before it can be enumerated.
                foreach (IHTMLElement cell in (IHTMLElementCollection)row.children)
                {
                    string ts = cell.innerHTML; // cell content; consume it here
                }
            }
        }
    }
}
else
{
    foreach (IHTMLTable table in tables)
    {
        // rows contains every row of the table, header rows included.
        IHTMLElementCollection rows = table.rows;
        foreach (IHTMLElement row in rows)
        {
            // children is exposed as a plain object by the interop assembly,
            // so it has to be cast before it can be enumerated.
            foreach (IHTMLElement cell in (IHTMLElementCollection)row.children)
            {
                string ts = cell.innerHTML; // cell content; consume it here
            }
        }
    }
}