请帮忙编写一个例子:用 C# 提取网页中的指定文字信息,比如把我现在的这个提问提取出来,并显示在 textBox1 中,非常感谢! 20
展开全部
using System;
using System.Net;
using System.IO;
/// <summary>
/// Downloads (or is handed) the HTML source of a page and extracts the text
/// found between a start tag and an end tag.
/// </summary>
public class DataCollection
{
    /// <summary>
    /// HTML source that <see cref="getString"/> searches. Kept as a public
    /// field so existing callers that read or assign it keep working.
    /// </summary>
    public string HTML = "";

    public DataCollection()
    {
    }

    /// <summary>
    /// Downloads the page at the given URL, decodes it as GB2312 and stores it
    /// in <see cref="HTML"/>, replacing any previous content. Line breaks are
    /// dropped: the lines of the response are concatenated directly.
    /// </summary>
    /// <param name="vUrl">URL of the page to download.</param>
    /// <returns>1 on success; -1 if anything fails (HTML is then left unchanged).</returns>
    public int setHtmlString(string vUrl)
    {
        try
        {
            WebRequest wreq = WebRequest.Create(vUrl);
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();

            // Response, stream and reader all hold the connection open, so each
            // one is disposed even when reading throws.
            using (HttpWebResponse wresp = (HttpWebResponse)wreq.GetResponse())
            using (Stream s = wresp.GetResponseStream())
            using (StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding("GB2312")))
            {
                string sLine;
                while ((sLine = objReader.ReadLine()) != null)
                {
                    buffer.Append(sLine);
                }
            }

            // Assign only after the whole page has been read, so a failed
            // download never leaves a half-written page behind.
            HTML = buffer.ToString();
            return 1;
        }
        catch (Exception)
        {
            // The method's contract is a status code, so every failure
            // (bad URL, network error, unsupported encoding) maps to -1.
            return -1;
        }
    }

    /// <summary>
    /// Stores the given HTML source directly (overload).
    /// </summary>
    /// <param name="content">HTML source to store; null is stored as an empty string.</param>
    /// <param name="i">Unused; only distinguishes this overload from the URL overload.</param>
    public void setHtmlString(string content, int i)
    {
        HTML = content ?? "";
    }

    /// <summary>
    /// Finds every occurrence of a tag in the given text, including overlapping
    /// occurrences and one that ends exactly at the end of the text.
    /// </summary>
    /// <param name="vHtml">Text to search.</param>
    /// <param name="vSign">Tag to look for (ordinal, case-sensitive comparison).</param>
    /// <returns>
    /// For each occurrence, the index of the first character AFTER the tag.
    /// Empty when either argument is null or empty.
    /// </returns>
    public int[] getSignIndex(string vHtml, string vSign)
    {
        System.Collections.Generic.List<int> positions = new System.Collections.Generic.List<int>();

        int found = indexOfSign(vHtml, vSign, 0);
        while (found >= 0)
        {
            positions.Add(found + vSign.Length);
            // Step one character (not one tag length) so overlapping
            // occurrences are still reported.
            found = indexOfSign(vHtml, vSign, found + 1);
        }

        return positions.ToArray();
    }

    /// <summary>
    /// For each start position, finds the first occurrence of a tag at or after
    /// that position (overload).
    /// </summary>
    /// <param name="vHtml">Text to search.</param>
    /// <param name="vSign">Tag to look for (ordinal, case-sensitive comparison).</param>
    /// <param name="startIndex">Positions at which each search begins.</param>
    /// <returns>
    /// One entry per start position: the index where the tag begins, or 0 when
    /// the tag is not found from that position. Empty when startIndex is null.
    /// </returns>
    public int[] getSignIndex(string vHtml, string vSign, int[] startIndex)
    {
        if (startIndex == null)
        {
            return new int[0];
        }

        int[] index = new int[startIndex.Length];
        for (int k = 0; k < startIndex.Length; k++)
        {
            int found = indexOfSign(vHtml, vSign, startIndex[k]);
            // 0 is the historical "not found" value of this method; keep it so
            // existing callers see the same result.
            index[k] = found < 0 ? 0 : found;
        }
        return index;
    }

    /// <summary>
    /// Removes every occurrence of a tag from each string of the array. The
    /// array is modified in place and also returned.
    /// </summary>
    /// <param name="content">Strings to clean; null entries are left as they are.</param>
    /// <param name="sign">Tag to remove; a null or empty tag removes nothing.</param>
    /// <returns>The same array instance that was passed in.</returns>
    public string[] deleteSign(string[] content, string sign)
    {
        // string.Replace rejects a null or empty search value, so there is
        // nothing to remove in that case.
        if (content == null || string.IsNullOrEmpty(sign))
        {
            return content;
        }

        for (int i = 0; i < content.Length; i++)
        {
            if (content[i] != null)
            {
                content[i] = content[i].Replace(sign, "");
            }
        }
        return content;
    }

    /// <summary>
    /// Extracts the text between each occurrence of a start tag and the first
    /// end tag that follows it in <see cref="HTML"/>.
    /// </summary>
    /// <param name="startSign">Start tag.</param>
    /// <param name="endSign">End tag.</param>
    /// <returns>
    /// One string per occurrence of the start tag; an entry is empty when no
    /// end tag follows that occurrence.
    /// </returns>
    public string[] getString(string startSign, string endSign)
    {
        int[] start = getSignIndex(HTML, startSign);
        string[] msg = new string[start.Length];
        for (int i = 0; i < msg.Length; i++)
        {
            int end = indexOfSign(HTML, endSign, start[i]);
            msg[i] = end < 0 ? "" : HTML.Substring(start[i], end - start[i]);
        }
        return msg;
    }

    /// <summary>
    /// Ordinal search for a tag starting at a given position; returns -1 when
    /// the tag is absent or when any argument is null, empty or out of range.
    /// </summary>
    private static int indexOfSign(string text, string sign, int from)
    {
        if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(sign) || from < 0 || from > text.Length)
        {
            return -1;
        }
        return text.IndexOf(sign, from, StringComparison.Ordinal);
    }
}
using System.Net;
using System.IO;
/// <summary>
/// Downloads (or is handed) the HTML source of a page and extracts the text
/// found between a start tag and an end tag.
/// </summary>
public class DataCollection
{
    /// <summary>
    /// HTML source that <see cref="getString"/> searches. Kept as a public
    /// field so existing callers that read or assign it keep working.
    /// </summary>
    public string HTML = "";

    public DataCollection()
    {
    }

    /// <summary>
    /// Downloads the page at the given URL, decodes it as GB2312 and stores it
    /// in <see cref="HTML"/>, replacing any previous content. Line breaks are
    /// dropped: the lines of the response are concatenated directly.
    /// </summary>
    /// <param name="vUrl">URL of the page to download.</param>
    /// <returns>1 on success; -1 if anything fails (HTML is then left unchanged).</returns>
    public int setHtmlString(string vUrl)
    {
        try
        {
            WebRequest wreq = WebRequest.Create(vUrl);
            System.Text.StringBuilder buffer = new System.Text.StringBuilder();

            // Response, stream and reader all hold the connection open, so each
            // one is disposed even when reading throws.
            using (HttpWebResponse wresp = (HttpWebResponse)wreq.GetResponse())
            using (Stream s = wresp.GetResponseStream())
            using (StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding("GB2312")))
            {
                string sLine;
                while ((sLine = objReader.ReadLine()) != null)
                {
                    buffer.Append(sLine);
                }
            }

            // Assign only after the whole page has been read, so a failed
            // download never leaves a half-written page behind.
            HTML = buffer.ToString();
            return 1;
        }
        catch (Exception)
        {
            // The method's contract is a status code, so every failure
            // (bad URL, network error, unsupported encoding) maps to -1.
            return -1;
        }
    }

    /// <summary>
    /// Stores the given HTML source directly (overload).
    /// </summary>
    /// <param name="content">HTML source to store; null is stored as an empty string.</param>
    /// <param name="i">Unused; only distinguishes this overload from the URL overload.</param>
    public void setHtmlString(string content, int i)
    {
        HTML = content ?? "";
    }

    /// <summary>
    /// Finds every occurrence of a tag in the given text, including overlapping
    /// occurrences and one that ends exactly at the end of the text.
    /// </summary>
    /// <param name="vHtml">Text to search.</param>
    /// <param name="vSign">Tag to look for (ordinal, case-sensitive comparison).</param>
    /// <returns>
    /// For each occurrence, the index of the first character AFTER the tag.
    /// Empty when either argument is null or empty.
    /// </returns>
    public int[] getSignIndex(string vHtml, string vSign)
    {
        System.Collections.Generic.List<int> positions = new System.Collections.Generic.List<int>();

        int found = indexOfSign(vHtml, vSign, 0);
        while (found >= 0)
        {
            positions.Add(found + vSign.Length);
            // Step one character (not one tag length) so overlapping
            // occurrences are still reported.
            found = indexOfSign(vHtml, vSign, found + 1);
        }

        return positions.ToArray();
    }

    /// <summary>
    /// For each start position, finds the first occurrence of a tag at or after
    /// that position (overload).
    /// </summary>
    /// <param name="vHtml">Text to search.</param>
    /// <param name="vSign">Tag to look for (ordinal, case-sensitive comparison).</param>
    /// <param name="startIndex">Positions at which each search begins.</param>
    /// <returns>
    /// One entry per start position: the index where the tag begins, or 0 when
    /// the tag is not found from that position. Empty when startIndex is null.
    /// </returns>
    public int[] getSignIndex(string vHtml, string vSign, int[] startIndex)
    {
        if (startIndex == null)
        {
            return new int[0];
        }

        int[] index = new int[startIndex.Length];
        for (int k = 0; k < startIndex.Length; k++)
        {
            int found = indexOfSign(vHtml, vSign, startIndex[k]);
            // 0 is the historical "not found" value of this method; keep it so
            // existing callers see the same result.
            index[k] = found < 0 ? 0 : found;
        }
        return index;
    }

    /// <summary>
    /// Removes every occurrence of a tag from each string of the array. The
    /// array is modified in place and also returned.
    /// </summary>
    /// <param name="content">Strings to clean; null entries are left as they are.</param>
    /// <param name="sign">Tag to remove; a null or empty tag removes nothing.</param>
    /// <returns>The same array instance that was passed in.</returns>
    public string[] deleteSign(string[] content, string sign)
    {
        // string.Replace rejects a null or empty search value, so there is
        // nothing to remove in that case.
        if (content == null || string.IsNullOrEmpty(sign))
        {
            return content;
        }

        for (int i = 0; i < content.Length; i++)
        {
            if (content[i] != null)
            {
                content[i] = content[i].Replace(sign, "");
            }
        }
        return content;
    }

    /// <summary>
    /// Extracts the text between each occurrence of a start tag and the first
    /// end tag that follows it in <see cref="HTML"/>.
    /// </summary>
    /// <param name="startSign">Start tag.</param>
    /// <param name="endSign">End tag.</param>
    /// <returns>
    /// One string per occurrence of the start tag; an entry is empty when no
    /// end tag follows that occurrence.
    /// </returns>
    public string[] getString(string startSign, string endSign)
    {
        int[] start = getSignIndex(HTML, startSign);
        string[] msg = new string[start.Length];
        for (int i = 0; i < msg.Length; i++)
        {
            int end = indexOfSign(HTML, endSign, start[i]);
            msg[i] = end < 0 ? "" : HTML.Substring(start[i], end - start[i]);
        }
        return msg;
    }

    /// <summary>
    /// Ordinal search for a tag starting at a given position; returns -1 when
    /// the tag is absent or when any argument is null, empty or out of range.
    /// </summary>
    private static int indexOfSign(string text, string sign, int from)
    {
        if (string.IsNullOrEmpty(text) || string.IsNullOrEmpty(sign) || from < 0 || from > text.Length)
        {
            return -1;
        }
        return text.IndexOf(sign, from, StringComparison.Ordinal);
    }
}
已赞过
已踩过
评论
收起
你对这个回答的评价是?
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询