C#获取网页源码问题 = = 20
http://passport.baidu.com/?business&un=a#0想获取这个的源码,用webclient和webrequest咋都不行呢==写的是win...
http://passport.baidu.com/?business&un=a#0 想获取这个页面的源码,但用 WebClient 和 WebRequest 怎么都不行 = = 写的是 WinForm 程序。
展开
2个回答
展开全部
///GetPageHtml.aspx.cs
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Code-behind for GetPageHtml.aspx. Each button handler downloads the URL typed
/// into <c>UrlText</c> with a different System.Net API and shows the response
/// text in <c>ContentHtml</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the URL comes straight from user input and is fetched by the
/// server, so this page can be used to reach hosts the server can see. Restrict
/// access or validate the URL before exposing it outside a demo environment.
/// </remarks>
public partial class LeadInBlog_GetPageHtml : System.Web.UI.Page
{
    // URL most recently read from the UrlText box by a click handler.
    private string PageUrl = "";

    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Downloads the page, removes everything that looks like a tag and
    /// collapses whitespace, leaving only the text. Not wired to any control
    /// in the markup shown with this class.
    /// </summary>
    private void GetText_Click(object sender, System.EventArgs e)
    {
        PageUrl = UrlText.Text;
        string html = DownloadText(PageUrl, System.Text.Encoding.Default);

        // Strip anything between '<' and '>'.
        string withoutTags = Regex.Replace(html, "<[^>]*>", "");
        // Collapse each run of whitespace into a single space.
        ContentHtml.Text = Regex.Replace(withoutTags, "\\s+", " ");
    }

    /// <summary>
    /// Downloads the page with <see cref="WebClient"/> and decodes it with the
    /// system default encoding.
    /// </summary>
    protected void WebClientButton_Click1(object sender, EventArgs e)
    {
        PageUrl = UrlText.Text;

        // 'using' guarantees the client, stream and reader are released even
        // when the download or the read throws.
        using (WebClient wc = new WebClient())
        {
            // NOTE(review): this sends the server process's default credentials
            // to a user-supplied URL; confirm that is intended.
            wc.Credentials = CredentialCache.DefaultCredentials;

            // Method 1 (alternative): wc.DownloadData(PageUrl) followed by
            // Encoding.Default.GetString(bytes).
            // Method 2 (used here): read the response as a stream.
            using (Stream resStream = wc.OpenRead(PageUrl))
            using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
            {
                ContentHtml.Text = sr.ReadToEnd();
            }
        }
    }

    /// <summary>
    /// Downloads the page with <see cref="WebRequest"/> and decodes it as UTF-8.
    /// </summary>
    protected void WebRequestButton_Click(object sender, EventArgs e)
    {
        PageUrl = UrlText.Text;
        ContentHtml.Text = DownloadText(PageUrl, System.Text.Encoding.UTF8);
    }

    /// <summary>
    /// Downloads the page with <see cref="HttpWebRequest"/> using a 30 second
    /// timeout and a "Pragma: no-cache" header, and decodes it as GB2312.
    /// On failure the error message is shown instead of the page source.
    /// </summary>
    protected void Button1_Click(object sender, EventArgs e)
    {
        string strResult = "";
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(UrlText.Text);
            // Timeout is expressed in milliseconds.
            request.Timeout = 30000;
            request.Headers.Set("Pragma", "no-cache");

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream streamReceive = response.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.GetEncoding("GB2312")))
            {
                strResult = streamReader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // The handler stays best-effort (a bad URL or network error must not
            // crash the page), but the reason is now shown to the user instead
            // of being silently discarded.
            strResult = "Request failed: " + ex.Message;
        }
        ContentHtml.Text = strResult;
    }

    /// <summary>
    /// Fetches <paramref name="url"/> with <see cref="WebRequest"/> and returns
    /// the whole response body decoded with <paramref name="encoding"/>.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="encoding">Encoding used to decode the response bytes.</param>
    /// <returns>The response body as text.</returns>
    private static string DownloadText(string url, Encoding encoding)
    {
        WebRequest request = WebRequest.Create(url);
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}
///GetPageHtml.aspx
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="GetPageHtml.aspx.cs" Inherits="LeadInBlog_GetPageHtml" ValidateRequest="false" %>
<%-- Page markup for the LeadInBlog_GetPageHtml code-behind class named in the directive above.
     NOTE(review): ValidateRequest is disabled, presumably so that posting back the fetched HTML held in ContentHtml is not rejected; confirm. --%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<html>
<head>
<title>得到网页源代码</title>
</head>
<body MS_POSITIONING="GridLayout">
<form id="aspNetBuffer" method="post" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<%-- URL to download; pre-filled with a sample feed address. --%>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://blog.sina.com.cn/rss/xujinglei.xml</asp:TextBox>
<%-- Each button posts back to the code-behind handler named in its OnClick attribute. --%>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到" OnClick="WebClientButton_Click1"></asp:Button>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" OnClick="WebRequestButton_Click"></asp:Button>
<asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click" />
<br />
<%-- Multi-line box that the handlers fill with the downloaded text. --%>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine"></asp:TextBox>
</form>
</body>
</html>
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Code-behind for GetPageHtml.aspx. Each button handler downloads the URL typed
/// into <c>UrlText</c> with a different System.Net API and shows the response
/// text in <c>ContentHtml</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the URL comes straight from user input and is fetched by the
/// server, so this page can be used to reach hosts the server can see. Restrict
/// access or validate the URL before exposing it outside a demo environment.
/// </remarks>
public partial class LeadInBlog_GetPageHtml : System.Web.UI.Page
{
    // URL most recently read from the UrlText box by a click handler.
    private string PageUrl = "";

    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Downloads the page, removes everything that looks like a tag and
    /// collapses whitespace, leaving only the text. Not wired to any control
    /// in the markup shown with this class.
    /// </summary>
    private void GetText_Click(object sender, System.EventArgs e)
    {
        PageUrl = UrlText.Text;
        string html = DownloadText(PageUrl, System.Text.Encoding.Default);

        // Strip anything between '<' and '>'.
        string withoutTags = Regex.Replace(html, "<[^>]*>", "");
        // Collapse each run of whitespace into a single space.
        ContentHtml.Text = Regex.Replace(withoutTags, "\\s+", " ");
    }

    /// <summary>
    /// Downloads the page with <see cref="WebClient"/> and decodes it with the
    /// system default encoding.
    /// </summary>
    protected void WebClientButton_Click1(object sender, EventArgs e)
    {
        PageUrl = UrlText.Text;

        // 'using' guarantees the client, stream and reader are released even
        // when the download or the read throws.
        using (WebClient wc = new WebClient())
        {
            // NOTE(review): this sends the server process's default credentials
            // to a user-supplied URL; confirm that is intended.
            wc.Credentials = CredentialCache.DefaultCredentials;

            // Method 1 (alternative): wc.DownloadData(PageUrl) followed by
            // Encoding.Default.GetString(bytes).
            // Method 2 (used here): read the response as a stream.
            using (Stream resStream = wc.OpenRead(PageUrl))
            using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
            {
                ContentHtml.Text = sr.ReadToEnd();
            }
        }
    }

    /// <summary>
    /// Downloads the page with <see cref="WebRequest"/> and decodes it as UTF-8.
    /// </summary>
    protected void WebRequestButton_Click(object sender, EventArgs e)
    {
        PageUrl = UrlText.Text;
        ContentHtml.Text = DownloadText(PageUrl, System.Text.Encoding.UTF8);
    }

    /// <summary>
    /// Downloads the page with <see cref="HttpWebRequest"/> using a 30 second
    /// timeout and a "Pragma: no-cache" header, and decodes it as GB2312.
    /// On failure the error message is shown instead of the page source.
    /// </summary>
    protected void Button1_Click(object sender, EventArgs e)
    {
        string strResult = "";
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(UrlText.Text);
            // Timeout is expressed in milliseconds.
            request.Timeout = 30000;
            request.Headers.Set("Pragma", "no-cache");

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream streamReceive = response.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.GetEncoding("GB2312")))
            {
                strResult = streamReader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // The handler stays best-effort (a bad URL or network error must not
            // crash the page), but the reason is now shown to the user instead
            // of being silently discarded.
            strResult = "Request failed: " + ex.Message;
        }
        ContentHtml.Text = strResult;
    }

    /// <summary>
    /// Fetches <paramref name="url"/> with <see cref="WebRequest"/> and returns
    /// the whole response body decoded with <paramref name="encoding"/>.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="encoding">Encoding used to decode the response bytes.</param>
    /// <returns>The response body as text.</returns>
    private static string DownloadText(string url, Encoding encoding)
    {
        WebRequest request = WebRequest.Create(url);
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}
///GetPageHtml.aspx
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="GetPageHtml.aspx.cs" Inherits="LeadInBlog_GetPageHtml" ValidateRequest="false" %>
<%-- Page markup for the LeadInBlog_GetPageHtml code-behind class named in the directive above.
     NOTE(review): ValidateRequest is disabled, presumably so that posting back the fetched HTML held in ContentHtml is not rejected; confirm. --%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" >
<html>
<head>
<title>得到网页源代码</title>
</head>
<body MS_POSITIONING="GridLayout">
<form id="aspNetBuffer" method="post" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<%-- URL to download; pre-filled with a sample feed address. --%>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://blog.sina.com.cn/rss/xujinglei.xml</asp:TextBox>
<%-- Each button posts back to the code-behind handler named in its OnClick attribute. --%>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到" OnClick="WebClientButton_Click1"></asp:Button>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" OnClick="WebRequestButton_Click"></asp:Button>
<asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click" />
<br />
<%-- Multi-line box that the handlers fill with the downloaded text. --%>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine"></asp:TextBox>
</form>
</body>
</html>
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询