C# HttpWebRequest 请求网页时候,怎么判断网页编码
用 HttpWebRequest / HttpWebResponse 请求得到网页,然后用 System.IO.StreamReader sr = new System.IO.StreamR...
用 HttpWebRequest / HttpWebResponse 请求得到网页,然后用 System.IO.StreamReader sr = new System.IO.StreamReader(s, myEncoding); 读取。我遇到过这样的情况:用 UTF-8 编码读取时,有些网页源码中的中文会变成乱码;改为 GB2312 就正常了,但不知道这样是否适用于其他网页。怎样才能自动识别网页编码呢?
展开
2个回答
展开全部
先判断 Response 的 Content-Type 头中的 charset 数据。如果不行,再判断 HTML 的 meta 标签(content-type)中的 charset 数据。再不行,只能逐个测试是不是某些主流编码了(例如:UTF-8、GB2312、GBK 之类的)。我这里的代码没有测试主流编码,只实现了前面两步判断。
/// <summary>
/// Downloads the page whose URL is passed as the first command-line argument,
/// determines its character encoding, and writes the decoded text to the console.
/// </summary>
/// <remarks>
/// The encoding is looked up in this order: the charset parameter of the HTTP
/// Content-Type header, a charset declared in an HTML meta tag, and finally UTF-8.
/// A byte-order mark at the start of the body still takes precedence, because the
/// <see cref="StreamReader"/> used for decoding detects it.
/// On .NET Core / .NET 5+, legacy code pages such as GB2312 are only available after
/// the code-pages encoding provider has been registered.
/// </remarks>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the URL to fetch.</param>
static void Main(string[] args)
{
    // The URL used to be an empty placeholder, which does not compile; read it from the command line.
    if (args == null || args.Length == 0)
    {
        Console.WriteLine("Usage: <program> <url>");
        return;
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(args[0]);

    // Read the body once as raw bytes. The response stream cannot be rewound, and
    // decoding with a guessed encoding and re-encoding afterwards loses bytes.
    byte[] body;
    string contentType;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (MemoryStream buffer = new MemoryStream())
    {
        contentType = response.Headers["Content-Type"];
        responseStream.CopyTo(buffer);
        body = buffer.ToArray();
    }

    // Accepts charset=utf-8, charset="utf-8" and charset='utf-8'; the captured name
    // stops at quotes, whitespace and separators so it can be passed to GetEncoding.
    const string charsetPattern = "charset\\s*=\\s*[\"']?\\s*([^\\s\"';,>/]+)";
    Regex headerRegex = new Regex(charsetPattern, RegexOptions.IgnoreCase);
    // [^>]*? keeps the match inside a single <meta ...> tag.
    Regex metaRegex = new Regex("<\\s*meta[^>]*?" + charsetPattern, RegexOptions.IgnoreCase);

    Encoding encoding = null;

    // Step 1: charset from the HTTP Content-Type header.
    if (contentType != null)
    {
        Match headerMatch = headerRegex.Match(contentType);
        if (headerMatch.Success)
        {
            try
            {
                encoding = Encoding.GetEncoding(headerMatch.Groups[1].Value);
            }
            catch (ArgumentException ex)
            {
                // Unknown or unsupported charset name: report it and fall through to the meta tag.
                Console.WriteLine(ex);
            }
        }
    }

    // Step 2: charset from an HTML meta tag. The markup of interest is plain ASCII,
    // so an ASCII view of the bytes is enough for sniffing regardless of the real encoding.
    if (encoding == null)
    {
        string asciiView = Encoding.ASCII.GetString(body);
        Match metaMatch = metaRegex.Match(asciiView);
        if (metaMatch.Success)
        {
            try
            {
                encoding = Encoding.GetEncoding(metaMatch.Groups[1].Value);
            }
            catch (ArgumentException ex)
            {
                Console.WriteLine(ex);
            }
        }
    }

    // Step 3: nothing usable was declared; fall back to UTF-8 instead of printing nothing.
    if (encoding == null)
    {
        encoding = Encoding.UTF8;
    }

    using (MemoryStream bodyStream = new MemoryStream(body))
    using (TextReader reader = new StreamReader(bodyStream, encoding))
    {
        Console.WriteLine(reader.ReadToEnd());
    }

    Console.ReadKey();
}
/// <summary>
/// Downloads the page whose URL is passed as the first command-line argument,
/// determines its character encoding, and writes the decoded text to the console.
/// </summary>
/// <remarks>
/// The encoding is looked up in this order: the charset parameter of the HTTP
/// Content-Type header, a charset declared in an HTML meta tag, and finally UTF-8.
/// A byte-order mark at the start of the body still takes precedence, because the
/// <see cref="StreamReader"/> used for decoding detects it.
/// On .NET Core / .NET 5+, legacy code pages such as GB2312 are only available after
/// the code-pages encoding provider has been registered.
/// </remarks>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the URL to fetch.</param>
static void Main(string[] args)
{
    // The URL used to be an empty placeholder, which does not compile; read it from the command line.
    if (args == null || args.Length == 0)
    {
        Console.WriteLine("Usage: <program> <url>");
        return;
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(args[0]);

    // Read the body once as raw bytes. The response stream cannot be rewound, and
    // decoding with a guessed encoding and re-encoding afterwards loses bytes.
    byte[] body;
    string contentType;
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (MemoryStream buffer = new MemoryStream())
    {
        contentType = response.Headers["Content-Type"];
        responseStream.CopyTo(buffer);
        body = buffer.ToArray();
    }

    // Accepts charset=utf-8, charset="utf-8" and charset='utf-8'; the captured name
    // stops at quotes, whitespace and separators so it can be passed to GetEncoding.
    const string charsetPattern = "charset\\s*=\\s*[\"']?\\s*([^\\s\"';,>/]+)";
    Regex headerRegex = new Regex(charsetPattern, RegexOptions.IgnoreCase);
    // [^>]*? keeps the match inside a single <meta ...> tag.
    Regex metaRegex = new Regex("<\\s*meta[^>]*?" + charsetPattern, RegexOptions.IgnoreCase);

    Encoding encoding = null;

    // Step 1: charset from the HTTP Content-Type header.
    if (contentType != null)
    {
        Match headerMatch = headerRegex.Match(contentType);
        if (headerMatch.Success)
        {
            try
            {
                encoding = Encoding.GetEncoding(headerMatch.Groups[1].Value);
            }
            catch (ArgumentException ex)
            {
                // Unknown or unsupported charset name: report it and fall through to the meta tag.
                Console.WriteLine(ex);
            }
        }
    }

    // Step 2: charset from an HTML meta tag. The markup of interest is plain ASCII,
    // so an ASCII view of the bytes is enough for sniffing regardless of the real encoding.
    if (encoding == null)
    {
        string asciiView = Encoding.ASCII.GetString(body);
        Match metaMatch = metaRegex.Match(asciiView);
        if (metaMatch.Success)
        {
            try
            {
                encoding = Encoding.GetEncoding(metaMatch.Groups[1].Value);
            }
            catch (ArgumentException ex)
            {
                Console.WriteLine(ex);
            }
        }
    }

    // Step 3: nothing usable was declared; fall back to UTF-8 instead of printing nothing.
    if (encoding == null)
    {
        encoding = Encoding.UTF8;
    }

    using (MemoryStream bodyStream = new MemoryStream(body))
    using (TextReader reader = new StreamReader(bodyStream, encoding))
    {
        Console.WriteLine(reader.ReadToEnd());
    }

    Console.ReadKey();
}
推荐律师服务:
若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询