// Download the raw bytes of the page at `url`, authenticating with the
// application's default system credentials.
WebClient webClient = new WebClient();
webClient.Credentials = CredentialCache.DefaultCredentials;
Byte[] pageData = webClient.DownloadData(url);
// Decode with the built-in UTF-8 encoding object instead of looking it up by
// the non-canonical name "utf8" (the registered name is "utf-8"), so the
// lookup cannot fail at run time.
// NOTE(review): this only turns bytes into text. Percent-escapes that are
// literally present in the HTML (e.g. "%25E6...") stay as they are and need
// HttpUtility.UrlDecode applied to the specific value afterwards.
string pageHtml = Encoding.UTF8.GetString(pageData);
英文内容显示正常,但中文出来的竟然是“%3Fq%3D%25E6%25B7%25B1%25E5%259C%25B3%2B”这样经过 URL 百分号编码(UTF-8,而且被编码了两次)的转义字符串。
我应该怎么下载才能得到它的中文呢?
8 个解决方案
#1
又或者说,我下载了这样的字符下来,应该怎么转换回中文字呢?
#2
你可以用“HttpUtility.UrlDecode”试试,例如:
// Download the raw response bytes, then percent-decode them and interpret
// the decoded bytes as UTF-8 text.
Byte[] pageData = webClient.DownloadData(url);
// The decoded text gets its own name: a second local called `pageData`
// would not compile. The encoding property is `Encoding.UTF8`.
string pageHtml = HttpUtility.UrlDecode(pageData, Encoding.UTF8);
#3
up
#4
不行,试过了
#5
以下这样是不行的,不知道为什么……
// The q parameter is the percent-encoded UTF-8 form of a two-character
// Chinese search term.
string url = @"http://www.google.com/search?hl=en&q=%E4%B8%AD%E5%9B%BD";
// Dispose the client deterministically once the download has finished.
using (WebClient client = new WebClient())
{
    Byte[] pageData = client.DownloadData(url);
    // Percent-decode the downloaded bytes and read the result as UTF-8.
    // NOTE(review): this assumes the server answered in UTF-8; if it used
    // another charset the non-ASCII text will still be garbled - confirm
    // against the response's Content-Type header.
    string pageHtml = HttpUtility.UrlDecode(pageData, Encoding.UTF8);
    Console.Write(pageHtml);
}
#6
?? 星期天大家都休息了吗?
#7
/// <summary>
/// Fetches <paramref name="sourceUrl"/> over HTTP and returns the response body as text.
/// </summary>
/// <param name="sourceUrl">Absolute HTTP or HTTPS address of the page to download.</param>
/// <returns>
/// The decoded response body. If sending the request or reading the response
/// fails, a string consisting of "======= ERR ========= ", a line break and
/// the exception message is returned instead.
/// </returns>
/// <remarks>
/// The request object is created outside the try block, so an exception thrown
/// by <c>WebRequest.Create</c> or by the cast to <c>HttpWebRequest</c>
/// propagates to the caller rather than being turned into an error string.
/// </remarks>
public string CatchHtml(string sourceUrl)
{
    HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(sourceUrl);
    myHttpWebRequest.Accept = @"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
    myHttpWebRequest.UserAgent = @"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; Alexa Toolbar)";
    myHttpWebRequest.Headers.Add("Cookie", "B=0drjb7t2k48s4&b=3&s=m0; Q=q1=AAAAAAAAAAAAAA--&q2=RUIiOg--; CN_FP_TAB=todaytabs%3Dtodaytabscon%2C%2Copen%2Cend%7Cschtabs%3Dschtabscon%2C%2Copen%2Cend%7Ccdntabs%3Dcdntabscon%2C%2Copen%2Cend");
    // "zh-cn" is a language tag, so it is sent as Accept-Language. Assigning it
    // to Accept would discard the media-type list configured above.
    myHttpWebRequest.Headers.Add("Accept-Language", "zh-cn");
    myHttpWebRequest.Timeout = 20000;

    try
    {
        // The using statements release the response, stream and reader even
        // when reading the body throws.
        using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
        using (Stream streamResponse = myHttpWebResponse.GetResponseStream())
        using (StreamReader streamRead = new StreamReader(streamResponse, ResolveResponseEncoding(myHttpWebResponse)))
        {
            return streamRead.ReadToEnd();
        }
    }
    catch (Exception e)
    {
        return "======= ERR ========= \r\n" + e.Message.ToString();
    }
}

/// <summary>
/// Chooses the text encoding for a response body: the charset named in the
/// Content-Type header when one is present and recognised, otherwise
/// <c>Encoding.Default</c>.
/// </summary>
/// <param name="response">Response whose headers are inspected.</param>
/// <returns>The encoding to decode the body with; never null.</returns>
private static Encoding ResolveResponseEncoding(HttpWebResponse response)
{
    string contentType = response.ContentType;

    // Only trust CharacterSet when the header really carries a charset
    // parameter; otherwise keep the previous Encoding.Default behaviour.
    bool declaresCharset = !string.IsNullOrEmpty(contentType)
        && contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase) >= 0;

    if (declaresCharset && !string.IsNullOrEmpty(response.CharacterSet))
    {
        // Some servers quote the value (charset="utf-8"); strip the quotes.
        string charsetName = response.CharacterSet.Trim(' ', '"', '\'');
        try
        {
            return Encoding.GetEncoding(charsetName);
        }
        catch (ArgumentException)
        {
            // Unknown or malformed charset name: fall through to the default.
        }
        catch (NotSupportedException)
        {
            // Encoding not available on this platform: fall through as well.
        }
    }

    return Encoding.Default;
}
#8
顶!
#1
又或者说,我下载了这样的字符下来,应该怎么转换回中文字呢?
#2
你可以用“HttpUtility.UrlDecode”试试,例如:
// Download the raw response bytes, then percent-decode them and interpret
// the decoded bytes as UTF-8 text.
Byte[] pageData = webClient.DownloadData(url);
// The decoded text gets its own name: a second local called `pageData`
// would not compile. The encoding property is `Encoding.UTF8`.
string pageHtml = HttpUtility.UrlDecode(pageData, Encoding.UTF8);
#3
up
#4
不行,试过了
#5
以下这样是不行的,不知道为什么……
// The q parameter is the percent-encoded UTF-8 form of a two-character
// Chinese search term.
string url = @"http://www.google.com/search?hl=en&q=%E4%B8%AD%E5%9B%BD";
// Dispose the client deterministically once the download has finished.
using (WebClient client = new WebClient())
{
    Byte[] pageData = client.DownloadData(url);
    // Percent-decode the downloaded bytes and read the result as UTF-8.
    // NOTE(review): this assumes the server answered in UTF-8; if it used
    // another charset the non-ASCII text will still be garbled - confirm
    // against the response's Content-Type header.
    string pageHtml = HttpUtility.UrlDecode(pageData, Encoding.UTF8);
    Console.Write(pageHtml);
}
#6
?? 星期天大家都休息了吗?
#7
/// <summary>
/// Fetches <paramref name="sourceUrl"/> over HTTP and returns the response body as text.
/// </summary>
/// <param name="sourceUrl">Absolute HTTP or HTTPS address of the page to download.</param>
/// <returns>
/// The decoded response body. If sending the request or reading the response
/// fails, a string consisting of "======= ERR ========= ", a line break and
/// the exception message is returned instead.
/// </returns>
/// <remarks>
/// The request object is created outside the try block, so an exception thrown
/// by <c>WebRequest.Create</c> or by the cast to <c>HttpWebRequest</c>
/// propagates to the caller rather than being turned into an error string.
/// </remarks>
public string CatchHtml(string sourceUrl)
{
    HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(sourceUrl);
    myHttpWebRequest.Accept = @"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
    myHttpWebRequest.UserAgent = @"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; Alexa Toolbar)";
    myHttpWebRequest.Headers.Add("Cookie", "B=0drjb7t2k48s4&b=3&s=m0; Q=q1=AAAAAAAAAAAAAA--&q2=RUIiOg--; CN_FP_TAB=todaytabs%3Dtodaytabscon%2C%2Copen%2Cend%7Cschtabs%3Dschtabscon%2C%2Copen%2Cend%7Ccdntabs%3Dcdntabscon%2C%2Copen%2Cend");
    // "zh-cn" is a language tag, so it is sent as Accept-Language. Assigning it
    // to Accept would discard the media-type list configured above.
    myHttpWebRequest.Headers.Add("Accept-Language", "zh-cn");
    myHttpWebRequest.Timeout = 20000;

    try
    {
        // The using statements release the response, stream and reader even
        // when reading the body throws.
        using (HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse())
        using (Stream streamResponse = myHttpWebResponse.GetResponseStream())
        using (StreamReader streamRead = new StreamReader(streamResponse, ResolveResponseEncoding(myHttpWebResponse)))
        {
            return streamRead.ReadToEnd();
        }
    }
    catch (Exception e)
    {
        return "======= ERR ========= \r\n" + e.Message.ToString();
    }
}

/// <summary>
/// Chooses the text encoding for a response body: the charset named in the
/// Content-Type header when one is present and recognised, otherwise
/// <c>Encoding.Default</c>.
/// </summary>
/// <param name="response">Response whose headers are inspected.</param>
/// <returns>The encoding to decode the body with; never null.</returns>
private static Encoding ResolveResponseEncoding(HttpWebResponse response)
{
    string contentType = response.ContentType;

    // Only trust CharacterSet when the header really carries a charset
    // parameter; otherwise keep the previous Encoding.Default behaviour.
    bool declaresCharset = !string.IsNullOrEmpty(contentType)
        && contentType.IndexOf("charset=", StringComparison.OrdinalIgnoreCase) >= 0;

    if (declaresCharset && !string.IsNullOrEmpty(response.CharacterSet))
    {
        // Some servers quote the value (charset="utf-8"); strip the quotes.
        string charsetName = response.CharacterSet.Trim(' ', '"', '\'');
        try
        {
            return Encoding.GetEncoding(charsetName);
        }
        catch (ArgumentException)
        {
            // Unknown or malformed charset name: fall through to the default.
        }
        catch (NotSupportedException)
        {
            // Encoding not available on this platform: fall through as well.
        }
    }

    return Encoding.Default;
}
#8
顶!