C#获取指定网页源码的几种方法

时间:2022-01-11 22:44:01

// WebClient private string GetWebClient(string url) { string strHTML = ""; WebClient myWebClient = new WebClient(); Stream myStream = myWebClient.OpenRead(url); StreamReader sr = new StreamReader(myStream, System.Text.Encoding.GetEncoding("utf-8")); strHTML = sr.ReadToEnd(); myStream.Close(); return strHTML; } // WebRequest private string GetWebRequest(string url) { Uri uri = new Uri(url); WebRequest myReq = WebRequest.Create(uri); WebResponse result = myReq.GetResponse(); Stream receviceStream = result.GetResponseStream(); StreamReader readerOfStream = new StreamReader(receviceStream,System.Text.Encoding.GetEncoding("gb2312")); string strHTML = readerOfStream.ReadToEnd(); readerOfStream.Close(); receviceStream.Close(); result.Close(); return strHTML; } // HttpWebRequest private string GetHttpWebRequest(string url) { try { Uri uri = new Uri(url); HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(uri); myReq.UserAgent = "User-Agent:Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705"; myReq.Accept = "*/*"; myReq.KeepAlive = true; myReq.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5"); HttpWebResponse result = (HttpWebResponse)myReq.GetResponse(); Stream receviceStream = result.GetResponseStream(); StreamReader readerOfStream = new StreamReader(receviceStream, System.Text.Encoding.GetEncoding("gb2312")); string strHTML = readerOfStream.ReadToEnd(); readerOfStream.Close(); receviceStream.Close(); result.Close(); return strHTML; } catch (Exception ex) { throw new Exception("采集指定网址异常," + ex.Message); } }

  

// 获取网页源码,,如果启用了gzip压缩后页面获取会产生乱码,采用此方法可解决gzip压缩而产生的乱码情况 private string GetHtmlCode(string url) { string htmlCode; HttpWebRequest webRequest = (System.Net.HttpWebRequest)System.Net.WebRequest.Create(url); webRequest.Timeout = 30000; webRequest.Method = "GET"; webRequest.UserAgent = "Mozilla/4.0"; webRequest.Headers.Add("Accept-Encoding", "gzip, deflate"); HttpWebResponse webResponse = (System.Net.HttpWebResponse)webRequest.GetResponse(); if (webResponse.ContentEncoding.ToLower() == "gzip")//如果使用了GZip则先解压 { using (System.IO.Stream streamReceive = webResponse.GetResponseStream()) { using (var zipStream = new System.IO.Compression.GZipStream(streamReceive, System.IO.Compression.CompressionMode.Decompress)) { using (StreamReader sr = new System.IO.StreamReader(zipStream, Encoding.Default)) { htmlCode = sr.ReadToEnd(); } } } } else { using (System.IO.Stream streamReceive = webResponse.GetResponseStream()) { using (System.IO.StreamReader sr = new System.IO.StreamReader(streamReceive, Encoding.Default)) { htmlCode = sr.ReadToEnd(); } } } return htmlCode; }