using System;
using System.Text;
using System.Net;
using System.IO;
using System.Threading;
using System.Text.RegularExpressions;

namespace Ant.Service.Utilities
{
    /// <summary>
    /// Static helpers that download content over HTTP with <see cref="HttpWebRequest"/>
    /// (with a randomized delay and a bounded number of retries) and strip HTML markup.
    /// </summary>
    public class HTMLHelper
    {
        #region Private fields

        private static CookieContainer cc = new CookieContainer();
        private static string contentType = "application/x-www-form-urlencoded";
        private static string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
        private static string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
        private static Encoding encoding = Encoding.GetEncoding("utf-8");
        private static int delay = 1000;
        private static int maxTry = 300;

        // The fetch methods in this part of the file count attempts in a local variable and
        // no longer touch this field. It is kept in case members further down the class
        // still reference it.
        private static int currentTry = 0;

        // One shared generator: creating a time-seeded Random per call can hand out the
        // same value to calls made close together. Random is not thread-safe, hence the lock.
        private static readonly Random delayRandom = new Random();
        private static readonly object delayRandomLock = new object();

        #endregion

        #region Public properties

        /// <summary>
        /// Gets the cookie container owned by this class.
        /// </summary>
        public static CookieContainer CookieContainer
        {
            get { return cc; }
        }

        /// <summary>
        /// Gets or sets the encoding used to decode response bodies in the GetHtml overloads.
        /// </summary>
        public static Encoding Encoding
        {
            get { return encoding; }
            set { encoding = value; }
        }

        /// <summary>
        /// Gets a random wait time in milliseconds, at least the configured base delay and
        /// below twice that value; sets the base delay.
        /// </summary>
        public static int NetworkDelay
        {
            get
            {
                lock (delayRandomLock)
                {
                    return delayRandom.Next(delay, delay * 2);
                }
            }
            set { delay = value; }
        }

        /// <summary>
        /// Gets or sets how many times a failed request is retried. The same value is also
        /// applied as the connection limit of each request's service point.
        /// </summary>
        public static int MaxTry
        {
            get { return maxTry; }
            set { maxTry = value; }
        }

        #endregion

        #region Fetch HTML

        /// <summary>
        /// Downloads the response body of <paramref name="url"/> as text, sending
        /// <paramref name="postData"/> as the body of a POST request.
        /// </summary>
        /// <param name="url">Address to request; also sent as the Referer header.</param>
        /// <param name="postData">Form data to post. When null or empty a plain GET is issued.</param>
        /// <param name="isPost">
        /// True to POST <paramref name="postData"/>. When false a plain GET is issued and
        /// <paramref name="postData"/> is not sent, because HttpWebRequest rejects a request
        /// body on GET.
        /// </param>
        /// <param name="cookieContainer">Cookie container attached to the request.</param>
        /// <returns>
        /// The response text, or <see cref="string.Empty"/> when the first attempt and all
        /// <see cref="MaxTry"/> retries fail.
        /// </returns>
        public static string GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)
        {
            if (string.IsNullOrEmpty(postData) || !isPost)
            {
                return GetHtml(url, cookieContainer);
            }

            // NOTE(review): the body is encoded with the system default encoding, not the
            // configurable Encoding property used for responses - confirm this is intended.
            byte[] body = Encoding.Default.GetBytes(postData);

            for (int attempt = 0; ; attempt++)
            {
                // Every attempt, including the first, waits a randomized delay.
                Thread.Sleep(NetworkDelay);

                HttpWebRequest request = null;
                try
                {
                    request = CreateConfiguredRequest(url, cookieContainer, "POST");
                    request.ContentLength = body.Length;

                    using (Stream requestStream = request.GetRequestStream())
                    {
                        requestStream.Write(body, 0, body.Length);
                    }

                    return ReadResponseText(request);
                }
                catch (Exception)
                {
                    // Best effort: any failure (including HTTP error statuses, which
                    // GetResponse reports as exceptions) is retried until the budget runs out.
                    if (request != null)
                    {
                        request.Abort();
                    }

                    if (attempt >= maxTry)
                    {
                        return string.Empty;
                    }
                }
            }
        }

        /// <summary>
        /// Downloads the response body of <paramref name="url"/> as text using a GET request.
        /// </summary>
        /// <param name="url">Address to request; also sent as the Referer header.</param>
        /// <param name="cookieContainer">Cookie container attached to the request.</param>
        /// <returns>
        /// The response text, or <see cref="string.Empty"/> when the first attempt and all
        /// <see cref="MaxTry"/> retries fail.
        /// </returns>
        public static string GetHtml(string url, CookieContainer cookieContainer)
        {
            for (int attempt = 0; ; attempt++)
            {
                // Every attempt, including the first, waits a randomized delay.
                Thread.Sleep(NetworkDelay);

                HttpWebRequest request = null;
                try
                {
                    request = CreateConfiguredRequest(url, cookieContainer, "GET");
                    return ReadResponseText(request);
                }
                catch (Exception)
                {
                    // Best effort: swallow the failure and retry until the budget runs out.
                    if (request != null)
                    {
                        request.Abort();
                    }

                    if (attempt >= maxTry)
                    {
                        return string.Empty;
                    }
                }
            }
        }

        /// <summary>
        /// Creates a request for <paramref name="url"/> carrying the headers shared by all
        /// fetch methods of this class.
        /// </summary>
        private static HttpWebRequest CreateConfiguredRequest(string url, CookieContainer cookieContainer, string method)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.CookieContainer = cookieContainer;
            request.ContentType = contentType;
            request.ServicePoint.ConnectionLimit = maxTry;
            request.Referer = url;
            request.Accept = accept;
            request.UserAgent = userAgent;
            request.Method = method;
            return request;
        }

        /// <summary>
        /// Sends <paramref name="request"/> and reads the whole response body as text with
        /// the configured encoding, releasing the response, stream and reader even on failure.
        /// </summary>
        private static string ReadResponseText(HttpWebRequest request)
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, encoding))
            {
                return reader.ReadToEnd();
            }
        }

        #endregion

        #region Fetch stream

        //---------------------------------------------------------------------------------------------------------------
        // Example:
        // System.Net.CookieContainer cookie = new System.Net.CookieContainer();
        // Stream s = HTMLHelper.GetStream("http://ptlogin2.qq.com/getimage?aid=15000102&0.43878429697395826", cookie);
        // picVerify.Image = Image.FromStream(s);
        //---------------------------------------------------------------------------------------------------------------

        /// <summary>
        /// Requests <paramref name="url"/> with GET and returns the raw response stream.
        /// </summary>
        /// <param name="url">Address to request; also sent as the Referer header.</param>
        /// <param name="cookieContainer">Cookie container attached to the request.</param>
        /// <returns>
        /// The response stream, left open (this method closes neither it nor the response),
        /// or null when the first attempt and all <see cref="MaxTry"/> retries fail.
        /// </returns>
        public static Stream GetStream(string url, CookieContainer cookieContainer)
        {
            for (int attempt = 0; ; attempt++)
            {
                HttpWebRequest request = null;
                HttpWebResponse response = null;
                try
                {
                    request = CreateConfiguredRequest(url, cookieContainer, "GET");
                    response = (HttpWebResponse)request.GetResponse();
                    return response.GetResponseStream();
                }
                catch (Exception)
                {
                    // Best effort: release what was opened, then retry the stream request
                    // itself until the budget runs out.
                    if (request != null)
                    {
                        request.Abort();
                    }

                    if (response != null)
                    {
                        response.Close();
                    }

                    if (attempt >= maxTry)
                    {
                        return null;
                    }
                }

                // The first attempt is immediate; only retries wait a randomized delay.
                Thread.Sleep(NetworkDelay);
            }
        }

        #endregion

        #region Strip HTML markup

        /// <summary>
        /// Strips HTML markup.
        /// </summary>
        /// <param name="Htmlstring">Source text containing HTML.</param>
        /// <returns>The text after removal.</returns>
        public static string NoHTML(string Htmlstring)
        {
            // Remove scripts.
            // NOTE(review): this pattern contains no script tag name - part of it looks
            // lost; confirm against the original source.
            Htmlstring = Regex.Replace(Htmlstring, @"]*?>.*?", "", RegexOptions.IgnoreCase);
            // Remove HTML tags.
            Regex regex = new Regex("<.+?>", RegexOptions.IgnoreCase);
            Htmlstring = regex.Replace(Htmlstring, "");
            Htmlstring =
Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"