当前位置:编程学习 > asp >>

用DOM实现文章采集--采集到网页源码

先给出一个用于采集网页源码的辅助类代码。
[csharp]
using System; 
using System.Collections.Generic; 
using System.IO; 
using System.IO.Compression; 
using System.Net; 
using System.Text; 
namespace TopWinCMS.Common 

    public class NetHelper 
    { 
 
        // Alternative browser-style (MSIE 6.0) User-Agent string; disabled, kept for reference only. 
        //private string _HTTP_USER_AGENT = "Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.2;+SV1;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727)"; 
        // Backing field for the UserAgent property; the default value is a Googlebot identifier. 
        private string _UserAgent = "Googlebot/2.1 (+http://www.google.com/bot.html)"; 
        // Backing field for the HttpEncoding property; stays null until a caller assigns an encoding. 
        private Encoding _HttpEncoding = null; 
        // Backing field for the ProxyHost property (proxy server); empty by default. 
        private string _ProxyHost = string.Empty; 
        // Backing field for the ProxyInt property (proxy server port). 
        private int _ProxyInt = 8080; 
        // Backing field for the TimeOut property; Get(Uri) assigns it to HttpWebRequest.Timeout, 
        // which is measured in milliseconds. 
        private int _TimeOut = 200000; 
 
        #region 属性 
        /// <summary> 
        /// 设置UserAgent 
        /// </summary> 
        public string UserAgent 
        { 
            get 
            { 
                return this._UserAgent; 
            } 
            set 
            { 
                this._UserAgent = value; 
            } 
        } 
        /// <summary> 
        /// 设置编码 
        /// </summary> 
        public Encoding HttpEncoding 
        { 
            get 
            { 
                return this._HttpEncoding; 
            } 
            set 
            { 
                this._HttpEncoding = value; 
            } 
        } 
        /// <summary> 
        /// 设置代理服务器 
        /// </summary> 
        public string ProxyHost 
        { 
            get 
            { 
                return this._ProxyHost; 
            } 
            set 
            { 
                this._ProxyHost = value; 
            } 
        } 
        /// <summary> 
        /// 设置代理服务器端口 
        /// </summary> 
        public int ProxyInt 
        { 
            get 
            { 
                return this._ProxyInt; 
            } 
            set 
            { 
                this._ProxyInt = value; 
            } 
        } 
        /// <summary> 
        /// 设置默认超时时间 
        /// </summary> 
        public int TimeOut 
        { 
            get 
            { 
                return this._TimeOut; 
            } 
            set 
            { 
                this._TimeOut = value; 
            } 
        } 
        #endregion 
 
        public RemoteRes Get(string uri) 
        { 
            return Get(new Uri(uri)); 
        } 
        public RemoteRes Get(Uri uri) 
        { 
            RemoteRes info = new RemoteRes(); 
 
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri); 
            request.Timeout = this._TimeOut; 
            request.UserAgent = this
补充:Web开发, ASP.NET
CopyRight © 2022 站长资源库 编程知识问答 zzzyk.com All Rights Reserved
部分文章来自网络,