用DOM实现文章采集--采集到网页源码
先给出采集网页源码的代码(C#):
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Net;
using System.Text;
namespace TopWinCMS.Common
{
public class NetHelper
{
// Alternative browser-style User-Agent string, left disabled for reference.
//private string _HTTP_USER_AGENT = "Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.2;+SV1;+.NET+CLR+1.1.4322;+.NET+CLR+2.0.50727)";
// Default User-Agent string; exposed through the UserAgent property.
private string _UserAgent = "Googlebot/2.1 (+http://www.google.com/bot.html)";
// Encoding setting exposed through HttpEncoding; stays null until a caller assigns one.
// NOTE(review): how a null encoding is handled is not visible in this part of the file - confirm.
private Encoding _HttpEncoding = null;
// Proxy server host; empty by default.
// NOTE(review): presumably an empty host means "no proxy" - verify against the request code.
private string _ProxyHost = string.Empty;
// Proxy server port.
private int _ProxyInt = 8080;
// Default timeout. Get(Uri) copies it to HttpWebRequest.Timeout, which is expressed in
// milliseconds, so 200000 corresponds to 200 seconds.
private int _TimeOut = 200000;
#region 属性
/// <summary>
/// Gets or sets the User-Agent string held by this helper.
/// </summary>
public string UserAgent
{
    get { return _UserAgent; }
    set { _UserAgent = value; }
}
/// <summary>
/// Gets or sets the encoding configured on this helper.
/// </summary>
/// <remarks>
/// The backing field starts out as <c>null</c>; the getter returns whatever was last assigned.
/// </remarks>
public Encoding HttpEncoding
{
    get { return _HttpEncoding; }
    set { _HttpEncoding = value; }
}
/// <summary>
/// Gets or sets the proxy server host.
/// </summary>
public string ProxyHost
{
    get { return _ProxyHost; }
    set { _ProxyHost = value; }
}
/// <summary>
/// Gets or sets the proxy server port.
/// </summary>
public int ProxyInt
{
    get { return _ProxyInt; }
    set { _ProxyInt = value; }
}
/// <summary>
/// Gets or sets the default request timeout.
/// </summary>
/// <remarks>
/// The value is stored as-is, with no validation. <c>Get(Uri)</c> assigns it to
/// <c>HttpWebRequest.Timeout</c>, so it is interpreted as milliseconds.
/// </remarks>
public int TimeOut
{
    get { return _TimeOut; }
    set { _TimeOut = value; }
}
#endregion
/// <summary>
/// Convenience overload: builds a <see cref="Uri"/> from the given string and
/// forwards the call to the <c>Get(Uri)</c> overload.
/// </summary>
/// <param name="uri">Address string handed to the <see cref="Uri"/> constructor.</param>
/// <returns>Whatever the <c>Get(Uri)</c> overload returns for the parsed address.</returns>
public RemoteRes Get(string uri)
{
    Uri target = new Uri(uri);
    return this.Get(target);
}
public RemoteRes Get(Uri uri)
{
RemoteRes info = new RemoteRes();
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
request.Timeout = this._TimeOut;
request.UserAgent = this
补充:Web开发、ASP.NET