用 DOM 实现文章采集——通过 jQuery 语法式的方法采集指定对象的文本
[csharp]
/// <summary>
/// DOM查询器,用法跟jquery差不多
/// </summary>
public class DomQuery
{
/// <summary>
/// Finds the nodes matched by a jQuery-like, space-separated selector.
/// </summary>
/// <param name="_HtmlDocument">The document to search.</param>
/// <param name="selector">
/// Selector expressions separated by spaces. If the first expression starts
/// with '#', it is looked up by element id; every remaining expression is
/// applied in order through the node-list <c>Get</c> overload.
/// </param>
/// <returns>
/// The matching nodes. The list is empty when nothing matches or when
/// <paramref name="selector"/> contains no expressions.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="_HtmlDocument"/> or <paramref name="selector"/> is null.
/// </exception>
public IList<HtmlNode> Get(HtmlDocument _HtmlDocument, string selector)
{
    if (_HtmlDocument == null)
    {
        throw new ArgumentNullException("_HtmlDocument");
    }
    if (selector == null)
    {
        throw new ArgumentNullException("selector");
    }

    string[] expressions = selector.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    List<HtmlNode> matches = new List<HtmlNode>();

    // An empty or blank selector has no first expression to inspect;
    // report "no match" instead of indexing past the end of the array.
    if (expressions.Length == 0)
    {
        return matches;
    }

    // Index of the first expression that still has to be applied to the seed nodes.
    int firstPending = 0;
    if (expressions[0].StartsWith("#", StringComparison.Ordinal))
    {
        // Id lookup seeds the search with at most one node and consumes the first expression.
        HtmlNode byId = _HtmlDocument.GetElementbyId(expressions[0].TrimStart('#'));
        if (byId != null)
        {
            matches.Add(byId);
        }
        firstPending = 1;
    }
    else
    {
        // No id anchor: seed the search with the element children of the document node.
        matches.AddRange(_HtmlDocument.DocumentNode.ChildNodes.Where(x => x.NodeType == HtmlNodeType.Element));
    }

    // Narrow the current node set one expression at a time.
    for (int i = firstPending; i < expressions.Length; i++)
    {
        matches = Get(matches, expressions[i]);
    }
    return matches;
}
/// <summary>
/// Looks up the first node matching the selector and returns its InnerHtml.
/// </summary>
/// <param name="_HtmlDocument">The document to search.</param>
/// <param name="selector">Selector passed through to <c>SingleGet</c>.</param>
/// <returns>The InnerHtml of the first match, or null when nothing matches.</returns>
public string SingleGetInnerHtml(HtmlDocument _HtmlDocument, string selector)
{
    HtmlNode match = SingleGet(_HtmlDocument, selector);
    return match == null ? null : match.InnerHtml;
}
/// <summary>
/// Looks up the first node matching the selector and returns its trimmed InnerText.
/// </summary>
/// <param name="_HtmlDocument">The document to search.</param>
/// <param name="selector">Selector passed through to <c>SingleGet</c>.</param>
/// <returns>
/// The InnerText of the first match with leading and trailing whitespace
/// removed, or null when nothing matches.
/// </returns>
public string SingleGetInnerText(HtmlDocument _HtmlDocument, string selector)
{
    HtmlNode match = SingleGet(_HtmlDocument, selector);
    if (match != null)
    {
        return match.InnerText.Trim();
    }
    return null;
}
/// <summary>
/// Returns the first node matching the selector.
/// </summary>
/// <param name="_HtmlDocument">The document to search.</param>
/// <param name="selector">Selector passed through to <c>Get</c>.</param>
/// <returns>The first matching node, or null when the result list is empty.</returns>
public HtmlNode SingleGet(HtmlDocument _HtmlDocument, string selector)
{
    IList<HtmlNode> matches = Get(_HtmlDocument, selector);
    return matches.Count > 0 ? matches[0] : null;
}
#region 获得属性
/// <summary>
/// 获得属性
/// </summary>
/// <param name="_HtmlNodes"></param>
/// <param name="attr"></param>
/// <returns></returns>
public string[] Attr(IList<HtmlNode> _HtmlNodes, string attr)
{
if (_HtmlNodes == null)
{
return new string[0];
}
if (_HtmlNodes.Count() == 0)
{
return new string[0];
}
var v = from x in _HtmlNodes where x.At
补充:Web开发, ASP.NET