怎么做一个新闻发布网站 高手们给点建议
我们公司现在在做新闻发布网站,要求是自动从几大知名网站上按照给出的新闻标题或关键字抓取新闻的发布时间、内容等信息!--------------------编程问答-------------------- rss --------------------编程问答-------------------- 2楼的大哥能不能说清楚点 --------------------编程问答-------------------- //获取http页面函数
/// <summary>
/// Downloads the page at <paramref name="a_strUrl"/> and returns its text.
/// </summary>
/// <param name="a_strUrl">URL of the page to fetch.</param>
/// <param name="timeout">Value assigned to HttpWebRequest.Timeout (milliseconds).</param>
/// <returns>
/// The response body decoded with Encoding.Default, with every line terminated
/// by "\r\n". If anything fails, the exception is not propagated; instead the
/// string "错误:" followed by the exception message is returned.
/// </returns>
public string Get_Http(string a_strUrl,int timeout)
{
    string strResult;
    try
    {
        HttpWebRequest myReq = (HttpWebRequest)HttpWebRequest.Create(a_strUrl);
        myReq.Timeout = timeout;

        // Dispose the response, stream and reader deterministically so the
        // underlying connection is released even when reading throws.
        using (HttpWebResponse httpResp = (HttpWebResponse)myReq.GetResponse())
        using (Stream myStream = httpResp.GetResponseStream())
        using (StreamReader sr = new StreamReader(myStream, Encoding.Default))
        {
            StringBuilder strBuilder = new StringBuilder();
            string line;
            // Read until ReadLine() reports end of stream. Peek() is not a
            // reliable end-of-data test on a network stream: it can return -1
            // while more data is still in flight, truncating the page.
            while ((line = sr.ReadLine()) != null)
            {
                strBuilder.Append(line).Append("\r\n");
            }
            strResult = strBuilder.ToString();
        }
    }
    catch (Exception exp)
    {
        // Callers receive failures as an in-band error string.
        strResult = "错误:" + exp.Message;
    }
    return strResult;
}
获取页面内容后,分析页面中的链接地址,取到要抓取的url:
//处理页面标题和链接
public string SniffWebUrl( string urlStr,string blockB,string blockE )
{
string urlch1 = "";
string urlch2 = "";
int end_n1 = 0;
int end_nums = 0;
int end_nums1 = 0;
int end_nums2 = 0;
int end_nums3 = 0;
string reUTStr = "";
string reTitle = "";
string ret = "";
try
{
int pos01 = urlStr.IndexOf( "." );
int pos02 = urlStr.LastIndexOf( "/" );
if( pos01 < 0 )
{
return "";
}
if( pos02 < 0 )
{
return "";
}
int pos03 = urlStr.IndexOf( "/",pos01 );
if ( pos03 < 0 )
{
urlch1 = urlStr;
urlch2 = urlStr;
}
else
{
urlch1 = urlStr.Substring( 0,pos03 );
urlch2 = urlStr.Substring( 0,pos02 );
}
string tmpAllStr = new PublicFun().Get_Http( urlStr ,time1);
int pos1 = tmpAllStr.IndexOf( blockB );
int pos2 = tmpAllStr.IndexOf( blockE,pos1 + blockB.Length );
if ( pos1>0 && pos2>0 && pos2>pos1 )
{
ret = tmpAllStr.Substring( pos1 + blockB.Length,pos2 - pos1 - blockB.Length );
ret = ret.Substring( ret.IndexOf( "<" ));
while( ret.IndexOf( "<A" ) >= 0 )
{
ret = ret.Substring( 0,ret.IndexOf( "<A" ) ) + "<a" + ret.Substring( ret.IndexOf( "<A" ) + 2 );
}
while( ret.IndexOf( "</A" ) >=0 )
{
ret = ret.Substring( 0,ret.IndexOf( "</A" ) ) + "</a" + ret.Substring( ret.IndexOf( "</A" ) + 3 );
}
while( ret.IndexOf( "Href=" ) >=0 )
{
ret = ret.Substring( 0,ret.IndexOf( "Href=" )) + "href=" + ret.Substring( ret.IndexOf( "Href=" ) + 5 );
}
while( ret.IndexOf( "HREF=" ) >=0 )
{
ret = ret.Substring( 0,ret.IndexOf( "HREF=" )) + "href=" + ret.Substring( ret.IndexOf( "HREF=" ) + 5 );
}
while( ret.IndexOf( "href='" ) >=0 )
{
ret = ret.Substring( 0,ret.IndexOf( "href='" )) + "href=\"" + ret.Substring( ret.IndexOf( "href='" ) + 6 );
--------------------编程问答-------------------- 学学学 --------------------编程问答-------------------- mark --------------------编程问答-------------------- 用Sharepoint然后爬网
补充:.NET技术 , C#