求教高人,c#模拟登陆再抓取网页的问题,分不多.请高手帮忙
现在的项目要模拟登陆,再抓取登陆后的内容.我做的可以登陆,可以抓取到登陆页面的内容,但是一到登陆后的main页面,就会被退出来.
明明已经登陆进去,我在另一个机子用网页登陆,再用我的程序登,是可以把网页登陆的那个给踢出来的.而且也返回了
main页面的数据...
但再往下,就会被踢出来.
这是得到验证码的代码
// Request the home page first; GetHtml hands back the response's Set-Cookie header through aspcookie.
// NOTE(review): aspcookie is null when the response carries no Set-Cookie header, in which case
// the Split call below throws NullReferenceException.
html = Http.GetHtml("http://www.ibc168.com/", out aspcookie);// read the SessionID from the cookie
aspcookie = aspcookie.Split(';')[0];// optional: keeps only the text before the first ';'
richTextBox1.AppendText("获得的Cookie:" + aspcookie + "\r\n");
string header = "";
// Empty request body: the captcha request needs no form data.
byte[] b = { };
Image img = new Bitmap(
Http.GetStreamByBytes("http://www.ibc168.com/", "http://www.ibc168.com/login_code.aspx?", b,
aspcookie, out header));// download the captcha image, sending the cookie obtained above
this.pictureBox1.Image = img;
取得cookie
/// <summary>
/// Downloads <paramref name="URL"/> with the default credentials and returns the body decoded as UTF-8.
/// </summary>
/// <param name="URL">Address to request.</param>
/// <param name="cookie">
/// Receives the raw value of the response's Set-Cookie header, or null when the server sent none.
/// </param>
/// <returns>The response body as text.</returns>
public static string GetHtml(string URL, out string cookie)
{
    WebRequest request = WebRequest.Create(URL);
    request.Credentials = CredentialCache.DefaultCredentials;

    // Dispose the response and reader so the underlying connection is released
    // even when reading the body throws.
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        string html = reader.ReadToEnd();
        cookie = response.Headers.Get("Set-Cookie");
        return html;
    }
}
取验证码
#region --stream--
/// <summary>
/// Performs the same request as GetHtmlByBytes and exposes the downloaded bytes as an in-memory stream.
/// </summary>
/// <param name="server">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="URL">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="byteRequest">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="cookie">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="header">Receives whatever GetHtmlByBytes writes to its header output.</param>
/// <returns>A MemoryStream wrapping the returned byte array.</returns>
public static Stream GetStreamByBytes(string server, string URL, byte[] byteRequest, string cookie,
    out string header)
{
    return new MemoryStream(GetHtmlByBytes(server, URL, byteRequest, cookie, out header));
}
#endregion
/// <summary>
/// Sends <paramref name="byteRequest"/> as a form-urlencoded POST to <paramref name="URL"/> and
/// returns the raw response body.
/// </summary>
/// <param name="server">Base address the cookie string is associated with in the cookie container.</param>
/// <param name="URL">Address that receives the POST.</param>
/// <param name="byteRequest">Request body; null is treated as an empty body.</param>
/// <param name="cookie">
/// Cookie header text to send. Null or empty means no cookie is attached
/// (previously a null value made SetCookies throw).
/// </param>
/// <param name="header">
/// Receives all response headers as text. A new CookieContainer is created on every call, so a
/// caller that needs cookies issued by this response must read them from here.
/// </param>
/// <returns>The complete response body.</returns>
public static byte[] GetHtmlByBytes(string server, string URL, byte[] byteRequest, string cookie, out string header)
{
    if (byteRequest == null)
    {
        byteRequest = new byte[0];
    }

    HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(URL);

    CookieContainer cookies = new CookieContainer();
    if (!string.IsNullOrEmpty(cookie))
    {
        cookies.SetCookies(new Uri(server), cookie);
    }
    httpWebRequest.CookieContainer = cookies;

    httpWebRequest.ContentType = "application/x-www-form-urlencoded";
    httpWebRequest.Accept =
        "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
    httpWebRequest.Referer = "http://www.ibc168.com/";
    httpWebRequest.UserAgent =
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Maxthon; .NET CLR 1.1.4322)";
    // HTTP method tokens are case-sensitive on the wire; use the canonical upper-case verb.
    httpWebRequest.Method = "POST";
    httpWebRequest.ContentLength = byteRequest.Length;

    using (Stream requestStream = httpWebRequest.GetRequestStream())
    {
        requestStream.Write(byteRequest, 0, byteRequest.Length);
    }

    // The body is read until end-of-stream rather than sized from Content-Length:
    // ContentLength is -1 when the server omits the header, and allocating an array
    // of that size threw before any data was read.
    using (HttpWebResponse webResponse = (HttpWebResponse)httpWebRequest.GetResponse())
    using (Stream responseStream = webResponse.GetResponseStream())
    {
        header = webResponse.Headers.ToString();
        return ReadFully(responseStream);
    }
}
/// <summary>
/// Drains <paramref name="stream"/> from its current position to the end and returns everything read.
/// </summary>
/// <param name="stream">Source stream; it is read but not closed here.</param>
/// <returns>All bytes read, in order; an empty array when nothing could be read.</returns>
public static byte[] ReadFully(Stream stream)
{
    byte[] chunk = new byte[128];
    using (MemoryStream collected = new MemoryStream())
    {
        // Read returns 0 (or less) once the source is exhausted.
        for (int count = stream.Read(chunk, 0, chunk.Length);
             count > 0;
             count = stream.Read(chunk, 0, chunk.Length))
        {
            collected.Write(chunk, 0, count);
        }
        return collected.ToArray();
    }
}
----------------------------------
下面是登陆再取页面的代码
// Step 1: post the login form (user id, password, and the captcha text typed into textBox3).
string url1 = "http://www.ibc168.com/processlogin.aspx";
string postData = "txtID=******5&txtPW=******&txtCode=" + textBox3.Text+"&selLang=en&submit=go";
html = Http.GetHtml(url1, postData, aspcookie, out header); // login
// NOTE(review): aspcookie is never refreshed from `header` in this snippet. If the login response
// issues new cookies they are not sent on the requests below — confirm against the server.
// Step 2: accept the rules page.
url1 = "http://www.ibc168.com/rulesalert.aspx";
postData = "Accept=YES";
html = Http.GetHtml(url1,url1, postData, aspcookie, out header);// accept the rules
// Step 3: cut the first absolute "http://" URL out of the returned HTML.
// NOTE(review): IndexOf returns -1 when the text is missing, which makes Substring throw.
string str = html.Substring(html.IndexOf("http://"));
// Everything before the first '/' found at or after index 7, i.e. scheme plus host.
string http = str.Substring(0, str.IndexOf("/", 7));
// Everything before the last single quote in the remaining text.
url1 = str.Substring(0, str.LastIndexOf("'"));
html = Http.GetHtml(url1, aspcookie, out header);
// Step 4: request the score data page on the host extracted above and display the result.
http = http + "/CorrectScore_data.aspx?Market=CS&Sport=1&RT=U&CT=09%2F15%2F2009+10%3A20%3A47&Game=0";
url1 = http;
html = Http.GetHtml(url1, aspcookie, out header);
richTextBox1.AppendText(html);
/// <summary>
/// Convenience overload: encodes <paramref name="postData"/> with the system default encoding
/// and delegates to the byte-array overload.
/// </summary>
/// <param name="server">Forwarded unchanged.</param>
/// <param name="URL">Forwarded unchanged.</param>
/// <param name="postData">Request body as text.</param>
/// <param name="cookie">Forwarded unchanged.</param>
/// <param name="header">Receives the header output of the byte-array overload.</param>
/// <returns>The string returned by the byte-array overload.</returns>
public static string GetHtml(string server, string URL, string postData, string cookie, out string header)
{
    return GetHtml(server, URL, Encoding.Default.GetBytes(postData), cookie, out header);
}
/// <summary>
/// Calls GetHtmlByBytes with the given arguments and decodes the returned bytes as UTF-8 text.
/// </summary>
/// <param name="server">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="URL">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="byteRequest">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="cookie">Forwarded unchanged to GetHtmlByBytes.</param>
/// <param name="header">Receives whatever GetHtmlByBytes writes to its header output.</param>
/// <returns>The downloaded bytes read through a UTF-8 StreamReader.</returns>
public static string GetHtml(string server, string URL, byte[] byteRequest, string cookie, out string header)
{
    byte[] payload = GetHtmlByBytes(server, URL, byteRequest, cookie, out header);
    using (MemoryStream buffer = new MemoryStream(payload))
    using (StreamReader decoder = new StreamReader(buffer, Encoding.UTF8))
    {
        return decoder.ReadToEnd();
    }
}
/// <summary>
/// Shorthand for the four-argument overload, always passing the site root
/// "http://www.ibc168.com/" as the final argument.
/// </summary>
/// <param name="URL">Forwarded unchanged.</param>
/// <param name="cookie">Forwarded unchanged.</param>
/// <param name="header">Receives the header output of the four-argument overload.</param>
/// <returns>The string returned by the four-argument overload.</returns>
public static string GetHtml(string URL, string cookie, out string header)
{
    string siteRoot = "http://www.ibc168.com/";
    return GetHtml(URL, cookie, out header, siteRoot);
}
请各位大大们帮忙吧..............这里谢过了 --------------------编程问答-------------------- 在线等............. --------------------编程问答-------------------- 应该是cookie的问题,你为什么只取了第一个?
比较一下ie和你的程序post数据的区别
*****************************************************************************
欢迎使用CSDN论坛专用阅读器 : CSDN Reader(附全部源代码)
http://feiyun0112.cnblogs.com/ --------------------编程问答-------------------- 我在取验证码的时候取了一次,以后都是用这个cookie呀.
aspcookie这个是全局的... --------------------编程问答-------------------- 哪里写的不对,请大家帮忙呀... --------------------编程问答-------------------- 没人来帮我看一下么.... --------------------编程问答-------------------- 登录以后取最新cookie --------------------编程问答-------------------- 我试试...谢谢兄弟呀. --------------------编程问答-------------------- 太复杂了,看不懂! --------------------编程问答-------------------- 我也做这个不会做。等待高手。听说是使用httpwatch。 --------------------编程问答-------------------- 代码要点:
1、通过附加一个cookiecontainer到httprequest对象中,可以得到登录后返回的代表SESSION ID的COOKIE。
2、将此COOKIE包含在一个cookiecontainer中并附加到另一个HTTPREQUEST请求中,则可以实现SESSION的还原。
部分主要代码:
// One cookie container is shared by every request below, so cookies issued by one
// response (for example the session id) are sent back on the following requests.
CookieContainer cookieContainer = new CookieContainer();

///////////////////////////////////////////////////
// 1. Open Login.aspx and read __VIEWSTATE and __EVENTVALIDATION.
//    These hidden fields are only present when the login page is an ASP.NET page;
//    when a field is missing, an empty value is posted for it.
///////////////////////////////////////////////////
// Parameters for opening the page
string URI = "http://localhost/Test/Login.aspx";
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URI);
request.Method = "GET";
request.KeepAlive = false;
// Attach the container here too, so a cookie set by the login page itself is kept.
request.CookieContainer = cookieContainer;

// Receive the returned page; the response is disposed as soon as the body is read.
string srcString;
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (System.IO.StreamReader reader = new System.IO.StreamReader(response.GetResponseStream(), Encoding.UTF8))
{
    srcString = reader.ReadToEnd();
}

// Extract the page's ViewState (text between the marker and the next double quote)
string viewStateFlag = "id=\"__VIEWSTATE\" value=\"";
string viewState = "";
int i = srcString.IndexOf(viewStateFlag);
int j = -1;
if (i >= 0)
{
    i += viewStateFlag.Length;
    j = srcString.IndexOf("\"", i);
    if (j >= 0)
    {
        viewState = srcString.Substring(i, j - i);
    }
}

// Extract the page's EventValidation the same way
string eventValidationFlag = "id=\"__EVENTVALIDATION\" value=\"";
string eventValidation = "";
i = srcString.IndexOf(eventValidationFlag);
if (i >= 0)
{
    i += eventValidationFlag.Length;
    j = srcString.IndexOf("\"", i);
    if (j >= 0)
    {
        eventValidation = srcString.Substring(i, j - i);
    }
}

///////////////////////////////////////////////////
// 2. Fill in and submit Login.aspx automatically
///////////////////////////////////////////////////
// Text of the submit button
string submitButton = "登录";
// User name and password
string userName = "1";
string password = "1";
// URL-encode the values that go into the form body
viewState = System.Web.HttpUtility.UrlEncode(viewState);
eventValidation = System.Web.HttpUtility.UrlEncode(eventValidation);
submitButton = System.Web.HttpUtility.UrlEncode(submitButton);
// Form body to submit, shaped like: user=user1&password=123
string formatString =
    "userName={0}&password={1}&loginButton={2}&__VIEWSTATE={3}&__EVENTVALIDATION={4}";
string postString =
    string.Format(formatString, userName, password, submitButton, viewState, eventValidation);
// Convert the form body to bytes (already URL-encoded, so ASCII is sufficient)
byte[] postData = Encoding.ASCII.GetBytes(postString);

// Parameters for the submission
request = (HttpWebRequest)WebRequest.Create(URI);
request.Method = "POST";
request.KeepAlive = false;
request.ContentType = "application/x-www-form-urlencoded";
request.CookieContainer = cookieContainer;
request.ContentLength = postData.Length;

// Send the request body
using (System.IO.Stream outputStream = request.GetRequestStream())
{
    outputStream.Write(postData, 0, postData.Length);
}

// Receive the returned page
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (System.IO.StreamReader reader = new System.IO.StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
{
    srcString = reader.ReadToEnd();
}

///////////////////////////////////////////////////
// 3. Open Default.aspx
///////////////////////////////////////////////////
// Parameters for opening the page
// NOTE(review): this address uses a different port and application path than the
// login URL above — confirm the login cookie is valid for it.
URI = "http://localhost:1165/WebTest/Default.aspx";
request = (HttpWebRequest)WebRequest.Create(URI);
request.Method = "GET";
request.KeepAlive = false;
request.CookieContainer = cookieContainer;

// Receive the returned page
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (System.IO.StreamReader reader = new System.IO.StreamReader(response.GetResponseStream(), Encoding.UTF8))
{
    srcString = reader.ReadToEnd();
}

///////////////////////////////////////////////////
// 4. Analyse the returned page (srcString)
///////////////////////////////////////////////////
本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/zengfanxing/archive/2009/08/23/4476400.aspx --------------------编程问答-------------------- 参考 --------------------编程问答-------------------- 可是登陆页面里没有VIEWSTATE和EVENTVALIDATION --------------------编程问答-------------------- http://download.csdn.net/source/251039
登陆爬虫 C# 代码 --------------------编程问答-------------------- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title></title>
<link href="template/ibcbet/en/css/Language.css?v=20090820" rel="stylesheet" type="text/css" />
<link href="template/ibcbet/en/css/login.css?v=20090720" rel="stylesheet" type="text/css" />
<link href="template/ibcbet/public/css/button.css" rel="stylesheet" type="text/css" />
<style type="text/css">
<!--
body {
background-color: #e8eff5;
}
-->
</style>
<script type="text/JavaScript">
<!--
var i=0;
// Validates the login form: returns false (after focusing the field and alerting)
// for the first empty required field, true when all three are filled in.
function callSubmit(){
    // Required field ids in the order they are checked, each with its alert text.
    var required=[
        ['txtID','Please enter username'],
        ['txtPW','Please enter password'],
        ['txtCode','Please enter validation ']
    ];
    for(var k=0;k<required.length;k++){
        var field=document.getElementById(required[k][0]);
        if(field.value==''){
            field.focus();
            alert(required[k][1]);
            return false;
        }
    }
    //document.getElementById('hidSelLang').value='';
    return true;
}
// Change language
// Stores the chosen language in the hidden frmChangeLang form, marks the
// request as not logged in, and submits that form.
function changeLan(selValue){
    var langForm=document.frmChangeLang;
    langForm.hidSelLang.value=selValue;
    langForm.hidIsLogin.value="no";
    langForm.submit();
}
// Copies TopNews_Data.pubmsg into the "Hotnews" element.
function loadTopNews()
{
    document.getElementById("Hotnews").innerHTML = TopNews_Data.pubmsg;
}
// Submits the frmTopNewsData form.
function refreshTopNewsData()
{
    document.getElementById('frmTopNewsData').submit();
}
// Bumps the global counter i and points the captcha image at
// login_code.aspx with the new counter value as the query string.
function reloadValidatecode()
{
    i += 1;
    var captcha = document.getElementById('validateCode');
    captcha.src = 'login_code.aspx?' + i;
}
setInterval("refreshTopNewsData()",60000);
//-->
</script>
</head>
<body onLoad="document.getElementById('txtID').focus();refreshTopNewsData()">
<div>
<!--logo-->
<div id="containerHead" class="newhead">
<div id="ibclogo"></div>
<div id="newtopmenu" class="topmenu">
<!--menu-->
<ul>
<li><a href="index_info.aspx?page=1" target="_blank"><span>About Us | </span></a></li>
<li><a href="index_info.aspx?page=2" target="_blank"><span>Open Account | </span></a></li>
<li><a href="index_info.aspx?page=3" target="_blank"><span>Account | </span></a></li>
<li><a href="index_info.aspx?page=4" target="_blank"><span>How To Use | </span></a></li>
<li><a href="index_info.aspx?page=5" target="_blank"><span>Rules & Regulations | </span></a></li>
<li><a href="index_info.aspx?page=6" target="_blank"><span>FAQ | </span></a></li>
<li><a href="index_info.aspx?page=7" target="_blank"><span>Contact Us</span></a></li>
</ul>
</div>
<!--top news-->
<div id="containerHotnews">
<form id="form3" name="form3" method="post" action="">
<marquee id="Hotnews" scrollamount='2' scrolldelay='20' onmouseover="Hotnews.stop()" onmouseout="Hotnews.start()">To Our valued member/customers: we are not associated with any other website other than http://www.ibcbet.com We advise that you check with us to avoid any misunderstanding or fraud perpetuated on you with regard to any websites that claim to be associated with or related to IBCBET.com.We advice strongly not to deal with third parties claiming to be agents/affiliates of our company in gaming forums, chat rooms or websites or to follow solicitation regarding sign up bonuses; such postings and solicitations are not approved by our company, we do not allow such activities by agents/affiliates.</marquee>
</form>
</div>
<!--menu end-->
<!--member login-->
<form id="frmLogin" name="frmLogin" method="post" action="https://www.ibc168.com/ProcessLogin.aspx">
<div id="login">
<!-- BEGIN IBC_LanOptBlock -->
<span>Select Language</span>
<select name="selLang" class="font" onchange="changeLan(this[this.selectedIndex].value);">
<option value='ko' >한국어</option>
<option value='th' >ภาษาไทย</option>
<option value='jp' >日本語</option>
<option value='it' >Italiano</option>
<option value='cs' >简体中文</option>
<option value='ch' >繁體中文</option>
<option value='en' selected>English</option>
</select>
<!-- END IBC_LanOptBlock -->
<span>Username</span>
<input name="txtID" id="txtID" type="text" class="font" maxlength="20" size="10" />
<span>Password</span>
<input name="txtPW" id="txtPW" type="password" class="font" maxlength="12" size="12" />
<span>Validation</span>
<input id="txtCode" name="txtCode" type="text" class="font" maxlength="5" size="5" />
<img id="validateCode" width="55px" height="20px" src="login_code.aspx?"+i onclick="reloadValidatecode()" align="absmiddle" / class="code">
<input name="submit" type=submit class="input_b" onclick="return callSubmit()" value="GO" />
</div>
</form>
<!--member login end-->
</div>
</div>
<form name="frmTopNewsData" id="frmTopNewsData" action="TopNews_Data.aspx" target="TopNews_Data">
</form>
<form id="frmChangeLang" name="frmChangeLang" method="post" action="ChangeLanguage.aspx">
<input id="hidSelLang" name="hidSelLang" type="hidden" />
<input type=hidden name=hidIsLogin value="no">
</form>
<iframe name="TopNews_Data" id="TopNews_Data" src="" width="0" height="0" frameborder ="0" ></iframe>
</body>
</html>
这是登陆页面的代码... --------------------编程问答-------------------- <input name="txtID" id="txtID" type="text" class="font" maxlength="20" size="10" />
<span>Password </span>
<input name="txtPW" id="txtPW" type="password" class="font" maxlength="12" size="12" />
<span>Validation </span>
<input id="txtCode" name="txtCode" type="text" class="font" maxlength="5" size="5" />
<img id="validateCode" width="55px" height="20px" src="login_code.aspx?"+i onclick="reloadValidatecode()" align="absmiddle" / class="code">
--------------------编程问答-------------------- 啥意思? --------------------编程问答-------------------- 我想抓取一个网页的数据.可是取不到,都能登陆了...进main页面时就返回登陆页面了. --------------------编程问答--------------------
我取了,但是还是返回到登陆页面...而且,那个cookie不是每次都可以取得到的. --------------------编程问答-------------------- 不知道为什么 --------------------编程问答-------------------- 好像很复杂啊,看看 --------------------编程问答-------------------- [img=http://][/img] --------------------编程问答-------------------- up
补充:.NET技术 , C#