HttpClient模拟登陆人人网,并且爬取日志内容(一)
使用HttpClient最新版本,下载地址:Download/2012/0428/20120428021640813.zip
注释已经写得比较清楚了,这里就不再另外说明了。
[java]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see <http://www.apache.org/>.
* renren.com
* <input type="hidden" name="origURL" value="http://www.renren.com/home" />
* <input type="hidden" name="domain" value="renren.com" />
* <input type="hidden" name="key_id" value="1" />
* <input type="submit" id="login" class="input-submit login-btn" value="登录人人网" tabindex="5"/>
* http://s.xnimg.cn/a36853/n/apps/login/login-all.js
*/
package org.apache.http.examples.client;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
/**
*
* Purpose:
*
* @author: shihuangzhe.com
* @since: JDK 1.6
* @date: 2012-4-28
*
*/
public class RrLogin {
/** Account name; sent as the value of the {@code email} form field. */
private static final String userName = "xxxxx@yahoo.com.cn";
/** Password; sent as the value of the {@code password} form field. */
private static final String password = "******";
/** Domain value submitted with the login form. */
private static final String domain = "renren.com";
/** Value submitted for the {@code key_id} form field. */
private static final String keyID = "1";
/**
 * URL the login form is POSTed to.
 * NOTE(review): the only static field in this group that is not final -
 * confirm whether it is reassigned elsewhere.
 */
private static String loginURL = "http://www.renren.com/PLogin.do";
/**
 * Page to land on after a successful login: one of the author's own blog
 * entries. Renren's default redirect target is http://www.renren.com/home
 */
private static final String targetUrl = "http://blog.renren.com/blog/84082953/398292611";
/**
 * Form field name constant (redirect URL).
 * NOTE(review): the identifier looks like a misspelling of "ORIG"; it is
 * left unchanged because it is referenced outside this group of fields.
 */
private static final String _ORGI_URL = "origURL";
/** Form field name constant (domain). */
private static final String _DOMAIN = "domain";
/** Form field name constant (key_id). */
private static final String _KEY_ID = "key_id";
/** Form field name constant (account). */
private static final String _EMAIL = "email";
/** Form field name constant (password). */
private static final String _PASSWORD = "password";
/**
 * HTTP client used to execute the requests. It is built on
 * ThreadSafeClientConnManager, which the original author chose for
 * multi-thread safety.
 */
private HttpClient client = new DefaultHttpClient(
new ThreadSafeClientConnManager());
/**
* Purpose: 登陆renren.com
*
* @throws Exception
* @return: void
*/
private void login() throws Exception {
HttpPost httpost = new HttpPost(loginURL);
try {
// 为请求参数赋值
List<NameValuePair> nvps = new ArrayList<NameValuePair>();
nvps.add(new BasicNameValuePair(_ORGI_URL, targetUrl));
nvps.add(new BasicNameValuePair(_DOMAIN, domain));
nvps.add(new BasicNameValuePair(_KEY_ID, keyID));
nvps.add(new BasicNameValuePair(_EMAIL, userName));
nvps.add(new BasicNameValuePair(_PASSWORD, password));
httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
// 获取请求相应
HttpResponse response = client.execute(httpost);
/*
* 注意,因为renren.com登陆成功后,需要再次经过
* http://www.renren.com/callback.do?t=da278e2526f9b2387ea22e57578a85d93
* &
* origURL=http%3A%2F%2Fblog.renren.com%2Fblog%2F84082953%2F398292611
* &needNotify=false 这种方式跳转,所以需要再次处理发一次请求
&nbs
补充:综合编程 , 其他综合 ,