当前位置:编程学习 > 网站相关 >>

HttpClient模拟登录人人网,并且爬取日志内容(一)

使用HttpClient最新版本,下载地址:Download/2012/0428/20120428021640813.zip
 

注释已经写得比较清楚了,就不再另行说明了。

[java]

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see <http://www.apache.org/>.
 * renren.com
 * <input type="hidden" name="origURL" value="http://www.renren.com/home" />
 * <input type="hidden" name="domain" value="renren.com" />
 * <input type="hidden" name="key_id" value="1" />
 * <input type="submit" id="login" class="input-submit login-btn" value="登录人人网" tabindex="5"/>
 * http://s.xnimg.cn/a36853/n/apps/login/login-all.js
 */ 
package org.apache.http.examples.client; 
 
import java.util.ArrayList; 
import java.util.List; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern; 
 
import org.apache.http.Header; 
import org.apache.http.HttpEntity; 
import org.apache.http.HttpResponse; 
import org.apache.http.NameValuePair; 
import org.apache.http.client.HttpClient; 
import org.apache.http.client.entity.UrlEncodedFormEntity; 
import org.apache.http.client.methods.HttpGet; 
import org.apache.http.client.methods.HttpPost; 
import org.apache.http.impl.client.DefaultHttpClient; 
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager; 
import org.apache.http.message.BasicNameValuePair; 
import org.apache.http.protocol.HTTP; 
import org.apache.http.util.EntityUtils; 
 
/**
 * 
 * Purpose:
 * 
 * @author: shihuangzhe.com
 * @since: JDK 1.6
 * @date: 2012-4-28
 * 
 */ 
public class RrLogin { 
    /** 帐号 */ 
    private static final String userName = "xxxxx@yahoo.com.cn"; 
    /** 密码 */ 
    private static final String password = "******"; 
    /** 网域 */ 
    private static final String domain = "renren.com"; 
    /** key_id */ 
    private static final String keyID = "1"; 
    /** 表单提交url */ 
    private static String loginURL = "http://www.renren.com/PLogin.do"; 
    /** 登陆成功后,跳转到我自己的blog日志,人人默认跳转路径为 http://www.renren.com/home */ 
    private static final String targetUrl = "http://blog.renren.com/blog/84082953/398292611"; 
    /** 表单域常量(跳转url) */ 
    private static final String _ORGI_URL = "origURL"; 
    /** 表单域常量(网域) */ 
    private static final String _DOMAIN = "domain"; 
    /** 表单域常量(key_id) */ 
    private static final String _KEY_ID = "key_id"; 
    /** 表单域常量(帐号) */ 
    private static final String _EMAIL = "email"; 
    /** 表单域常量(密码) */ 
    private static final String _PASSWORD = "password"; 
    /** ThreadSafeClientConnManager保证多线程安全 */ 
    private HttpClient client = new DefaultHttpClient( 
            new ThreadSafeClientConnManager()); 
 
    /**
     * Purpose: 登陆renren.com
     * 
     * @throws Exception
     * @return: void
     */ 
    private void login() throws Exception { 
        HttpPost httpost = new HttpPost(loginURL); 
        try { 
            // 为请求参数赋值 
            List<NameValuePair> nvps = new ArrayList<NameValuePair>(); 
            nvps.add(new BasicNameValuePair(_ORGI_URL, targetUrl)); 
            nvps.add(new BasicNameValuePair(_DOMAIN, domain)); 
            nvps.add(new BasicNameValuePair(_KEY_ID, keyID)); 
            nvps.add(new BasicNameValuePair(_EMAIL, userName)); 
            nvps.add(new BasicNameValuePair(_PASSWORD, password)); 
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); 
            // 获取请求响应 
            HttpResponse response = client.execute(httpost); 
            /*
             * 注意,因为renren.com登陆成功后,需要再次经过
             * http://www.renren.com/callback.do?t=da278e2526f9b2387ea22e57578a85d93
             * &
             * origURL=http%3A%2F%2Fblog.renren.com%2Fblog%2F84082953%2F398292611
             * &needNotify=false 这种方式跳转,所以需要再次处理发一次请求
        &nbs

补充:综合编程 , 其他综合 ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,