当前位置:编程学习 > JAVA >>

急,大神进!!!获取源码,图上框出部分获取不了!



//java代码
package com.ptmind;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.zip.GZIPInputStream;

/**
 * Small utility that downloads the HTML source of a page over HTTP(S) using
 * {@link HttpURLConnection} with a fixed set of browser-like request headers,
 * and can dump a string to a fixed file on disk.
 *
 * <p>All download methods report failure by returning {@code null} after
 * printing a diagnostic; they do not propagate {@link IOException}.
 */
public class HtmlParser {

    /**
     * Cookie header sent with every request.
     *
     * <p>NOTE(review): this is a captured browser session (session id, login
     * flags, personal fields) hard-coded into the source. It is kept verbatim
     * so existing behavior is unchanged, but it should be moved to external
     * configuration and the session invalidated.
     */
    private static final String SESSION_COOKIE =
            "Apache=118.26.72.42.1387365390559540; APPINFOPER=fname%3D%E7%88%BD%3Bptlimitpoint%3D0%3Bpoint%3D0%3Blname%3D%E6%83%A0%3B; sc_cp1=%5B%5B'NON'%2C'1387440505428'%5D%2C%5B'NON'%2C'1387448605240'%5D%2C%5B'NON'%2C'1387505999873'%5D%2C%5B'NON'%2C'1387510167198'%5D%2C%5B'NON'%2C'1387520247143'%5D%2C%5B'NON'%2C'1387527808190'%5D%2C%5B'NON'%2C'1387532682459'%5D%5D; sc_cp2=%5B%5B'NON'%2C'1387440505428'%5D%2C%5B'NON'%2C'1387448605240'%5D%2C%5B'NON'%2C'1387505999873'%5D%2C%5B'NON'%2C'1387510167198'%5D%2C%5B'NON'%2C'1387520247159'%5D%2C%5B'NON'%2C'1387527808205'%5D%2C%5B'NON'%2C'1387532682459'%5D%5D; JSESSIONID=0005t2OXxH38qyRPkyPgdapPaLI:17l1pcan1:121oac73g:15sej5p37:17cbkme8k; KNO=8qfyLOgzU8wruu2+Y4D8EA==###1###1; KSTMP=1387534231915; APPINFOSES=logon%3D1%3B; TTINFOPER=cmpkiknsrybi%3D%3Bkiknmkn%3D1%3Bkshtk%3D%3Bsbt%3DW%3Bjtktk%3D0119%3Bshsnytbi%3D%3Boflbtshn%3D%3Bctlgkn%3D0%3Bjchrkamttkz%3D0%3Bkyklytykbn%3D+%3Bjchrkamtgzi%3D0%3Blogonsm%3D1%3Bsngp%3D19841030%3Bmlmtrkum%3D1%3Bcmpno%3D%3Bkdmsng%3D%3B; SPHED=; mbox=PC#1387365394918-37898.24_04#1388743904|session#1387533731988-794844#1387536164|check#true#1387534364; s_cc=true; mboxCPF=_visitNum#0#1450606309|_matanitySeen#false#1450606309|_matanityBought#false#1450606309|_numPurchased#0#1450606309|_lastFurniSeen#others#1450606309|_totalAmount#0#1450606309|_elapsedDays#0#1450606309|_lastMdlCat##1450606309|_isLogin#1#1450606309|_isFavMember#0#1450606309|_isFavGuest#0#1450606309|_isCartMember#0#1450606309|_isCartGuest#0#1450606309|_lastBigCat#1#1450606309|_spmode#PC#1450606309|session#1387533731988-794844#1387536002; sc_cp0=NON; s_sq=%5B%5BB%5D%5D; RRKSGNFLG=";

    /** Fixed destination used by {@link #writeFile(String)}. */
    private static final String OUTPUT_PATH = "E:/b2.html";

    /**
     * Downloads the body of {@code url} and returns it as text.
     *
     * <p>The response is read line by line and the lines are concatenated
     * <em>without</em> their line terminators, so the result is a single line.
     *
     * @param url    HTTP or HTTPS URL to fetch; any other protocol makes the
     *               cast to {@link HttpURLConnection} fail with a
     *               {@link ClassCastException}
     * @param encode charset name used to decode the response body
     * @return the page text, or {@code null} if the connection fails, the
     *         server answers with no valid status or a status of 400 or above,
     *         or any {@link IOException} occurs (including an unsupported
     *         {@code encode})
     */
    public static String getHtmlContent(URL url, String encode) {
        StringBuilder content = new StringBuilder();
        HttpURLConnection con = null;
        InputStream body = null;
        try {
            con = (HttpURLConnection) url.openConnection();
            con.setRequestProperty("User-Agent", "Mozilla/5.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
            con.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
            // Only advertise an encoding this method can actually decode.
            // HttpURLConnection does not decompress the body itself, so asking
            // for deflate/sdch would yield bytes we cannot turn into text.
            con.setRequestProperty("Accept-Encoding", "gzip");
            con.setRequestProperty("Cache-Control", "max-age=0");
            con.setRequestProperty("Connection", "keep-alive");
            con.setRequestProperty("Cookie", SESSION_COOKIE);

            // getResponseCode() returns -1 when the reply is not valid HTTP.
            int responseCode = con.getResponseCode();
            if (responseCode == -1) {
                System.out.println(url.toString() + " : connection is failure...");
                return null;
            }
            if (responseCode >= 400) {
                System.out.println("请求失败:get response code: " + responseCode);
                return null;
            }

            body = con.getInputStream();
            if (isGzip(con.getContentEncoding())) {
                // If this constructor throws, 'body' still refers to the raw
                // stream and is closed in the finally block.
                body = new GZIPInputStream(body);
            }

            BufferedReader reader = new BufferedReader(new InputStreamReader(body, encode));
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
            return content.toString();
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("error: " + url.toString());
            // Return null directly; dereferencing a nulled buffer here used to
            // turn every I/O failure into a NullPointerException.
            return null;
        } finally {
            closeQuietly(body);
            // 'con' is still null when openConnection() itself failed.
            if (con != null) {
                con.disconnect();
            }
        }
    }

    /**
     * Convenience overload taking the address as a string.
     *
     * <p>If {@code url} does not start with {@code http://} or
     * {@code https://} (case-insensitive), {@code http://} is prepended.
     *
     * @param url    page address, with or without a scheme
     * @param encode charset name used to decode the response body
     * @return the page text, or {@code null} if the URL is malformed or the
     *         download fails for any reason
     */
    public static String getHtmlContent(String url, String encode) {
        boolean hasScheme = url.regionMatches(true, 0, "http://", 0, 7)
                || url.regionMatches(true, 0, "https://", 0, 8);
        if (!hasScheme) {
            url = "http://" + url;
        }
        try {
            return getHtmlContent(new URL(url), encode);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Fetches the bellemaison.jp home page as Shift_JIS and prints it. */
    public static void main(String args[]) {
        System.out.println(getHtmlContent("www.bellemaison.jp", "Shift_JIS"));
    }

    /**
     * Writes {@code content} to the fixed file {@code E:/b2.html}, creating it
     * if necessary and replacing any previous content. The platform default
     * charset is used. I/O failures are printed and otherwise ignored.
     *
     * @param content text to write; must not be {@code null}
     */
    public static void writeFile(String content) {
        BufferedWriter output = null;
        try {
            File target = new File(OUTPUT_PATH);
            target.createNewFile();
            output = new BufferedWriter(new FileWriter(target));
            output.write(content);
            // Close on the success path too, so a failed flush is reported.
            output.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // No-op after a successful close; releases the file otherwise.
            closeQuietly(output);
        }
    }

    /** Tells whether a Content-Encoding header value denotes gzip compression. */
    private static boolean isGzip(String contentEncoding) {
        if (contentEncoding == null) {
            return false;
        }
        String value = contentEncoding.trim();
        return "gzip".equalsIgnoreCase(value) || "x-gzip".equalsIgnoreCase(value);
    }

    /** Closes a resource during cleanup, ignoring null and close failures. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Cleanup only: the primary outcome has already been decided.
        }
    }
}
--------------------编程问答-------------------- 图上框出的部分看起来是通过JS或IFrame二次加载的,无法从主页面的源码中直接获取。 --------------------编程问答-------------------- 我通过HttpClient获取到了图上框出的部分,但是通过模拟请求头、重构Cookie的方式得不到登录的用户名!页面显示的内容和未登录时一样!
补充:Java ,  Java相关
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,