当前位置:编程学习 > JAVA >>

Java获取网页源码

01 package gogo.cool;
02 
03 import java.io.BufferedReader;
04 import java.io.IOException;
05 import java.io.InputStreamReader;
06 import java.net.HttpURLConnection;
07 import java.net.URL;
08 
/**
 * Small demo that downloads a web page over HTTP and prints its source.
 */
public class test1 {

    /**
     * Fetches a fixed URL and prints the page source to standard output.
     *
     * @param a command-line arguments (unused)
     * @throws IOException declared by the original signature; {@link #getHTML}
     *                     itself catches and reports its own failures
     */
    public static void main(String[] a) throws IOException {
        String url = "http://www.baidu.com";

        // Decoding with the gb2312 charset declared inside the page itself
        // actually produces garbled text, so gbk is used instead.
        System.out.println(getHTML(url, "gbk"));
    }

    /**
     * Downloads the document at {@code pageURL} and returns its text.
     *
     * <p>The response body is read line by line and every line is terminated
     * with {@code "\r\n"} in the result, regardless of the line terminator the
     * server used.
     *
     * <p>This method is best-effort and never throws: any failure (malformed
     * URL, non-HTTP URL, unsupported encoding, I/O error) is printed to
     * standard error and whatever was read up to that point is returned,
     * which is the empty string when nothing could be read.
     *
     * @param pageURL  absolute HTTP URL of the page to fetch
     * @param encoding name of the charset used to decode the response body
     * @return the decoded page text, possibly partial or empty on failure
     */
    public static String getHTML(String pageURL, String encoding) {
        StringBuilder pageHTML = new StringBuilder();
        HttpURLConnection connection = null;

        try {
            URL url = new URL(pageURL);
            connection = (HttpURLConnection) url.openConnection();
            connection.setRequestProperty("User-Agent", "MSIE 7.0");

            // try-with-resources closes the reader (and the wrapped response
            // stream) even when reading fails part-way through.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    connection.getInputStream(), encoding))) {
                String line;
                while ((line = br.readLine()) != null) {
                    pageHTML.append(line).append("\r\n");
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method never throwing.
            // The failure is reported and the partial result is returned.
            e.printStackTrace();
        } finally {
            // Release the connection on both the success and the error path.
            if (connection != null) {
                connection.disconnect();
            }
        }

        return pageHTML.toString();
    }
}

 作者:neo600

补充:软件开发 , Java ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,