获取html页面全部标签或者标签内容

首先是两个正则表达式：

1.<[^>]+>：这个正则表达式可以匹配绝大多数html标签(注意页面编码方式和读取的编码方式要一致)。但如果标签的属性值中含有“>”，或者一个标签跨越多行(下面的程序是逐行匹配的)，就无法完整匹配。

2.>[^<]+<：这个可以匹配标签内容。本人对正则不是很熟悉，因而只是简单地将第一个正则表达式反了过来，匹配出来的结果都会带着><(下面的程序会把结果首尾的>和<去掉)。如果有更好的正则表达式，希望可以告诉我。

下面上程序：

[java]
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class URLTest {

    /**
     * Reads the page at a fixed URL line by line and prints the result of
     * {@link #GetLabel(String)} for every line where that result is not
     * {@code null}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the URL cannot be opened or reading the stream fails
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://www.ascii-code.com/");
        // NOTE(review): no charset is passed to InputStreamReader, so the
        // platform default decodes the page — confirm it matches the page's
        // actual encoding.
        BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String labels = GetLabel(line);
                if (labels != null) {
                    System.out.println(labels);
                }
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped reader and
            // stream; doing it in finally releases the connection even when
            // reading or matching throws.
            br.close();
        }
    }

    /**
     * Matches a run of text enclosed by a {@code '>'} and the next {@code '<'};
     * group 1 is the enclosed text without the delimiters. Compiled once so
     * repeated calls do not recompile it.
     */
    private static final Pattern CONTENT_PATTERN = Pattern.compile(">([^<]+)<");

    /**
     * Extracts the text found between a {@code '>'} and the following
     * {@code '<'} in the given HTML fragment and joins all such pieces with
     * {@code "____"}.
     *
     * <p>Example: {@code "<ul><li>1.hehe</li><li>2.hi</li></ul>"} yields
     * {@code "1.hehe____2.hi"}.
     *
     * @param html the HTML text to scan; may be {@code null}
     * @return the joined pieces in order of appearance, or {@code null} when
     *         {@code html} is {@code null} or contains no such piece
     */
    public static String GetContent(String html) {
        if (html == null) {
            // Treat a missing input like "nothing found" instead of throwing.
            return null;
        }
        Matcher matcher = CONTENT_PATTERN.matcher(html);
        // Stays null until the first match so that "no match" is still
        // reported as null, which callers may test for.
        StringBuilder joined = null;
        while (matcher.find()) {
            if (joined == null) {
                joined = new StringBuilder();
            } else {
                joined.append("____");
            }
            // Group 1 is never empty: the pattern requires at least one
            // character between the delimiters.
            joined.append(matcher.group(1));
        }
        return joined == null ? null : joined.toString();
    }

    public static String GetLabel(String html) {
        //String html = "<ul><li>1.hehe</li><li>2.hi</li><li>3.hei</li></ul>";
        String ss = "<[^>]+>";
        String temp = null;
        Pattern pa = Pattern.compile(ss);
        Matcher ma = null;
        ma = pa.matcher(html);
        String result = null;
        while(ma.find()){
            temp = ma.group();
            if(temp!=null){
                if(temp.startsWith(">")){
                    temp = temp.substring(1);
                }
                if(temp.endsWith("<")){
                    temp = temp.substring(0, temp.length()-1);
                }
                if(!temp.equalsIgnoreCase("")){
                    if(result==null){
                        result = temp;
                    }
                    else{
                     &n

补充：软件开发 , Java ,