当前位置:编程学习 > XML/UML >>

java解析XML和java解析HTML

一、使用 dom4j 解析 XML:
 
需要两个 jar 包:dom4j-1.6.1.jar 和 jaxen-1.1-beta-6.jar(dom4j 的 XPath 查询依赖 jaxen)
 
/**
     * java解析xml文件各个节点信息
     * 
     * @author Jeelon
     * @param string
     *            :解析的文件名
     */ 
    private static void getXmlInfo(String string) { 
        SAXReader reader = new SAXReader(); 
        InputStream in = Thread.currentThread().getContextClassLoader() 
                .getResourceAsStream(string); 
        try { 
            Document doc = reader.read(in); 
            Element driverNameEls = (Element) doc 
                    .selectObject("/config/db-info/driver-name"); 
            Element urlEls = (Element) doc.selectObject("/config/db-info/url"); 
            Element userNameEls = (Element) doc 
                    .selectObject("/config/db-info/user-name"); 
            Element passwordEls = (Element) doc 
                    .selectObject("/config/db-info/password"); 
 
            String driverName = driverNameEls.getStringValue(); 
            String url = urlEls.getStringValue(); 
            String userName = userNameEls.getStringValue(); 
            String password = passwordEls.getStringValue(); 
 
            System.out.println("===================================="); 
            System.out.println("驱动名:" + driverName); 
            System.out.println("URL地址:" + url); 
            System.out.println("用户名:" + userName); 
            System.out.println("密码:" + password); 
            System.out.println("===================================="); 
        } catch (DocumentException e) { 
            e.printStackTrace(); 
        } 
 
    } 

 
二、使用 jsoup 解析 HTML:
需要的 jar 包:jsoup-1.6.0.jar
 
 
/**
     * 提取HTML文件的文本内容
     * 
     * @author Jeelon
     * @param html
     *            提取的html文件名
     * @return 返回提取内容String
     */ 
    private static String getDocument(File html) { 
        String text = ""; 
        try { 
            // 设置编码集 
            org.jsoup.nodes.Document doc = Jsoup.parse(html, "UTF-8"); 
            // 提取标题信息 
            Elements title = doc.select("title"); 
            for (org.jsoup.nodes.Element link : title) { 
                text += link.text() + " "; 
            } 
            // 提取table中的文本信息 
            Elements links = doc.select("table"); 
            for (org.jsoup.nodes.Element link : links) { 
                text += link.text() + " "; 
            } 
            // 提取div中的文本信息 
            Elements divs = doc.select("div[class=post]"); 
            for (org.jsoup.nodes.Element link : divs) { 
                text += link.text() + " "; 
            } 
        } catch (IOException e) { 
            e.printStackTrace(); 
        } 
 
        return text; 
    } 
 
 
 
Element element = null; 
        File f = new File("a.xml"); 
        DocumentBuilder db = null; // documentBuilder为抽象不能直接实例化(将XML文件转换为DOM文件) 
        DocumentBuilderFactory dbf = null; 
        try { 
 
            dbf = DocumentBuilderFactory.newInstance(); // 返回documentBuilderFactory对象 
            db = dbf.newDocumentBuilder();// 返回db对象用documentBuilderFatory对象获得返回documentBuildr对象 
 
            Document dt = db.parse(f); // 得到一个DOM并返回给document对象 
            element = dt.getDocumentElement();// 得到一个elment根元素 <

补充:软件开发 , Java ,
CopyRight © 2012 站长网 编程知识问答 www.zzzyk.com All Rights Reserved
部分技术文章来自网络,