java解析XML和java解析HTML
一、使用 dom4j 解析 XML:
需要的 jar 包:dom4j-1.6.1.jar 和 jaxen-1.1-beta-6.jar
/**
 * Parses an XML file loaded from the classpath and prints the values of the
 * {@code driver-name}, {@code url}, {@code user-name} and {@code password}
 * elements found under {@code /config/db-info}.
 *
 * <p>If the resource cannot be found, a message is written to standard error
 * and nothing else is printed. An element that is absent from the document is
 * printed as an empty string. Parse and I/O failures are reported via
 * {@code printStackTrace()}, as before.
 *
 * @author Jeelon
 * @param string
 *            name of the classpath resource to parse
 */
private static void getXmlInfo(String string) {
    SAXReader reader = new SAXReader();
    // try-with-resources closes the stream on every path; a null resource is
    // permitted by the language and is simply not closed.
    try (InputStream in = Thread.currentThread().getContextClassLoader()
            .getResourceAsStream(string)) {
        if (in == null) {
            // getResourceAsStream returns null when the resource does not exist.
            System.err.println("Resource not found on classpath: " + string);
            return;
        }
        Document doc = reader.read(in);
        String driverName = elementText(doc, "/config/db-info/driver-name");
        String url = elementText(doc, "/config/db-info/url");
        String userName = elementText(doc, "/config/db-info/user-name");
        String password = elementText(doc, "/config/db-info/password");
        System.out.println("====================================");
        System.out.println("驱动名:" + driverName);
        System.out.println("URL地址:" + url);
        System.out.println("用户名:" + userName);
        // NOTE(review): the password is printed in clear text; fine for a demo,
        // not for production logging.
        System.out.println("密码:" + password);
        System.out.println("====================================");
    } catch (DocumentException e) {
        e.printStackTrace();
    } catch (java.io.IOException e) {
        // Only reachable from closing the stream.
        e.printStackTrace();
    }
}

/**
 * Returns the string value of the element selected by {@code xpath}, or an
 * empty string when the expression does not select exactly one element.
 *
 * <p>{@code selectObject} is documented to return a list or a scalar in some
 * cases, so the result is type-checked rather than cast blindly.
 */
private static String elementText(Document doc, String xpath) {
    Object match = doc.selectObject(xpath);
    if (match instanceof Element) {
        return ((Element) match).getStringValue();
    }
    return "";
}
二、使用 jsoup 解析 HTML
需要的jar包:jsoup-1.6.0.jar
/**
 * Extracts text content from an HTML file.
 *
 * <p>The file is parsed as UTF-8. The text of every element matched by the
 * selectors {@code title}, {@code table} and {@code div[class=post]} is
 * appended, in that order, each followed by a single space.
 *
 * @author Jeelon
 * @param html
 *            the HTML file to read
 * @return the concatenated text; an empty string if reading the file fails
 *         with an {@code IOException} (the stack trace is printed)
 */
private static String getDocument(File html) {
    // Visited in order: page title, tables, then the "post" div selector.
    final String[] selectors = { "title", "table", "div[class=post]" };
    StringBuilder collected = new StringBuilder();
    try {
        // Parse using the UTF-8 character set.
        org.jsoup.nodes.Document page = Jsoup.parse(html, "UTF-8");
        for (String selector : selectors) {
            Elements matches = page.select(selector);
            for (org.jsoup.nodes.Element match : matches) {
                collected.append(match.text()).append(" ");
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return collected.toString();
}
Element element = null;
File f = new File("a.xml");
DocumentBuilder db = null; // documentBuilder为抽象不能直接实例化(将XML文件转换为DOM文件)
DocumentBuilderFactory dbf = null;
try {
dbf = DocumentBuilderFactory.newInstance(); // 返回documentBuilderFactory对象
db = dbf.newDocumentBuilder();// 返回db对象用documentBuilderFatory对象获得返回documentBuildr对象
Document dt = db.parse(f); // 得到一个DOM并返回给document对象
element = dt.getDocumentElement();// 得到一个elment根元素 <
补充:软件开发 , Java ,