【引言】
目前在Java中用于解析XML的技術很多,主流的有DOM、SAX、JDOM、DOM4j,下文主要介紹這4種解析XML文檔技術的使用、優缺點及性能測試。
一、【基礎知識——掃盲】
sax、dom是兩種對xml文檔進行解析的方法(沒有具體實現,只是接口),所以只有它們是無法解析xml文檔的;jaxp只是api,它進一步封裝了sax、dom兩種接口,并且提供了DocumentBuilderFactory/DocumentBuilder和SAXParserFactory/SAXParser(默認使用xerces解析器)。
二、【DOM、SAX、JDOM、DOM4j簡單使用介紹】
1、【DOM(Document Object Model) 】
由W3C提供的接口,它將整個XML文檔讀入內存,構建一個DOM樹來對各個節點(Node)進行操作。
示例代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
<?xml version="1.0" encoding="UTF-8"?>
<university name="pku">
	<college name="c1">
		<class name="class1">
			<student name="stu1" sex='male' age="21" />
			<student name="stu2" sex='female' age="20" />
			<student name="stu3" sex='female' age="20" />
		</class>
		<class name="class2">
			<student name="stu4" sex='male' age="19" />
			<student name="stu5" sex='female' age="20" />
			<student name="stu6" sex='female' age="21" />
		</class>
	</college>
	<college name="c2">
		<class name="class3">
			<student name="stu7" sex='male' age="20" />
		</class>
	</college>
	<college name="c3">
	</college>
</university>
后文代碼中使用到的test.xml(該文檔放在src路徑下,即編譯后位于classes路徑下),都是指上面這份xml文檔。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
package test.xml;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * Reads and rewrites test.xml with the W3C DOM API.
 *
 * @author whwang
 */
public class TestDom {

    /** Classpath name of the sample document used by both demos. */
    private static final String RESOURCE = "test.xml";

    public static void main(String[] args) {
        read();
        //write();
    }

    /**
     * Prints the university / college / class / student hierarchy to
     * {@code System.err}, one tab of indentation per nesting level.
     * A missing attribute is printed as an empty string.
     */
    public static void read() {
        try {
            Document doc = loadDocument();
            // root <university>
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            System.err.println(root.getAttribute("name"));
            for (Element college : childElements(root)) {
                System.err.println("\t" + college.getAttribute("name"));
                for (Element clazz : childElements(college)) {
                    System.err.println("\t\t" + clazz.getAttribute("name"));
                    for (Element student : childElements(clazz)) {
                        System.err.println("\t\t\t" + student.getAttribute("name")
                                + " " + student.getAttribute("sex")
                                + " " + student.getAttribute("age"));
                    }
                }
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Modifies the document (renames the root, drops colleges c1 and c2, adds a
     * class to c3, appends college c5) and saves it to src/dom-modify.xml.
     */
    public static void write() {
        try {
            Document doc = loadDocument();
            // root <university>
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            // change an attribute
            root.setAttribute("name", "tsu");

            // childElements() returns a snapshot, so removing nodes inside the
            // loop cannot shift the remaining children and skip one of them.
            for (Element college : childElements(root)) {
                String collegeName = college.getAttribute("name");
                if ("c1".equals(collegeName) || "c2".equals(collegeName)) {
                    // delete a node
                    root.removeChild(college);
                } else if ("c3".equals(collegeName)) {
                    Element newChild = doc.createElement("class");
                    newChild.setAttribute("name", "c4");
                    college.appendChild(newChild);
                }
            }

            // add a node
            Element addCollege = doc.createElement("college");
            addCollege.setAttribute("name", "c5");
            root.appendChild(addCollege);
            Text text = doc.createTextNode("text");
            addCollege.appendChild(text);

            // save the modified document
            Transformer transFormer = TransformerFactory.newInstance().newTransformer();
            File file = new File("src/dom-modify.xml");
            // FileOutputStream creates the file or truncates an existing one,
            // so no explicit delete/createNewFile is needed.
            FileOutputStream out = new FileOutputStream(file);
            try {
                transFormer.transform(new DOMSource(doc), new StreamResult(out));
            } finally {
                out.close();
            }
            System.out.println(file.getAbsolutePath());
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the sample document from the classpath and closes the stream.
     *
     * @throws FileNotFoundException if the resource is not on the classpath
     */
    private static Document loadDocument()
            throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        InputStream in = TestDom.class.getClassLoader().getResourceAsStream(RESOURCE);
        if (in == null) {
            throw new FileNotFoundException(RESOURCE + " not found on the classpath");
        }
        try {
            return builder.parse(in);
        } finally {
            in.close();
        }
    }

    /**
     * Returns a snapshot of the element children of {@code parent}; text,
     * comment and other non-element nodes are left out.
     */
    private static List<Element> childElements(Node parent) {
        List<Element> elements = new ArrayList<Element>();
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child != null && child.getNodeType() == Node.ELEMENT_NODE) {
                elements.add((Element) child);
            }
        }
        return elements;
    }
}
該代碼只要稍做修改,即可變得更加簡潔,無需一直寫if來判斷是否有子節點。
2、【SAX (Simple API for XML) 】
SAX不用將整個文檔加載到內存,基于事件驅動的API(Observer模式),用戶只需要注冊自己感興趣的事件即可。SAX提供EntityResolver, DTDHandler, ContentHandler, ErrorHandler接口,分別用于監聽解析實體事件、DTD處理事件、正文處理事件和處理出錯事件,與AWT類似,SAX還提供了一個對這4個接口默認的類DefaultHandler(這里的默認實現,其實就是一個空方法),一般只要繼承DefaultHandler,重寫自己感興趣的事件即可。
示例代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
package test.xml;

import java.io.IOException;
import java.io.InputStream;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Parses test.xml with a JAXP SAX parser and reports every element start.
 *
 * @author whwang
 */
public class TestSAX {

    public static void main(String[] args) {
        read();
        write();
    }

    /** Streams test.xml from the classpath through {@link MyHandler}. */
    public static void read() {
        try {
            SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
            InputStream xml = TestSAX.class.getClassLoader().getResourceAsStream("test.xml");
            saxParser.parse(xml, new MyHandler());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        }
    }

    /** This demo does not write XML with SAX; it only prints a notice. */
    public static void write() {
        System.err.println("純SAX對于寫操作無能為力");
    }
}

/**
 * Handler that overrides every DefaultHandler callback. Only the document
 * start/end and element start callbacks print anything; the rest delegate to
 * the superclass.
 */
class MyHandler extends DefaultHandler {

    // ---- ContentHandler: callbacks that produce output ----

    @Override
    public void startDocument() throws SAXException {
        System.err.println("開始解析文檔");
    }

    @Override
    public void endDocument() throws SAXException {
        System.err.println("解析結束");
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        System.err.print("Element: " + qName + ", attr: ");
        print(attributes);
    }

    // ---- ContentHandler: callbacks left at their default behaviour ----

    @Override
    public void setDocumentLocator(Locator locator) {
        super.setDocumentLocator(locator);
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        super.startPrefixMapping(prefix, uri);
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        super.endPrefixMapping(prefix);
    }

    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        super.endElement(uri, localName, qName);
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        super.ignorableWhitespace(ch, start, length);
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        super.processingInstruction(target, data);
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
        super.skippedEntity(name);
    }

    // ---- EntityResolver ----

    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws IOException, SAXException {
        return super.resolveEntity(publicId, systemId);
    }

    // ---- DTDHandler ----

    @Override
    public void notationDecl(String name, String publicId, String systemId)
            throws SAXException {
        super.notationDecl(name, publicId, systemId);
    }

    @Override
    public void unparsedEntityDecl(String name, String publicId, String systemId,
            String notationName) throws SAXException {
        super.unparsedEntityDecl(name, publicId, systemId, notationName);
    }

    // ---- ErrorHandler ----

    @Override
    public void warning(SAXParseException e) throws SAXException {
        super.warning(e);
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
        super.error(e);
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
        super.fatalError(e);
    }

    /**
     * Writes the attributes as {@code [a = 1, b = 2]} followed by a line break.
     * Prints nothing at all when {@code attrs} is null.
     */
    private void print(Attributes attrs) {
        if (attrs == null) {
            return;
        }
        StringBuilder line = new StringBuilder("[");
        int count = attrs.getLength();
        for (int idx = 0; idx < count; idx++) {
            if (idx > 0) {
                line.append(", ");
            }
            line.append(attrs.getQName(idx)).append(" = ").append(attrs.getValue(idx));
        }
        line.append("]");
        System.err.println(line);
    }
}
3、【JDOM】
JDOM與DOM非常類似,它是處理XML的純JAVA API,API大量使用了Collections類,且JDOM僅使用具體類而不使用接口。 JDOM 它自身不包含解析器。它通常使用 SAX2 解析器來解析和驗證輸入 XML 文檔(盡管它還可以將以前構造的 DOM 表示作為輸入)。它包含一些轉換器以將 JDOM 表示輸出成 SAX2 事件流、DOM 模型或 XML 文本文檔
示例代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
package test.xml; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; import org.jdom.output.XMLOutputter; /** * JDom讀寫xml * @author whwang */ public class TestJDom { public static void main(String[] args) { //read(); write(); } public static void read() { try { boolean validate = false ; SAXBuilder builder = new SAXBuilder(validate); InputStream in = TestJDom. class .getClassLoader().getResourceAsStream( "test.xml" ); Document doc = builder.build(in); // 獲取根節點 <university> Element root = doc.getRootElement(); readNode(root, "" ); } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @SuppressWarnings ( "unchecked" ) public static void readNode(Element root, String prefix) { if (root == null ) return ; // 獲取屬性 List<Attribute> attrs = root.getAttributes(); if (attrs != null && attrs.size() > 0 ) { System.err.print(prefix); for (Attribute attr : attrs) { System.err.print(attr.getValue() + " " ); } System.err.println(); } // 獲取他的子節點 List<Element> childNodes = root.getChildren(); prefix += "\t" ; for (Element e : childNodes) { readNode(e, prefix); } } public static void write() { boolean validate = false ; try { SAXBuilder builder = new SAXBuilder(validate); InputStream in = TestJDom. 
class .getClassLoader().getResourceAsStream( "test.xml" ); Document doc = builder.build(in); // 獲取根節點 <university> Element root = doc.getRootElement(); // 修改屬性 root.setAttribute( "name" , "tsu" ); // 刪除 boolean isRemoved = root.removeChildren( "college" ); System.err.println(isRemoved); // 新增 Element newCollege = new Element( "college" ); newCollege.setAttribute( "name" , "new_college" ); Element newClass = new Element( "class" ); newClass.setAttribute( "name" , "ccccc" ); newCollege.addContent(newClass); root.addContent(newCollege); XMLOutputter out = new XMLOutputter(); File file = new File( "src/jdom-modify.xml" ); if (file.exists()) { file.delete(); } file.createNewFile(); FileOutputStream fos = new FileOutputStream(file); out.output(doc, fos); } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } |
4、【DOM4j】
dom4j是目前xml解析方面最優秀的(Hibernate、Sun的JAXM也都使用dom4j來解析XML),它合并了許多超出基本 XML 文檔表示的功能,包括集成的 XPath 支持、XML Schema 支持以及用于大文檔或流化文檔的基于事件的處理。
示例代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
package test.xml; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.ProcessingInstruction; import org.dom4j.VisitorSupport; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; /** * Dom4j讀寫xml * @author whwang */ public class TestDom4j { public static void main(String[] args) { read1(); //read2(); //write(); } public static void read1() { try { SAXReader reader = new SAXReader(); InputStream in = TestDom4j. class .getClassLoader().getResourceAsStream( "test.xml" ); Document doc = reader.read(in); Element root = doc.getRootElement(); readNode(root, "" ); } catch (DocumentException e) { e.printStackTrace(); } } @SuppressWarnings ( "unchecked" ) public static void readNode(Element root, String prefix) { if (root == null ) return ; // 獲取屬性 List<Attribute> attrs = root.attributes(); if (attrs != null && attrs.size() > 0 ) { System.err.print(prefix); for (Attribute attr : attrs) { System.err.print(attr.getValue() + " " ); } System.err.println(); } // 獲取他的子節點 List<Element> childNodes = root.elements(); prefix += "\t" ; for (Element e : childNodes) { readNode(e, prefix); } } public static void read2() { try { SAXReader reader = new SAXReader(); InputStream in = TestDom4j. 
class .getClassLoader().getResourceAsStream( "test.xml" ); Document doc = reader.read(in); doc.accept( new MyVistor()); } catch (DocumentException e) { e.printStackTrace(); } } public static void write() { try { // 創建一個xml文檔 Document doc = DocumentHelper.createDocument(); Element university = doc.addElement( "university" ); university.addAttribute( "name" , "tsu" ); // 注釋 university.addComment( "這個是根節點" ); Element college = university.addElement( "college" ); college.addAttribute( "name" , "cccccc" ); college.setText( "text" ); File file = new File( "src/dom4j-modify.xml" ); if (file.exists()) { file.delete(); } file.createNewFile(); XMLWriter out = new XMLWriter( new FileWriter(file)); out.write(doc); out.flush(); out.close(); } catch (IOException e) { e.printStackTrace(); } } } class MyVistor extends VisitorSupport { public void visit(Attribute node) { System.out.println( "Attibute: " + node.getName() + "=" + node.getValue()); } public void visit(Element node) { if (node.isTextOnly()) { System.out.println( "Element: " + node.getName() + "=" + node.getText()); } else { System.out.println(node.getName()); } } @Override public void visit(ProcessingInstruction node) { System.out.println( "PI:" + node.getTarget() + " " + node.getText()); } } |
三、【性能測試】
環境:AMD4400+ 2.0+GHz主頻 JDK6.0
運行參數:-Xms400m -Xmx400m
xml文件大小:10.7M
結果:
DOM: >581297ms
SAX: 8829ms
JDOM: 581297ms
DOM4j: 5309ms
時間包括IO的,只是進行了簡單的測試,僅供參考!!!!
四、【對比】
1、【DOM】
DOM是基于樹的結構,通常需要加載整文檔和構造DOM樹,然后才能開始工作。
優點:
a、由于整棵樹在內存中,因此可以對xml文檔隨機訪問
b、可以對xml文檔進行修改操作
c、較sax,dom使用也更簡單。
缺點:
a、整個文檔必須一次性解析完
b、由于整個文檔都需要載入內存,對于大文檔成本高
2、【SAX】
SAX類似流媒體,它基于事件驅動的,因此無需將整個文檔載入內存,使用者只需要監聽自己感興趣的事件即可。
優點:
a、無需將整個xml文檔載入內存,因此消耗內存少
b、可以注冊多個ContentHandler
缺點:
a、不能隨機的訪問xml中的節點
b、不能修改文檔
3、【JDOM】
JDOM是純Java的處理XML的API,其API中大量使用Collections類。
優點:
a、DOM方式的優點
b、具有SAX的Java規則
缺點
a、DOM方式的缺點
4、【DOM4J】
這4種xml解析方式中最優秀的一個,集易用和性能于一身。
五、【小插曲XPath】
XPath 是一門在 XML 文檔中查找信息的語言, 可用來在 XML 文檔中對元素和屬性進行遍歷。XPath 是 W3C XSLT 標準的主要元素,并且 XQuery 和 XPointer 同時被構建于 XPath 表達之上。因此,對 XPath 的理解是很多高級 XML 應用的基礎。
XPath非常類似對數據庫操作的SQL語言,或者說JQuery,它可以方便開發者抓取文檔中需要的東西。(dom4j也支持xpath)
示例代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
package test.xml;

import java.io.IOException;
import java.io.InputStream;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/** Queries test.xml with the JAXP XPath API. */
public class TestXPath {

    public static void main(String[] args) {
        read();
    }

    /**
     * Parses test.xml from the classpath into a DOM tree and prints the
     * {@code name} attribute of every {@code class} element to System.out.
     */
    public static void read() {
        try {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            InputStream xml = TestXPath.class.getClassLoader().getResourceAsStream("test.xml");
            Document document = parser.parse(xml);

            // Select the name attribute of every class element.
            // XPath syntax primer: http://w3school.com.cn/xpath/
            XPath engine = XPathFactory.newInstance().newXPath();
            XPathExpression classNames = engine.compile("//class/@name");
            NodeList matches = (NodeList) classNames.evaluate(document, XPathConstants.NODESET);

            int index = 0;
            while (index < matches.getLength()) {
                System.out.println("name = " + matches.item(index).getNodeValue());
                index++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
    }
}
六、【補充】
注意4種解析方法對TextNode(文本節點)的處理:
1、在使用DOM時,調用node.getChildNodes()獲取該節點的子節點,文本節點也會被當作一個Node來返回,如:
1
2
3
4
5
6
7
8
9
10
|
<?xml version="1.0" encoding="UTF-8"?>
<university name="pku">
	<college name="c1">
		<class name="class1">
			<student name="stu1" sex='male' age="21" />
			<student name="stu2" sex='female' age="20" />
			<student name="stu3" sex='female' age="20" />
		</class>
	</college>
</university>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
package test.xml;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Lists the direct children of the root element, element and text nodes
 * alike, to show which nodes DOM returns from getChildNodes().
 *
 * @author whwang
 */
public class TestDom2 {

    public static void main(String[] args) {
        read();
    }

    /**
     * Parses test.xml from the classpath and prints, to System.err, the number
     * of children of the root followed by one line per child: the node name
     * for elements, the bytes of the text content for text nodes.
     */
    public static void read() {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder parser = factory.newDocumentBuilder();
            InputStream xml = TestDom2.class.getClassLoader().getResourceAsStream("test.xml");
            Document document = parser.parse(xml);

            // root <university>
            Element university = document.getDocumentElement();
            if (university == null) {
                return;
            }
            NodeList children = university.getChildNodes();
            if (children == null) {
                return;
            }

            int total = children.getLength();
            System.err.println("university子節點數:" + total);
            System.err.println("子節點如下:");
            for (int pos = 0; pos < total; pos++) {
                Node child = children.item(pos);
                if (child == null) {
                    continue;
                }
                switch (child.getNodeType()) {
                case Node.ELEMENT_NODE:
                    System.err.println("\t元素節點:" + child.getNodeName());
                    break;
                case Node.TEXT_NODE:
                    byte[] raw = child.getTextContent().getBytes();
                    System.err.println("\t文本節點:" + Arrays.toString(raw));
                    break;
                default:
                    // other node kinds are not reported
                    break;
                }
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
輸出的結果是:
1
2
3
4
5
|
university子節點數: 3 子節點如下: 文本節點:[ 10 , 9 ] 元素節點:college 文本節點:[ 10 ] |
其中\n的ASCII碼為10,\t的ASCII碼為9。結果讓人大吃一驚,university的子節點數不是1,也不是2,而是3,這3個子節點都是誰呢?為了看得更清楚點,把xml文檔改為:
1
2
3
4
5
6
7
8
9
10
11
12
|
<?xml version="1.0" encoding="UTF-8"?>
<university name="pku">11
	<college name="c1">
		<class name="class1">
			<student name="stu1" sex='male' age="21" />
			<student name="stu2" sex='female' age="20" />
			<student name="stu3" sex='female' age="20" />
		</class>
	</college>22
</university>
還是上面的程序,輸出結果為:
1
2
3
4
5
|
university子節點數:3 子節點如下: 文本節點:[49, 49, 10, 9] 元素節點:college 文本節點:[50, 50, 10] |
其中數字1的ASCII碼為49,數字2的ASCII碼為50。
2、使用SAX來解析時同DOM:當你重寫它的public void characters(char[] ch, int start, int length)方法時,就能看到這些空白文本(如\n、\t)也會被當作字符數據回調。
3、JDOM,調用node.getChildren()只返回子節點,不包括TextNode節點(不管該節點是否有Text信息)。如果要獲取該節點的Text信息,可以調用node.getText()方法,該方法返回節點的Text信息,也包括\n\t等特殊字符。
4、DOM4j同JDOM