天天看点

Python XML解析--xml.etree.ElementTree:使用xml.etree.ElementTree解析xml文档

使用xml.etree.ElementTree 解析xml文档

  • 加载xml文件:1. 加载指定的字符串:ElementTree.fromstring(text)

    2. 加载指定的文件:ElementTree.parse('path')

  • 获取element的方法:getiterator, getchildren, find方法(支持部分xpath),findall(支持部分xpath)。注意:getiterator 和 getchildren 已在 Python 3.9 中移除,请分别改用 iter() 和 list(element)
  • 获取值和属性:.text, .attrib['category'], .get('category');设置属性使用 .set(key, value)
# coding=UTF-8
import xml.etree.ElementTree as ET

# Walk-through of the basic ElementTree read API against ./lx_xml.xml.
# Every print() call takes exactly one argument, so the script produces the
# same output under the Python 2 print statement and the Python 3 function.

# Parse the XML file from disk.
tree = ET.parse('./lx_xml.xml')

# Fetch the root element.
root = tree.getroot()

# Tag name of the root element.
tag_name = root.tag
print(tag_name)   # students

# Iterate over the child elements, showing each tag name and its attributes.
for child in root:
    # A single formatted string keeps "tag attrib" on one line, which is what
    # the original trailing-comma print pair produced.
    print('%s %s' % (child.tag, child.attrib))
# student {'no': '2009081097'}
# student {'no': '2009081098'}
# student {'no': '2009081099'}

# Look up elements by tag; findall() returns a collection that must be iterated.
student = root.findall("student")
for i in student:
    # get("xx") reads the value of one attribute of the element.
    attr = i.get('no')
    # find() locates one child element; .text is its text content.
    name = i.find('name').text
    print(attr)
    print(name)

# Look up a single element by tag.
student = root.find("student")
no = student.get('no')
print(no)

# A subset of XPath is supported.
student1_name = root.findall(".//student/name")
for i in student1_name:
    print(i.text)
           

应用:

#找到所有书的节点对象
    root = ET.parse('./demo.xml')
    books = root.findall('//book')
    # print books

    #找到第二本书的属性,attrib返回的是一个字典{'category': 'cooking'}
    attrib = books[1].attrib['category']
    print attrib

    #找到第二本书的价格root
    price = root.find('//book[2]/price').text
    print price
           

封装

#coding=utf-8
import xml.etree.ElementTree as ET
import traceback

class ElementTreeXml:
    """Small convenience wrapper for reading values out of an XML file.

    The file is parsed once in the constructor. When parsing fails, the
    failure is reported and ``root`` is set to None; every lookup method then
    returns None instead of raising.

    Attributes:
        filePath: The path that was passed to the constructor.
        root: The object returned by ``ET.parse`` (an ElementTree), or None
            when the file could not be parsed.
    """

    def __init__(self, filePath):
        """Parse ``filePath``; on failure report it and leave ``root`` as None."""
        self.filePath = filePath
        try:
            self.root = ET.parse(self.filePath)
        except Exception:
            # traceback.print_exc() returns None, so it must not be
            # concatenated to the message (that raised TypeError from inside
            # this handler and left root undefined). Print the two separately.
            print('File parse failed!')
            traceback.print_exc()
            # Always define root so later lookups can test it for None.
            self.root = None

    def _findElement(self, xpath):
        """Return the first element matching ``xpath``, or None.

        None is returned when no document is loaded, when nothing matches, or
        when the search itself raises; the last two cases are reported.
        """
        if self.root is None:
            return None
        try:
            element = self.root.find(xpath)
        except Exception:
            # The search raised (e.g. for a path expression it cannot handle);
            # keep the best-effort contract and report instead of propagating.
            print('Element is not found!')
            traceback.print_exc()
            return None
        if element is None:
            print('Element is not found!')
        return element

    def getElementText(self, xpath):
        """Return the text of the first element matching ``xpath``, or None."""
        element = self._findElement(xpath)
        if element is None:
            return None
        return element.text

    def getElementAttribute(self, xpath, name):
        """Return attribute ``name`` of the first element matching ``xpath``.

        Returns None when the element or the attribute does not exist.
        """
        element = self._findElement(xpath)
        if element is None:
            return None
        if name not in element.attrib:
            # Same diagnostic text the lookup has always printed for a
            # missing attribute.
            print('Element is not found!')
            return None
        return element.attrib[name]


if __name__ == '__main__':
    root = ElementTreeXml('./demo.xml')
    # Relative ".//" paths: a leading "/" on an ElementTree search emits a
    # FutureWarning and is rewritten to this form internally anyway.
    book2Text = root.getElementText('.//book[2]/price')
    book2Attrib = root.getElementAttribute('.//book[2]', 'category')
    print(book2Text)
    print(book2Attrib)