1.准备样例文件website.xml:
<website>
<page name="index" title="Home Page">
<h1>Welcome to My Home Page</h1>
<p>Hi, there. My name is Mr. Yang, and this is my home page. Here are some of my interests:</p>
<ul>
<li><a href="interests/shouting.html">Shouting</a></li>
<li><a href="interests/sleeping.html">Sleeping</a></li>
<li><a href="interests/eating.html">Eating</a></li>
</ul>
</page>
<directory name="interests">
<page name="shouting" title="Shouting">
<h1>Shouting Page of Mr. yang</h1>
<p>...</p>
</page>
<page name="sleeping" title="Sleeping">
<h1>Sleeping Page of Mr. yang</h1>
<p>...</p>
</page>
<page name="eating" title="Eating">
<h1>Eating Page of Mr. yang</h1>
<p>...</p>
</page>
</directory>
</website>
2. 简单的实现,编写python脚本xml_1.py:
#! /usr/bin/env python2.6
# Written by Tony.yang
#
from xml.sax import parse
from xml.sax.handler import ContentHandler
class PageMaker(ContentHandler):
    """SAX handler that writes one HTML file per ``<page>`` element.

    Each ``<page name="N" title="T">`` produces a file ``N.html`` (path
    relative to the current working directory) containing a minimal HTML
    header with title ``T``, a copy of everything nested inside the page,
    and a closing footer.  Elements and text outside a ``<page>`` (for
    example ``<website>`` or ``<directory>``) are ignored.

    Tag names, attribute values and text are copied through verbatim; no
    HTML escaping is applied.
    """

    # True only while the parser is inside a <page> element, i.e. while
    # self.out is an open file that markup and text must be copied to.
    passthrough = False

    def startElement(self, name, attrs):
        """Handle an opening tag.

        ``<page>`` opens the output file and writes the HTML header.  Any
        other tag is echoed, with its attributes, only when inside a page.

        Raises:
            KeyError: if a ``<page>`` lacks a ``name`` or ``title`` attribute.
        """
        if name == 'page':
            self.passthrough = True
            self.out = open(attrs['name'] + '.html', 'w')
            self.out.write('<html><head>\n')
            self.out.write('<title>%s</title>\n' % attrs['title'])
            self.out.write('</head><body>\n')
        elif self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # The leading space separates each attribute from the tag
                # name and from the previous attribute.
                self.out.write(' %s="%s"' % (key, val))
            self.out.write('>')

    def endElement(self, name):
        """Handle a closing tag.

        ``</page>`` writes the HTML footer and closes the file.  Any other
        closing tag is echoed only while inside a page; the file must not
        be touched after it has been closed.
        """
        if name == 'page':
            self.passthrough = False
            self.out.write('\n</body></html>\n')
            self.out.close()
        elif self.passthrough:
            self.out.write('</%s>' % name)

    def characters(self, chars):
        """Copy character data to the current page; ignore it elsewhere."""
        if self.passthrough:
            self.out.write(chars)
# Script entry for xml_1.py: run the SAX parser over the sample site
# description, letting a PageMaker handler receive the element events.
page_maker = PageMaker()
parse('website.xml', page_maker)
3. 优化后的脚本:(添加了一些其他功能):
xml_2.py:
import os
from xml.sax import parse
from xml.sax.handler import ContentHandler
class Dispatcher:
    """Mixin that routes SAX element events to methods named after the tag.

    For an element ``foo`` the start event is sent to ``startFoo(attrs)``
    and the end event to ``endFoo()``.  When no such method exists, the
    event goes to ``defaultStart(name, attrs)`` / ``defaultEnd(name)``
    instead.  If neither exists the event is silently ignored.

    The method name is built with ``str.capitalize``, so only the first
    letter of the tag is upper-cased and the rest is lower-cased
    (``myTag`` -> ``startMytag``).
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler for event ``prefix`` ('start' or 'end') on tag ``name``.

        Args:
            prefix: event kind; 'start' events additionally pass ``attrs``.
            name: the element name as reported by the parser.
            attrs: the element's attributes (only used for 'start').
        """
        mname = prefix + name.capitalize()
        dname = 'default' + prefix.capitalize()
        method = getattr(self, mname, None)
        if callable(method):
            # Tag-specific handlers already know their tag name.
            args = ()
        else:
            # Fall back to the generic handler, which needs the tag name.
            method = getattr(self, dname, None)
            args = (name,)
        if prefix == 'start':
            args += (attrs,)
        # Neither a specific nor a default handler may exist; skip the
        # event rather than calling None.
        if callable(method):
            method(*args)

    def startElement(self, name, attrs):
        """SAX callback: dispatch an opening tag."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: dispatch a closing tag."""
        self.dispatch('end', name)
class WebsiteConstructor(Dispatcher, ContentHandler):
    """SAX handler that builds a directory tree of HTML files.

    ``<directory name="D">`` elements map to nested directories beneath
    the root passed to the constructor, and each ``<page name="N"
    title="T">`` becomes ``N.html`` in the directory that is current at
    that point.  Everything nested inside a page is copied through
    verbatim (no HTML escaping); anything outside a page is ignored.

    Element events arrive through ``Dispatcher``, which calls the
    ``start<Tag>`` / ``end<Tag>`` methods below and falls back to
    ``defaultStart`` / ``defaultEnd`` for every other tag.
    """

    # True only while inside a <page>, i.e. while self.out is an open file.
    # Defined on the class so it can be read before the first page starts.
    passthrough = False

    def __init__(self, directory):
        """Start building under ``directory``, creating it if necessary."""
        ContentHandler.__init__(self)
        # Stack of path components from the root to the current directory.
        self.directory = [directory]
        self.ensureDirectory()

    def ensureDirectory(self):
        """Create the directory named by the current stack if it is missing."""
        path = os.path.join(*self.directory)
        if not os.path.isdir(path):
            os.makedirs(path)

    def characters(self, chars):
        """Copy character data to the current page; ignore it elsewhere."""
        if self.passthrough:
            self.out.write(chars)

    def defaultStart(self, name, attrs):
        """Echo the opening tag of any non-special element inside a page."""
        if self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                self.out.write(' %s="%s"' % (key, val))
            self.out.write('>')

    def defaultEnd(self, name):
        """Echo the closing tag of any non-special element inside a page."""
        if self.passthrough:
            self.out.write('</%s>' % name)

    def startDirectory(self, attrs):
        """Enter a sub-directory, creating it so pages can be written there."""
        self.directory.append(attrs['name'])
        self.ensureDirectory()

    def endDirectory(self):
        """Leave the current sub-directory."""
        self.directory.pop()

    def startPage(self, attrs):
        """Open ``<name>.html`` in the current directory and write the header."""
        filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
        self.out = open(filename, 'w')
        self.writeHeader(attrs['title'])
        self.passthrough = True

    def endPage(self):
        """Write the footer and close the current page file."""
        self.passthrough = False
        self.writeFooter()
        self.out.close()

    def writeHeader(self, title):
        """Write the HTML prologue, with ``title`` as the page title."""
        self.out.write('<html>\n<head>\n <title>')
        self.out.write(title)
        self.out.write('</title>\n</head>\n <body>\n')

    def writeFooter(self):
        """Write the HTML epilogue that closes ``<body>`` and ``<html>``."""
        self.out.write('\n</body>\n</html>\n')
# Script entry for xml_2.py: build the site under the 'public_html'
# directory from the sample site description.
site_builder = WebsiteConstructor('public_html')
parse('website.xml', site_builder)