天天看点

python-object-two<xml->html>_1

1.准备样例文件website.xml:

<website>

  <page name="index" title="Home Page">

    <h1>Welcome to My Home Page</h1>

    <p>Hi, there. My name is Mr. Yang, and this is my home page. Here are some of my interests:</p>

    <ul>

      <li><a href="interests/shouting.html">Shouting</a></li>

      <li><a href="interests/sleeping.html">Sleeping</a></li>

      <li><a href="interests/eating.html">Eating</a></li>

    </ul>

  </page>

  <directory name="interests">

    <page name="shouting" title="Shouting">

      <h1>Shouting Page of Mr. yang</h1>

      <p>...</p>

    </page>

    <page name="sleeping" title="Sleeping">

      <h1>Sleeping Page of Mr. yang</h1>

      <p>...</p>

    </page>

    <page name="eating" title="Eating">

      <h1>Eating Page of Mr. yang</h1>

      <p>...</p>

    </page>

  </directory>

</website>

2. 简单的实现,编写python脚本xml_1.py:

#! /usr/bin/env python2.6

# Written by Tony.yang

#

from xml.sax import parse

from xml.sax.handler import ContentHandler

class PageMaker(ContentHandler):
    """SAX handler that writes each <page> element to its own HTML file.

    For every ``<page name="N" title="T">`` element a file ``N.html`` is
    created in the current working directory.  Everything nested inside the
    page (tags, attributes and text) is copied to that file unchanged;
    anything outside a page is ignored.
    """

    # True only while the parser is inside a <page> element, i.e. while
    # self.out refers to an open output file.
    passthrough = False

    def startElement(self, name, attrs):
        """Open a new HTML file for <page>, or copy a nested start tag.

        name  -- tag name of the element being opened.
        attrs -- mapping of the element's attributes; a <page> element must
                 provide 'name' and 'title'.
        """
        if name == 'page':
            self.passthrough = True
            self.out = open(attrs['name'] + '.html', 'w')
            self.out.write('<html><head>\n')
            self.out.write('<title>%s</title>\n' % attrs['title'])
            self.out.write('</head><body>\n')
        elif self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                # The leading space separates the attribute from the tag
                # name (and from the previous attribute).
                self.out.write(' %s="%s"' % (key, val))
            self.out.write('>')

    def endElement(self, name):
        """Finish the HTML file on </page>, or copy a nested end tag.

        name -- tag name of the element being closed.
        """
        if name == 'page':
            # Stop copying before the file is closed so that no later
            # callback tries to write to a closed file.
            self.passthrough = False
            self.out.write('\n</body></html>\n')
            self.out.close()
        elif self.passthrough:
            self.out.write('</%s>' % name)

    def characters(self, chars):
        """Copy text content to the current page file, if inside a page."""
        if self.passthrough:
            self.out.write(chars)

# Run the converter: feed website.xml through a fresh PageMaker handler.
parse(source='website.xml', handler=PageMaker())

3. 优化后的脚本(添加了一些其他功能):

xml_2.py:

import os
from xml.sax import parse
from xml.sax.handler import ContentHandler

class Dispatcher:
    """Mix-in that routes SAX element events to per-tag handler methods.

    A start tag <foo> is routed to ``startFoo(attrs)`` and its end tag to
    ``endFoo()``.  When no tag-specific method exists, the event falls back
    to ``defaultStart(name, attrs)`` / ``defaultEnd(name)``.  If neither is
    defined the event is silently ignored.
    """

    def dispatch(self, prefix, name, attrs=None):
        """Call the handler method for one element event.

        prefix -- 'start' or 'end'.
        name   -- tag name of the element.
        attrs  -- attribute mapping; only passed on for 'start' events.
        """
        mname = prefix + name.capitalize()
        dname = 'default' + prefix.capitalize()
        method = getattr(self, mname, None)
        if callable(method):
            # Tag-specific handlers already know which tag they serve.
            args = ()
        else:
            # Default handlers receive the tag name as first argument.
            method = getattr(self, dname, None)
            args = (name,)
        if prefix == 'start':
            args = args + (attrs,)
        # Neither a specific nor a default handler may exist.
        if callable(method):
            method(*args)

    def startElement(self, name, attrs):
        """SAX callback: forward a start tag to dispatch()."""
        self.dispatch('start', name, attrs)

    def endElement(self, name):
        """SAX callback: forward an end tag to dispatch()."""
        self.dispatch('end', name)

class WebsiteConstructor(Dispatcher, ContentHandler):
    """SAX handler that builds a tree of HTML files from a website XML file.

    <directory name="D"> elements become sub-directories and
    <page name="N" title="T"> elements become files ``N.html`` inside the
    directory that is current when the page starts.  Everything nested in a
    page is copied into its file unchanged.
    """

    # True only while inside a <page> element, i.e. while self.out is open.
    passthrough = False

    def __init__(self, directory):
        """Create the handler and make sure the output root exists.

        directory -- path of the root directory the site is written to.
        """
        ContentHandler.__init__(self)
        # Stack of path components; the last entry is the current directory.
        self.directory = [directory]
        self.ensureDirectory()

    def ensureDirectory(self):
        """Create the directory named by the current stack if it is missing."""
        path = os.path.join(*self.directory)
        if not os.path.isdir(path):
            os.makedirs(path)

    def characters(self, chars):
        """Copy text content to the current page file, if inside a page."""
        if self.passthrough:
            self.out.write(chars)

    def defaultStart(self, name, attrs):
        """Copy the start tag of any element without a dedicated handler.

        name  -- tag name of the element.
        attrs -- mapping of the element's attributes.
        """
        if self.passthrough:
            self.out.write('<' + name)
            for key, val in attrs.items():
                self.out.write(' %s="%s"' % (key, val))
            self.out.write('>')

    def defaultEnd(self, name):
        """Copy the end tag of any element without a dedicated handler."""
        if self.passthrough:
            self.out.write('</%s>' % name)

    def startDirectory(self, attrs):
        """Enter the sub-directory named by attrs['name'], creating it."""
        self.directory.append(attrs['name'])
        # Pages inside this directory are opened for writing right away, so
        # the directory has to exist before the first one starts.
        self.ensureDirectory()

    def endDirectory(self):
        """Leave the current sub-directory."""
        self.directory.pop()

    def startPage(self, attrs):
        """Open ``<name>.html`` in the current directory and write its header.

        attrs -- attribute mapping; must provide 'name' and 'title'.
        """
        filename = os.path.join(*self.directory + [attrs['name'] + '.html'])
        self.out = open(filename, 'w')
        self.writeHeader(attrs['title'])
        self.passthrough = True

    def endPage(self):
        """Write the footer and close the current page file."""
        # Stop copying first so nothing is written after the file is closed.
        self.passthrough = False
        self.writeFooter()
        self.out.close()

    def writeHeader(self, title):
        """Write the HTML preamble, with the given title, to the page file."""
        self.out.write('<html>\n<head>\n    <title>')
        self.out.write(title)
        self.out.write('</title>\n</head>\n    <body>\n')

    def writeFooter(self):
        """Write the closing HTML tags to the page file."""
        self.out.write('\n</body>\n</html>\n')

# Run the converter: build the site described by website.xml under
# the public_html directory.
parse(source='website.xml', handler=WebsiteConstructor('public_html'))