#!/usr/bin/env python

"""
:Author: Ollie Rutherfurd
:Contact: oliver@rutherfurd.net
:Revision: $Revision: 1.4 $
:Date: $Date: 2003/02/20 23:10:10 $
:Copyright: This module has been placed in the public domain.

DocBook document tree Writer.

This Writer converts a reST document tree to a subset
of DocBook.

.. Note:: This is an unfinished work in progress.

Document Types
==============

This writer can create 3 types of DocBook documents:

1. "article" *(default)*
2. "book"
3. "chapter"

.. Note:: When creating a "book" document, all first-level
   sections are output as "chapter" elements instead of
   "section" as in "article" and "chapter".

Mappings
========

Option List
-----------

As there is no direct equivalent for a listing of program
options in DocBook_, as defined in reST_, a table containing
the option list contents is generated.

Field List
----------

Like `Option List`_, there is no direct equivalent for a
Field List in DocBook, so this is done using a "variablelist".

.. NOTE:: It might be better to switch Definition List to
   glossary or something similar, so Field List and
   Definition List are generating the same type of output.

Bibliography Elements
---------------------

Here's how reST's bibliography elements are mapped
to DocBook elements:

+--------------+---------------------------------------------+
| reST Element | DocBook Element                             |
+==============+=============================================+
| author       | {doctype}info/author/othername              |
|              | or                                          |
|              | {doctype}info/authorgroup/author/othername  |
|              | if nested under ``authors``                 |
+--------------+---------------------------------------------+
| authors      | {doctype}info/authorgroup/                  |
+--------------+---------------------------------------------+
| contact      | {doctype}info/author/email                  |
+--------------+---------------------------------------------+
| copyright    | {doctype}info/legalnotice                   |
+--------------+---------------------------------------------+
| date         | {doctype}info/date                          |
+--------------+---------------------------------------------+
| organization | {doctype}info/orgname                       |
+--------------+---------------------------------------------+
| revision     | concatenated with ``version`` into          |
|              | {doctype}info/edition                       |
+--------------+---------------------------------------------+
| status       | {doctype}info/releaseinfo                   |
+--------------+---------------------------------------------+
| version      | concatenated with ``revision`` into         |
|              | {doctype}info/edition                       |
+--------------+---------------------------------------------+

Note: ``{doctype}`` is the type of the DocBook document being
generated, one of the following: ``article``, ``book``,
or ``chapter``.

Todo
====

- Inline images -- need to figure out how to identify an
  inline image

- list item marks are not guaranteed to be what was specified
  (if they are, it is by coincidence; however, unless one starts
  out of order they should match most of the time).

Should para, note, etc... not in a section at the start of the
document be stuffed into an untitled ``section``?
"""

__docformat__ = 'reStructuredText'

import re
import string
from docutils import writers, nodes, languages
from types import ListType


class Writer(writers.Writer):

    """docutils Writer producing a subset of DocBook XML."""

    settings_spec = (
        'DocBook-Specific Options',
        None,
        (('Set DocBook document type. '
          'Choices are "article", "book", and "chapter". '
          'Default is "article".',
          ['--doctype'],
          {'default': 'article',
           'metavar': '<name>',
           'type': 'choice',
           'choices': ('article', 'book', 'chapter',)}
          ),
         )
        )

    output = None
    """Final translated form of `document`."""

    def translate(self):
        """Walk `self.document` and store the DocBook text in `self.output`."""
        visitor = DocBookTranslator(self.document)
        self.document.walkabout(visitor)
        self.output = visitor.astext()


class DocBookTranslator(nodes.NodeVisitor):

    """
    Node visitor which accumulates DocBook markup.

    Output is collected as string fragments in three lists
    (``doc_header``, ``docinfo``, ``body``) plus ``doc_footer``, which
    `astext()` joins.  ``context`` is a stack used to pass closing
    tags or counters between ``visit_*``/``depart_*`` methods.
    """

    XML_DECL = '<?xml version="1.0" encoding="%s"?>\n'

    # NOTE(review): the DTD version/identifiers below were reconstructed;
    # confirm they match the DocBook release this writer targets.
    DOCTYPE_DECL = (
        '<!DOCTYPE %s PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"\n'
        '    "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">\n')

    def __init__(self, document):
        nodes.NodeVisitor.__init__(self, document)
        self.language = languages.get_language(
            document.settings.language_code)
        self.doctype = document.settings.doctype
        self.doc_header = [
            self.XML_DECL % (document.settings.output_encoding,),
            self.DOCTYPE_DECL % (self.doctype,),
            '<%s>\n' % (self.doctype,),
        ]
        self.doc_footer = [
            '</%s>\n' % (self.doctype,)
        ]
        self.body = []
        # saved "real" body while a footnote is being collected
        self._body = None
        # section nesting depth (0 == not inside any section)
        self.section = 0
        self.context = []
        self.colnames = []
        # index into ``colnames`` of the entry currently being written
        self.entry_level = 0
        # footnote id -> list of output fragments for that footnote
        self.footnotes = {}
        # footnote id -> [(reference id, fragment list, position), ...]
        self.footnote_map = {}
        self.docinfo = []

    def astext(self):
        """Return the complete DocBook document as a single string."""
        return ''.join(self.doc_header
                       + self.docinfo
                       + self.body
                       + self.doc_footer)

    def encode(self, text):
        """Encode special characters in `text` & return."""
        # @@@ A codec to do these and all other
        # HTML entities would be nice.
        # "&" must be replaced first so the entities added below
        # are not themselves escaped.
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        text = text.replace('"', "&quot;")
        text = text.replace(">", "&gt;")
        return text

    def rearrange_footnotes(self):
        """
        Replaces ``footnote_reference`` placeholders with
        ``footnote`` element content as DocBook and reST
        handle footnotes differently.

        DocBook defines footnotes inline, whereas they
        may be anywhere in reST.  This function replaces the
        first instance of a ``footnote_reference`` with
        the ``footnote`` element itself, and later
        references of the same footnote with
        ``footnoteref`` elements.
        """
        for (footnote_id, refs) in self.footnote_map.items():
            ref_id, context, pos = refs[0]
            context[pos] = ''.join(self.footnotes[footnote_id])
            for ref_id, context, pos in refs[1:]:
                context[pos] = '<footnoteref linkend="%s"/>' \
                    % (footnote_id,)

    def attval(self, text,
               transtable=string.maketrans('\n\r\t\v\f', '     ')):
        """Cleanse, encode, and return attribute value text."""
        # every whitespace control character maps to a plain space;
        # both maketrans arguments must be the same length.
        return self.encode(text.translate(transtable))

    def starttag(self, node, tagname, suffix='\n', infix='', **attributes):
        """
        Construct and return a start tag given a node
        (id & class attributes are extracted), tag name,
        and optional attributes.

        `suffix` is appended after the tag, `infix` is inserted just
        before the closing ``>`` (used by `emptytag()`).
        """
        atts = {}
        for (name, value) in attributes.items():
            atts[name.lower()] = value
        for att in ('id',):             # node attribute overrides
            if node.has_key(att):
                atts[att] = node[att]
        # sort for deterministic attribute order
        attlist = list(atts.items())
        attlist.sort()
        parts = [tagname.lower()]
        for name, value in attlist:
            if value is None:           # boolean attribute
                # According to the HTML spec, ``<element boolean>`` is
                # good, ``<element boolean="boolean">`` is bad.
                # (But the XHTML (XML) spec says the opposite.  <sigh>)
                parts.append(name.lower())
            elif isinstance(value, ListType):
                values = [str(v) for v in value]
                parts.append('%s="%s"' % (name.lower(),
                                          self.attval(' '.join(values))))
            else:
                parts.append('%s="%s"' % (name.lower(),
                                          self.attval(str(value))))
        return '<%s%s>%s' % (' '.join(parts), infix, suffix)

    def emptytag(self, node, tagname, suffix='\n', **attributes):
        """Construct and return an XML-compatible empty tag."""
        return self.starttag(node, tagname, suffix, infix=' /', **attributes)

    def _visit_titled_admonition(self, node, tagname):
        """Open `tagname` and emit the localized admonition title."""
        self.body.append(self.starttag(node, tagname))
        self.body.append('\n<title>%s</title>\n'
                         % (self.language.labels[node.tagname],))

    def visit_Text(self, node):
        self.body.append(self.encode(node.astext()))

    def depart_Text(self, node):
        pass

    def visit_attention(self, node):
        self._visit_titled_admonition(node, 'note')

    def depart_attention(self, node):
        self.body.append('</note>\n')

    # author is handled in ``visit_docinfo()``
    def visit_author(self, node):
        raise nodes.SkipNode

    # authors is handled in ``visit_docinfo()``
    def visit_authors(self, node):
        raise nodes.SkipNode

    def visit_block_quote(self, node):
        self.body.append(self.starttag(node, 'blockquote'))

    def depart_block_quote(self, node):
        self.body.append('</blockquote>\n')

    def visit_bullet_list(self, node):
        self.body.append(self.starttag(node, 'itemizedlist'))

    def depart_bullet_list(self, node):
        self.body.append('</itemizedlist>\n')

    def visit_caption(self, node):
        # NOTE: ideally, this should probably be stuffed into
        # the mediaobject as a "caption" element
        self.body.append(self.starttag(node, 'para'))

    def depart_caption(self, node):
        self.body.append('</para>')

    def visit_caution(self, node):
        self._visit_titled_admonition(node, 'caution')

    def depart_caution(self, node):
        self.body.append('</caution>\n')

    # reST seems to handle citations as labeled
    # footnotes, whereas DocBook doesn't from what
    # I can tell, so I'm not sure how to give DocBook
    # citations that result in equivalent output
    # as the docutils html writer.
    #
    # Currently, citations are handled as footnotes,
    # using the citation label as the footnote label
    # which seems functionally equivalent, but the
    # DocBook stylesheets for generating HTML output
    # don't seem to be using the label for footnotes
    # so this doesn't work.
    #
    # So I'm at a bit of a loss as to how to
    # handle citations.  Any ideas or suggestions would
    # be welcome.

    # TODO: citation
    def visit_citation(self, node):
        self.visit_footnote(node)

    def depart_citation(self, node):
        self.depart_footnote(node)

    # TODO: citation_reference
    def visit_citation_reference(self, node):
        self.visit_footnote_reference(node)

    def depart_citation_reference(self, node):
        pass

    def visit_classifier(self, node):
        self.body.append(' : ')
        self.body.append(self.starttag(node, 'type'))

    def depart_classifier(self, node):
        self.body.append('</type>\n')

    def visit_colspec(self, node):
        self.colnames.append('col_%d' % (len(self.colnames) + 1,))
        atts = {'colname': self.colnames[-1]}
        self.body.append(self.emptytag(node, 'colspec', **atts))

    def depart_colspec(self, node):
        pass

    def visit_comment(self, node, sub=re.compile('-(?=-)').sub):
        """Escape double-dashes in comment text."""
        self.body.append('<!-- %s -->\n' % sub('- ', node.astext()))
        raise nodes.SkipNode

    # contact is handled in ``visit_docinfo()``
    def visit_contact(self, node):
        raise nodes.SkipNode

    # copyright is handled in ``visit_docinfo()``
    def visit_copyright(self, node):
        raise nodes.SkipNode

    def visit_danger(self, node):
        self._visit_titled_admonition(node, 'caution')

    def depart_danger(self, node):
        self.body.append('</caution>\n')

    # date is handled in ``visit_docinfo()``
    def visit_date(self, node):
        raise nodes.SkipNode

    # TODO: decoration
    def visit_decoration(self, node):
        pass

    def depart_decoration(self, node):
        pass

    def visit_definition(self, node):
        # "term" is not closed in depart_term
        self.body.append('</term>\n')
        self.body.append(self.starttag(node, 'listitem'))

    def depart_definition(self, node):
        self.body.append('</listitem>\n')

    def visit_definition_list(self, node):
        self.body.append(self.starttag(node, 'variablelist'))

    def depart_definition_list(self, node):
        self.body.append('</variablelist>\n')

    def visit_definition_list_item(self, node):
        self.body.append(self.starttag(node, 'varlistentry'))

    def depart_definition_list_item(self, node):
        self.body.append('</varlistentry>\n')

    def visit_description(self, node):
        self.body.append(self.starttag(node, 'entry'))

    def depart_description(self, node):
        self.body.append('</entry>\n')

    def visit_docinfo(self, node):
        """
        Collects all docinfo elements for the document.

        Since reST's bibliography elements don't map very
        cleanly to DocBook, rather than maintain state and
        check dependencies within the different visitor
        functions all processing of bibliography elements
        is done within this function.

        .. NOTE:: Skips processing of all child nodes as
           everything should be collected here.

        Raises ``NotImplementedError`` (via `unimplemented_visit()`)
        for any child node type not listed below.
        """
        docinfo = ['<%sinfo>\n' % self.doctype]

        authors = []
        author = ''
        contact = ''
        date = ''
        legalnotice = ''
        orgname = ''
        releaseinfo = ''
        revision, version = '', ''

        # all text is XML-escaped as it is collected, since it is
        # written straight into element content below.
        for n in node:
            if isinstance(n, nodes.author):
                author = self.encode(n.astext())
            elif isinstance(n, nodes.authors):
                for a in n:
                    authors.append(self.encode(a.astext()))
            elif isinstance(n, nodes.contact):
                contact = self.encode(n.astext())
            elif isinstance(n, nodes.copyright):
                legalnotice = self.encode(n.astext())
            elif isinstance(n, nodes.date):
                date = self.encode(n.astext())
            elif isinstance(n, nodes.organization):
                orgname = self.encode(n.astext())
            elif isinstance(n, nodes.revision):
                revision = 'Revision ' + self.encode(n.astext())
            elif isinstance(n, nodes.status):
                releaseinfo = self.encode(n.astext())
            elif isinstance(n, nodes.version):
                version = 'Version ' + self.encode(n.astext())
            else:
                # since all child nodes are handled here raise an
                # exception if node is not handled, so it doesn't
                # silently slip through.
                self.unimplemented_visit(n)

        # can only add author if name is present
        # since contact is associated with author, the contact
        # can also only be added if an author name is given.
        if author:
            docinfo.append('<author>\n')
            docinfo.append('<othername>%s</othername>\n' % author)
            if contact:
                docinfo.append('<email>%s</email>\n' % contact)
            docinfo.append('</author>\n')

        if authors:
            docinfo.append('<authorgroup>\n')
            for name in authors:
                docinfo.append(
                    '<author><othername>%s</othername></author>\n' % name)
            docinfo.append('</authorgroup>\n')

        if revision or version:
            # "Version x, Revision y", or whichever one is present
            edition = ', '.join([part for part in (version, revision)
                                 if part])
            docinfo.append('<edition>%s</edition>\n' % edition)

        if date:
            docinfo.append('<date>%s</date>\n' % date)

        if orgname:
            docinfo.append('<orgname>%s</orgname>\n' % orgname)

        if releaseinfo:
            docinfo.append('<releaseinfo>%s</releaseinfo>\n' % releaseinfo)

        if legalnotice:
            docinfo.append('<legalnotice>\n')
            docinfo.append('<para>%s</para>\n' % legalnotice)
            docinfo.append('</legalnotice>\n')

        # only emit the info element if something was put in it;
        # otherwise an unclosed start tag would end up in the output.
        if len(docinfo) > 1:
            docinfo.append('</%sinfo>\n' % self.doctype)
            self.docinfo = docinfo

        raise nodes.SkipChildren

    def depart_docinfo(self, node):
        pass

    def visit_doctest_block(self, node):
        self.body.append('<informalexample>\n')
        self.body.append(self.starttag(node, 'programlisting'))

    def depart_doctest_block(self, node):
        self.body.append('</programlisting>\n')
        self.body.append('</informalexample>\n')

    def visit_document(self, node):
        pass

    def depart_document(self, node):
        # all footnotes and references have been seen by now
        self.rearrange_footnotes()

    def visit_emphasis(self, node):
        self.body.append(self.starttag(node, 'emphasis'))

    def depart_emphasis(self, node):
        self.body.append('</emphasis>\n')

    def visit_entry(self, node):
        tagname = 'entry'
        atts = {}
        span = 0
        if node.has_key('morerows'):
            atts['morerows'] = node['morerows']
        if node.has_key('morecols'):
            span = node['morecols']
            atts['namest'] = self.colnames[self.entry_level]
            atts['nameend'] = self.colnames[self.entry_level + span]
        # for tracking what namest and nameend are; a spanning entry
        # consumes ``morecols`` extra columns.
        self.entry_level += 1 + span
        self.body.append(self.starttag(node, tagname, **atts))

    def depart_entry(self, node):
        self.body.append('</entry>\n')

    def visit_enumerated_list(self, node):
        # TODO: need to specify "mark" type used for list items
        self.body.append(self.starttag(node, 'orderedlist'))

    def depart_enumerated_list(self, node):
        self.body.append('</orderedlist>\n')

    def visit_error(self, node):
        self._visit_titled_admonition(node, 'caution')

    def depart_error(self, node):
        self.body.append('</caution>\n')

    # TODO: wrap with some element (filename used in DocBook example)
    def visit_field(self, node):
        self.body.append(self.starttag(node, 'varlistentry'))

    def depart_field(self, node):
        self.body.append('</varlistentry>\n')

    # TODO: see if this should be wrapped with some element
    def visit_field_argument(self, node):
        self.body.append(' ')

    def depart_field_argument(self, node):
        pass

    def visit_field_body(self, node):
        # NOTE: this requires that a field body always
        #   be present, which looks like the case
        #   (from docutils.dtd)
        self.body.append(self.context.pop())
        self.body.append(self.starttag(node, 'listitem'))

    def depart_field_body(self, node):
        self.body.append('</listitem>\n')

    def visit_field_list(self, node):
        self.body.append(self.starttag(node, 'variablelist'))

    def depart_field_list(self, node):
        self.body.append('</variablelist>\n')

    def visit_field_name(self, node):
        self.body.append(self.starttag(node, 'term'))
        # popped by visit_field_body, so "field_argument" is
        # content within "term"
        self.context.append('</term>\n')

    def depart_field_name(self, node):
        pass

    def visit_figure(self, node):
        self.body.append(self.starttag(node, 'informalfigure'))
        self.body.append('<blockquote>')

    def depart_figure(self, node):
        self.body.append('</blockquote>')
        self.body.append('</informalfigure>\n')

    # TODO: footer (this is where 'generated by docutils' arrives)
    # if that's all that will be there, it could map to "colophon"
    def visit_footer(self, node):
        raise nodes.SkipChildren

    def depart_footer(self, node):
        pass

    def visit_footnote(self, node):
        self.footnotes[node['id']] = []
        atts = {'id': node['id']}
        if isinstance(node[0], nodes.label):
            # FIXME: this fails with the second auto-sequence character
            # used in the test document ``test.txt``.
            atts['label'] = node[0].astext()
        self.footnotes[node['id']].append(
            self.starttag(node, 'footnote', **atts))

        # replace body with a footnote collector list
        # which will hold all the contents for this footnote.
        # This needs to be kept separate so it can be used to replace
        # the first ``footnote_reference`` as DocBook defines
        # ``footnote`` elements inline.
        self._body = self.body
        self.body = self.footnotes[node['id']]

    def depart_footnote(self, node):
        # finish footnote and then replace footnote collector
        # with real body list.
        self.footnotes[node['id']].append('</footnote>\n')
        self.body = self._body
        self._body = None

    def visit_footnote_reference(self, node):
        if node.has_key('refid'):
            refid = node['refid']
        else:
            refid = self.document.nameids[node['refname']]

        # going to replace this footnote reference with the actual
        # footnote later on, so store the footnote id to replace
        # this reference with and the list and position to replace it
        # in.  Both list and position are stored in case a footnote
        # reference is within a footnote, in which case ``self.body``
        # won't really be ``self.body`` but a footnote collector
        # list.
        refs = self.footnote_map.get(refid, [])
        refs.append((node['id'], self.body, len(self.body),))
        self.footnote_map[refid] = refs

        # add place holder list item which should later be
        # replaced with the contents of the footnote element
        # and its child elements
        self.body.append('')

        raise nodes.SkipNode

    # TODO: header

    def visit_hint(self, node):
        self._visit_titled_admonition(node, 'note')

    def depart_hint(self, node):
        self.body.append('</note>\n')

    def visit_image(self, node):
        atts = node.attributes.copy()
        atts['fileref'] = atts['uri']
        alt = None
        del atts['uri']
        if 'alt' in atts:
            alt = atts['alt']
            del atts['alt']
        if 'height' in atts:
            # DocBook's imagedata uses "depth" for the vertical size
            atts['depth'] = atts['height']
            del atts['height']
        # NOTE: using win32 port of xsltproc and docbook-stylesheets-1.51.1
        # I'm getting the following error when transforming:
        #   Error C:\home\igor\src\gnome-xml\xpath.c:8023: Undefined
        #   namespace prefix xmlXPathCompiledEval: evaluation failed
        # When I switched to version 1.49 of the docbook-stylesheets
        # I didn't have this problem.
        self.body.append('<mediaobject>\n')
        self.body.append('<imageobject>\n')
        self.body.append(self.emptytag(node, 'imagedata', **atts))
        self.body.append('</imageobject>\n')
        if alt:
            # alt text becomes element content, so it must be escaped
            self.body.append('<textobject><phrase>'
                             '%s</phrase></textobject>\n'
                             % self.encode(alt))
        self.body.append('</mediaobject>\n')

    def depart_image(self, node):
        pass

    def visit_important(self, node):
        self.body.append(self.starttag(node, 'important'))

    def depart_important(self, node):
        self.body.append('</important>')

    # @@@ Incomplete, pending a proper implementation on the
    # Parser/Reader end.
    def visit_interpreted(self, node):
        self.body.append('<constant>\n')

    def depart_interpreted(self, node):
        self.body.append('</constant>\n')

    def visit_label(self, node):
        # getting label for "footnote" in ``visit_footnote``
        # because label is an attribute for the ``footnote``
        # element.
        if isinstance(node.parent, nodes.footnote):
            raise nodes.SkipNode
        # TODO: handle citation label
        elif isinstance(node.parent, nodes.citation):
            raise nodes.SkipNode

    def depart_label(self, node):
        pass

    def visit_legend(self, node):
        # TODO: explain why this is empty....
        pass

    def depart_legend(self, node):
        pass

    def visit_list_item(self, node):
        self.body.append(self.starttag(node, 'listitem'))

    def depart_list_item(self, node):
        self.body.append('</listitem>\n')

    def visit_literal(self, node):
        self.body.append('<literal>')

    def depart_literal(self, node):
        self.body.append('</literal>')

    def visit_literal_block(self, node):
        self.body.append(self.starttag(node, 'programlisting'))

    def depart_literal_block(self, node):
        self.body.append('</programlisting>\n')

    def visit_note(self, node):
        self._visit_titled_admonition(node, 'note')

    def depart_note(self, node):
        self.body.append('</note>\n')

    def visit_option(self, node):
        self.body.append(self.starttag(node, 'command'))
        # context[-1] counts the options already written in this
        # option group (pushed by visit_option_group)
        if self.context[-1]:
            self.body.append(', ')

    def depart_option(self, node):
        self.context[-1] += 1
        self.body.append('</command>')

    def visit_option_argument(self, node):
        self.body.append(node.get('delimiter', ' '))
        self.body.append(self.starttag(node, 'replaceable', ''))

    def depart_option_argument(self, node):
        self.body.append('</replaceable>')

    def visit_option_group(self, node):
        self.body.append(self.starttag(node, 'entry'))
        # number of options written so far in this group
        self.context.append(0)

    def depart_option_group(self, node):
        self.context.pop()
        self.body.append('</entry>\n')

    def visit_option_list(self, node):
        self.body.append(self.starttag(node, 'informaltable', frame='all'))
        self.body.append('<tgroup cols="2">\n')
        self.body.append('<colspec colname="option_col"/>\n')
        self.body.append('<colspec colname="description_col"/>\n')
        self.body.append('<thead>\n')
        self.body.append('<row>\n')
        # FIXME: shouldn't hardcode everything...
        self.body.append('<entry align="center">Option</entry>\n')
        self.body.append('<entry align="center">Description</entry>\n')
        self.body.append('</row>\n')
        self.body.append('</thead>\n')
        self.body.append('<tbody>\n')

    def depart_option_list(self, node):
        self.body.append('</tbody>')
        self.body.append('</tgroup>\n')
        self.body.append('</informaltable>\n')

    def visit_option_list_item(self, node):
        self.body.append(self.starttag(node, 'row'))

    def depart_option_list_item(self, node):
        self.body.append('</row>\n')

    def visit_option_string(self, node):
        pass

    def depart_option_string(self, node):
        pass

    # organization is handled in ``visit_docinfo()``
    def visit_organization(self, node):
        raise nodes.SkipNode

    def visit_paragraph(self, node):
        self.body.append(self.starttag(node, 'para', ''))

    def depart_paragraph(self, node):
        self.body.append('</para>\n')

    # TODO: problematic
    visit_problematic = depart_problematic = lambda self, node: None

    def visit_raw(self, node):
        # raw content is passed through untouched, but only when it
        # is explicitly marked as DocBook; anything else is dropped.
        if node.has_key('format') and node['format'] == 'docbook':
            self.body.append(node.astext())
        raise nodes.SkipNode

    def visit_reference(self, node):
        # the element name chosen here is pushed on ``context`` so
        # depart_reference can close the same element.
        atts = {}
        if node.has_key('refuri'):
            atts['url'] = node['refuri']
            self.context.append('ulink')
        elif node.has_key('refid'):
            atts['linkend'] = node['refid']
            self.context.append('link')
        elif node.has_key('refname'):
            atts['linkend'] = self.document.nameids[node['refname']]
            self.context.append('link')
        self.body.append(self.starttag(node, self.context[-1], '', **atts))

    def depart_reference(self, node):
        self.body.append('</%s>' % (self.context.pop(),))

    # revision is handled in ``visit_docinfo()``
    def visit_revision(self, node):
        raise nodes.SkipNode

    def visit_row(self, node):
        # restart column tracking for visit_entry
        self.entry_level = 0
        self.body.append(self.starttag(node, 'row'))

    def depart_row(self, node):
        self.body.append('</row>\n')

    def visit_section(self, node):
        # first-level sections of a "book" become chapters
        if self.section == 0 and self.doctype == 'book':
            self.body.append(self.starttag(node, 'chapter'))
        else:
            self.body.append(self.starttag(node, 'section'))
        self.section += 1

    def depart_section(self, node):
        self.section -= 1
        if self.section == 0 and self.doctype == 'book':
            self.body.append('</chapter>\n')
        else:
            self.body.append('</section>\n')

    def visit_sidebar(self, node):
        self.body.append(self.starttag(node, 'sidebar'))

    def depart_sidebar(self, node):
        self.body.append('</sidebar>\n')

    # status is handled in ``visit_docinfo()``
    def visit_status(self, node):
        raise nodes.SkipNode

    def visit_strong(self, node):
        self.body.append(self.starttag(node, 'emphasis', role='strong'))

    def depart_strong(self, node):
        self.body.append('</emphasis>\n')

    def visit_substitution_definition(self, node):
        raise nodes.SkipNode

    def visit_substitution_reference(self, node):
        self.unimplemented_visit(node)

    def visit_subtitle(self, node):
        self.body.append(self.starttag(node, 'subtitle'))

    def depart_subtitle(self, node):
        self.body.append('</subtitle>\n')

    # TODO: system_message
    visit_system_message = depart_system_message = lambda self, node: None

    def visit_table(self, node):
        self.body.append(
            self.starttag(node, 'informaltable', frame='all')
        )

    def depart_table(self, node):
        self.body.append('</informaltable>\n')

    # TODO: target
    visit_target = depart_target = lambda self, node: None

    def visit_tbody(self, node):
        self.body.append(self.starttag(node, 'tbody'))

    def depart_tbody(self, node):
        self.body.append('</tbody>\n')

    def visit_term(self, node):
        self.body.append(self.starttag(node, 'term'))
        # NOTE(review): inner wrapper element reconstructed as
        # "varname"; confirm against the intended output.
        self.body.append('<varname>')

    def depart_term(self, node):
        # Leave the end tag "term" to ``visit_definition()``,
        # in case there's a classifier.
        self.body.append('</varname>')

    def visit_tgroup(self, node):
        # column names are per-tgroup; visit_colspec refills the list
        self.colnames = []
        atts = {'cols': node['cols']}
        self.body.append(self.starttag(node, 'tgroup', **atts))

    def depart_tgroup(self, node):
        self.body.append('</tgroup>\n')

    def visit_thead(self, node):
        self.body.append(self.starttag(node, 'thead'))

    def depart_thead(self, node):
        self.body.append('</thead>\n')

    def visit_tip(self, node):
        self.body.append(self.starttag(node, 'tip'))

    def depart_tip(self, node):
        self.body.append('</tip>\n')

    def visit_title(self, node):
        self.body.append(self.starttag(node, 'title'))

    def depart_title(self, node):
        self.body.append('</title>\n')

    def visit_topic(self, node):
        # Table of Contents generation handled by DocBook
        if node.get('class') == 'contents':
            raise nodes.SkipChildren
        elif node.get('class') == 'abstract':
            self.body.append(self.starttag(node, 'abstract'))
            self.context.append('abstract')
        else:
            # the exception message names the node class; nothing is
            # printed here so stdout stays clean for document output.
            self.unimplemented_visit(node)

    def depart_topic(self, node):
        if self.context:
            self.body.append('</%s>\n' % (self.context.pop(),))

    # QUESTION: what to do for "transition"?
    def visit_transition(self, node):
        pass

    def depart_transition(self, node):
        pass

    # version is handled in ``visit_docinfo()``
    def visit_version(self, node):
        raise nodes.SkipNode

    def visit_warning(self, node):
        self.body.append(self.starttag(node, 'warning'))

    def depart_warning(self, node):
        self.body.append('</warning>\n')

    def unimplemented_visit(self, node):
        """Raise ``NotImplementedError`` naming the class of `node`."""
        raise NotImplementedError('visiting unimplemented node type: %s'
                                  % node.__class__.__name__)

# :collapseFolds=0:folding=sidekick:indentSize=4:
# :lineSeparator=\n:noTabs=true:tabSize=4: