You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

249 lines
8.8 KiB

# encoding: utf-8
"""
Initializes oxml sub-package, including registering custom element classes
corresponding to Open XML elements.
"""
from __future__ import absolute_import
from lxml import etree
from .ns import NamespacePrefixedTag, nsmap
# Configure the XML parser shared by this package.
# `element_class_lookup` is the namespace-keyed registry that
# register_element_cls() populates; attaching it to `oxml_parser` is what lets
# parse_xml() and OxmlElement() produce instances of the registered classes.
element_class_lookup = etree.ElementNamespaceClassLookup()
# remove_blank_text and resolve_entities are lxml XMLParser options: per the
# lxml documentation they drop whitespace-only text between elements and turn
# off entity expansion, respectively.
oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
oxml_parser.set_element_class_lookup(element_class_lookup)
def parse_xml(xml):
    """
    Parse the XML character string *xml* and return its root lxml element.

    *xml* may be either a Python 2.x string or unicode. Parsing is done with
    the package's custom ``oxml_parser``, so any element in *xml* whose tag
    has a registered custom element class is produced as an instance of
    that class.
    """
    return etree.fromstring(xml, oxml_parser)
def register_element_cls(tag, cls):
    """
    Make *cls* the class the oxml parser constructs for elements named *tag*.

    *tag* is a namespace-prefixed tag name of the form ``nspfx:tagroot``,
    e.g. ``'w:document'``. The prefix is translated to its namespace name
    through ``nsmap`` and *cls* is stored under the tag root in that
    namespace's entry of ``element_class_lookup``.

    Unpacking the split result raises |ValueError| unless *tag* contains
    exactly one colon.
    """
    prefix, local_name = tag.split(':')
    namespace_uri = nsmap[prefix]
    element_class_lookup.get_namespace(namespace_uri)[local_name] = cls
def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
    """
    Create and return a 'loose' (parentless) lxml element for *nsptag_str*.

    *nsptag_str* is a tag name carrying its standard namespace prefix, e.g.
    'a:tbl'. The element is made by the package's custom parser, so it is an
    instance of the custom element class registered for that tag when one is
    defined.

    *attrs*, when given, is a dictionary of attribute values to set on the
    new element. *nsdecls*, when given, is a dict mapping namespace prefix to
    namespace name; each entry is declared on the element. When *nsdecls* is
    omitted, the only namespace declared is the one implied by the prefix of
    *nsptag_str*.
    """
    nsptag = NamespacePrefixedTag(nsptag_str)
    # fall back to the tag's own single-namespace map when none was supplied
    namespace_decls = nsptag.nsmap if nsdecls is None else nsdecls
    return oxml_parser.makeelement(
        nsptag.clark_name, attrib=attrs, nsmap=namespace_decls
    )
# ===========================================================================
# custom element class mappings
# ===========================================================================
# Every register_element_cls() call from here on binds a namespace-prefixed
# tag to the class the oxml parser instantiates for that tag. Imports are
# placed beside the registrations that use them instead of at the top of the
# file; the `# noqa` markers presumably silence the linter about that.
# NOTE(review): the late imports look like a guard against circular imports
# with the submodules -- confirm before moving them.
#
# Generic element classes from .shared. CT_DecimalNumber, CT_OnOff and
# CT_String are reused for many tags registered further down in this module.
from .shared import CT_DecimalNumber, CT_OnOff, CT_String # noqa
register_element_cls("w:evenAndOddHeaders", CT_OnOff)
register_element_cls("w:titlePg", CT_OnOff)
# Core properties: the only tag registered here from the `cp` namespace.
from .coreprops import CT_CoreProperties # noqa
register_element_cls('cp:coreProperties', CT_CoreProperties)
# Document-level elements: <w:document> and <w:body>.
from .document import CT_Body, CT_Document # noqa
register_element_cls('w:body', CT_Body)
register_element_cls('w:document', CT_Document)
# Numbering elements. Tags that get a dedicated class use the ones imported
# from .numbering; w:abstractNumId, w:ilvl, w:numId and w:startOverride reuse
# the generic CT_DecimalNumber class imported from .shared earlier in this
# module.
from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr # noqa
register_element_cls('w:abstractNumId', CT_DecimalNumber)
register_element_cls('w:ilvl', CT_DecimalNumber)
register_element_cls('w:lvlOverride', CT_NumLvl)
register_element_cls('w:num', CT_Num)
register_element_cls('w:numId', CT_DecimalNumber)
register_element_cls('w:numPr', CT_NumPr)
register_element_cls('w:numbering', CT_Numbering)
register_element_cls('w:startOverride', CT_DecimalNumber)
# Section elements, including headers and footers.
from .section import ( # noqa
CT_HdrFtr,
CT_HdrFtrRef,
CT_PageMar,
CT_PageSz,
CT_SectPr,
CT_SectType,
)
# Header and footer tags share classes: w:hdr and w:ftr both map to
# CT_HdrFtr, and w:headerReference and w:footerReference both map to
# CT_HdrFtrRef.
register_element_cls("w:footerReference", CT_HdrFtrRef)
register_element_cls("w:ftr", CT_HdrFtr)
register_element_cls("w:hdr", CT_HdrFtr)
register_element_cls("w:headerReference", CT_HdrFtrRef)
register_element_cls("w:pgMar", CT_PageMar)
register_element_cls("w:pgSz", CT_PageSz)
register_element_cls("w:sectPr", CT_SectPr)
register_element_cls("w:type", CT_SectType)
# Settings root element: <w:settings>.
from .settings import CT_Settings # noqa
register_element_cls("w:settings", CT_Settings)
# Shape and picture elements, spread over the `a`, `pic` and `wp` namespace
# prefixes.
from .shape import ( # noqa
CT_Blip,
CT_BlipFillProperties,
CT_GraphicalObject,
CT_GraphicalObjectData,
CT_Inline,
CT_NonVisualDrawingProps,
CT_Picture,
CT_PictureNonVisual,
CT_Point2D,
CT_PositiveSize2D,
CT_ShapeProperties,
CT_Transform2D,
)
# Two classes serve tags in more than one namespace: CT_PositiveSize2D backs
# both a:ext and wp:extent, and CT_NonVisualDrawingProps backs both pic:cNvPr
# and wp:docPr.
register_element_cls('a:blip', CT_Blip)
register_element_cls('a:ext', CT_PositiveSize2D)
register_element_cls('a:graphic', CT_GraphicalObject)
register_element_cls('a:graphicData', CT_GraphicalObjectData)
register_element_cls('a:off', CT_Point2D)
register_element_cls('a:xfrm', CT_Transform2D)
register_element_cls('pic:blipFill', CT_BlipFillProperties)
register_element_cls('pic:cNvPr', CT_NonVisualDrawingProps)
register_element_cls('pic:nvPicPr', CT_PictureNonVisual)
register_element_cls('pic:pic', CT_Picture)
register_element_cls('pic:spPr', CT_ShapeProperties)
register_element_cls('wp:docPr', CT_NonVisualDrawingProps)
register_element_cls('wp:extent', CT_PositiveSize2D)
register_element_cls('wp:inline', CT_Inline)
# Style elements. Tags without a dedicated class in .styles reuse the generic
# CT_String, CT_OnOff and CT_DecimalNumber classes imported from .shared
# earlier in this module.
from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles # noqa
register_element_cls('w:basedOn', CT_String)
register_element_cls('w:latentStyles', CT_LatentStyles)
register_element_cls('w:locked', CT_OnOff)
register_element_cls('w:lsdException', CT_LsdException)
register_element_cls('w:name', CT_String)
register_element_cls('w:next', CT_String)
register_element_cls('w:qFormat', CT_OnOff)
register_element_cls('w:semiHidden', CT_OnOff)
register_element_cls('w:style', CT_Style)
register_element_cls('w:styles', CT_Styles)
register_element_cls('w:uiPriority', CT_DecimalNumber)
register_element_cls('w:unhideWhenUsed', CT_OnOff)
# Table elements: table, grid, row and cell tags plus their property
# elements.
from .table import ( # noqa
CT_Height,
CT_Row,
CT_Tbl,
CT_TblGrid,
CT_TblGridCol,
CT_TblLayoutType,
CT_TblPr,
CT_TblWidth,
CT_Tc,
CT_TcPr,
CT_TrPr,
CT_VMerge,
CT_VerticalJc,
)
# w:bidiVisual, w:gridSpan and w:tblStyle have no dedicated class in .table;
# they use CT_OnOff, CT_DecimalNumber and CT_String from .shared respectively.
register_element_cls('w:bidiVisual', CT_OnOff)
register_element_cls('w:gridCol', CT_TblGridCol)
register_element_cls('w:gridSpan', CT_DecimalNumber)
register_element_cls('w:tbl', CT_Tbl)
register_element_cls('w:tblGrid', CT_TblGrid)
register_element_cls('w:tblLayout', CT_TblLayoutType)
register_element_cls('w:tblPr', CT_TblPr)
register_element_cls('w:tblStyle', CT_String)
register_element_cls('w:tc', CT_Tc)
register_element_cls('w:tcPr', CT_TcPr)
register_element_cls('w:tcW', CT_TblWidth)
register_element_cls('w:tr', CT_Row)
register_element_cls('w:trHeight', CT_Height)
register_element_cls('w:trPr', CT_TrPr)
register_element_cls('w:vAlign', CT_VerticalJc)
register_element_cls('w:vMerge', CT_VMerge)
# Run-property (font) elements.
from .text.font import ( # noqa
CT_Color,
CT_Fonts,
CT_Highlight,
CT_HpsMeasure,
CT_RPr,
CT_Underline,
CT_VerticalAlignRun,
)
# Most tags in this group are registered with the generic CT_OnOff class from
# .shared; only w:color, w:highlight, w:rFonts, w:rPr, w:sz, w:u and
# w:vertAlign use a class from .text.font, and w:rStyle uses CT_String.
register_element_cls('w:b', CT_OnOff)
register_element_cls('w:bCs', CT_OnOff)
register_element_cls('w:caps', CT_OnOff)
register_element_cls('w:color', CT_Color)
register_element_cls('w:cs', CT_OnOff)
register_element_cls('w:dstrike', CT_OnOff)
register_element_cls('w:emboss', CT_OnOff)
register_element_cls('w:highlight', CT_Highlight)
register_element_cls('w:i', CT_OnOff)
register_element_cls('w:iCs', CT_OnOff)
register_element_cls('w:imprint', CT_OnOff)
register_element_cls('w:noProof', CT_OnOff)
register_element_cls('w:oMath', CT_OnOff)
register_element_cls('w:outline', CT_OnOff)
register_element_cls('w:rFonts', CT_Fonts)
register_element_cls('w:rPr', CT_RPr)
register_element_cls('w:rStyle', CT_String)
register_element_cls('w:rtl', CT_OnOff)
register_element_cls('w:shadow', CT_OnOff)
register_element_cls('w:smallCaps', CT_OnOff)
register_element_cls('w:snapToGrid', CT_OnOff)
register_element_cls('w:specVanish', CT_OnOff)
register_element_cls('w:strike', CT_OnOff)
register_element_cls('w:sz', CT_HpsMeasure)
register_element_cls('w:u', CT_Underline)
register_element_cls('w:vanish', CT_OnOff)
register_element_cls('w:vertAlign', CT_VerticalAlignRun)
register_element_cls('w:webHidden', CT_OnOff)
# Paragraph element: <w:p>.
from .text.paragraph import CT_P # noqa
register_element_cls('w:p', CT_P)
# Paragraph-formatting elements.
from .text.parfmt import ( # noqa
CT_Ind,
CT_Jc,
CT_PPr,
CT_Spacing,
CT_TabStop,
CT_TabStops,
)
# w:keepLines, w:keepNext, w:pageBreakBefore and w:widowControl use the
# generic CT_OnOff class and w:pStyle uses CT_String, both from .shared.
register_element_cls('w:ind', CT_Ind)
register_element_cls('w:jc', CT_Jc)
register_element_cls('w:keepLines', CT_OnOff)
register_element_cls('w:keepNext', CT_OnOff)
register_element_cls('w:pageBreakBefore', CT_OnOff)
register_element_cls('w:pPr', CT_PPr)
register_element_cls('w:pStyle', CT_String)
register_element_cls('w:spacing', CT_Spacing)
register_element_cls('w:tab', CT_TabStop)
register_element_cls('w:tabs', CT_TabStops)
register_element_cls('w:widowControl', CT_OnOff)
# Run-level elements: <w:r> and its <w:br> and <w:t> tags.
from .text.run import CT_Br, CT_R, CT_Text # noqa
register_element_cls('w:br', CT_Br)
register_element_cls('w:r', CT_R)
register_element_cls('w:t', CT_Text)