You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

232 lines
8.3 KiB

# encoding: utf-8
"""Objects that implement reading and writing OPC packages."""
from __future__ import absolute_import, division, print_function, unicode_literals
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.opc.packuri import PACKAGE_URI, PackURI
from docx.opc.part import PartFactory
from docx.opc.parts.coreprops import CorePropertiesPart
from docx.opc.pkgreader import PackageReader
from docx.opc.pkgwriter import PackageWriter
from docx.opc.rel import Relationships
from docx.opc.shared import lazyproperty
class OpcPackage(object):
"""Main API class for |python-opc|.
A new instance is constructed by calling the :meth:`open` class method with a path
to a package file or file-like object containing one.
"""
def __init__(self):
super(OpcPackage, self).__init__()
def after_unmarshal(self):
"""
Entry point for any post-unmarshaling processing. May be overridden
by subclasses without forwarding call to super.
"""
# don't place any code here, just catch call if not overridden by
# subclass
pass
@property
def core_properties(self):
"""
|CoreProperties| object providing read/write access to the Dublin
Core properties for this document.
"""
return self._core_properties_part.core_properties
def iter_rels(self):
"""
Generate exactly one reference to each relationship in the package by
performing a depth-first traversal of the rels graph.
"""
def walk_rels(source, visited=None):
visited = [] if visited is None else visited
for rel in source.rels.values():
yield rel
if rel.is_external:
continue
part = rel.target_part
if part in visited:
continue
visited.append(part)
new_source = part
for rel in walk_rels(new_source, visited):
yield rel
for rel in walk_rels(self):
yield rel
def iter_parts(self):
"""
Generate exactly one reference to each of the parts in the package by
performing a depth-first traversal of the rels graph.
"""
def walk_parts(source, visited=list()):
for rel in source.rels.values():
if rel.is_external:
continue
part = rel.target_part
if part in visited:
continue
visited.append(part)
yield part
new_source = part
for part in walk_parts(new_source, visited):
yield part
for part in walk_parts(self):
yield part
def load_rel(self, reltype, target, rId, is_external=False):
"""
Return newly added |_Relationship| instance of *reltype* between this
part and *target* with key *rId*. Target mode is set to
``RTM.EXTERNAL`` if *is_external* is |True|. Intended for use during
load from a serialized package, where the rId is well known. Other
methods exist for adding a new relationship to the package during
processing.
"""
return self.rels.add_relationship(reltype, target, rId, is_external)
@property
def main_document_part(self):
"""
Return a reference to the main document part for this package.
Examples include a document part for a WordprocessingML package, a
presentation part for a PresentationML package, or a workbook part
for a SpreadsheetML package.
"""
return self.part_related_by(RT.OFFICE_DOCUMENT)
def next_partname(self, template):
"""Return a |PackURI| instance representing partname matching *template*.
The returned part-name has the next available numeric suffix to distinguish it
from other parts of its type. *template* is a printf (%)-style template string
containing a single replacement item, a '%d' to be used to insert the integer
portion of the partname. Example: "/word/header%d.xml"
"""
partnames = {part.partname for part in self.iter_parts()}
for n in range(1, len(partnames) + 2):
candidate_partname = template % n
if candidate_partname not in partnames:
return PackURI(candidate_partname)
@classmethod
def open(cls, pkg_file):
"""
Return an |OpcPackage| instance loaded with the contents of
*pkg_file*.
"""
pkg_reader = PackageReader.from_file(pkg_file)
package = cls()
Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
return package
def part_related_by(self, reltype):
"""
Return part to which this package has a relationship of *reltype*.
Raises |KeyError| if no such relationship is found and |ValueError|
if more than one such relationship is found.
"""
return self.rels.part_with_reltype(reltype)
@property
def parts(self):
"""
Return a list containing a reference to each of the parts in this
package.
"""
return [part for part in self.iter_parts()]
def relate_to(self, part, reltype):
"""
Return rId key of relationship to *part*, from the existing
relationship if there is one, otherwise a newly created one.
"""
rel = self.rels.get_or_add(reltype, part)
return rel.rId
@lazyproperty
def rels(self):
"""
Return a reference to the |Relationships| instance holding the
collection of relationships for this package.
"""
return Relationships(PACKAGE_URI.baseURI)
def save(self, pkg_file):
"""
Save this package to *pkg_file*, where *file* can be either a path to
a file (a string) or a file-like object.
"""
for part in self.parts:
part.before_marshal()
PackageWriter.write(pkg_file, self.rels, self.parts)
@property
def _core_properties_part(self):
"""
|CorePropertiesPart| object related to this package. Creates
a default core properties part if one is not present (not common).
"""
try:
return self.part_related_by(RT.CORE_PROPERTIES)
except KeyError:
core_properties_part = CorePropertiesPart.default(self)
self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
return core_properties_part
class Unmarshaller(object):
"""Hosts static methods for unmarshalling a package from a |PackageReader|."""
@staticmethod
def unmarshal(pkg_reader, package, part_factory):
"""
Construct graph of parts and realized relationships based on the
contents of *pkg_reader*, delegating construction of each part to
*part_factory*. Package relationships are added to *pkg*.
"""
parts = Unmarshaller._unmarshal_parts(
pkg_reader, package, part_factory
)
Unmarshaller._unmarshal_relationships(pkg_reader, package, parts)
for part in parts.values():
part.after_unmarshal()
package.after_unmarshal()
@staticmethod
def _unmarshal_parts(pkg_reader, package, part_factory):
"""
Return a dictionary of |Part| instances unmarshalled from
*pkg_reader*, keyed by partname. Side-effect is that each part in
*pkg_reader* is constructed using *part_factory*.
"""
parts = {}
for partname, content_type, reltype, blob in pkg_reader.iter_sparts():
parts[partname] = part_factory(
partname, content_type, reltype, blob, package
)
return parts
@staticmethod
def _unmarshal_relationships(pkg_reader, package, parts):
"""
Add a relationship to the source object corresponding to each of the
relationships in *pkg_reader* with its target_part set to the actual
target part in *parts*.
"""
for source_uri, srel in pkg_reader.iter_srels():
source = package if source_uri == '/' else parts[source_uri]
target = (srel.target_ref if srel.is_external
else parts[srel.target_partname])
source.load_rel(srel.reltype, target, srel.rId, srel.is_external)