diff --git a/.gitignore b/.gitignore index 352fe3b..8587350 100644 --- a/.gitignore +++ b/.gitignore @@ -1,238 +1,10 @@ .coverage -*.pyc +dist *.docx -*.kpf -build -problems +/*.egg-info/ +MANIFEST +*.pyc +README.html +_scratch template/word/media -bin/activate -bin/activate_this.py -bin/easy_install -bin/easy_install-2.6 -bin/get_env_details -bin/pilconvert.py -bin/pildriver.py -bin/pilfile.py -bin/pilfont.py -bin/pilprint.py -bin/pip -bin/postactivate -bin/postdeactivate -bin/preactivate -bin/predeactivate -bin/python -bin/python2.6 -include/python2.6 -lib/python2.6/UserDict.py -lib/python2.6/UserDict.pyo -lib/python2.6/_abcoll.py -lib/python2.6/_abcoll.pyo -lib/python2.6/abc.py -lib/python2.6/abc.pyo -lib/python2.6/codecs.py -lib/python2.6/codecs.pyo -lib/python2.6/config -lib/python2.6/copy_reg.py -lib/python2.6/copy_reg.pyo -lib/python2.6/distutils/__init__.py -lib/python2.6/distutils/distutils.cfg -lib/python2.6/encodings -lib/python2.6/fnmatch.py -lib/python2.6/fnmatch.pyo -lib/python2.6/genericpath.py -lib/python2.6/genericpath.pyo -lib/python2.6/lib-dynload -lib/python2.6/linecache.py -lib/python2.6/linecache.pyo -lib/python2.6/locale.py -lib/python2.6/locale.pyo -lib/python2.6/ntpath.py -lib/python2.6/ntpath.pyo -lib/python2.6/orig-prefix.txt -lib/python2.6/os.py -lib/python2.6/os.pyo -lib/python2.6/posixpath.py -lib/python2.6/posixpath.pyo -lib/python2.6/re.py -lib/python2.6/re.pyo -lib/python2.6/site-packages/PIL.pth -lib/python2.6/site-packages/PIL/ArgImagePlugin.py -lib/python2.6/site-packages/PIL/BdfFontFile.py -lib/python2.6/site-packages/PIL/BmpImagePlugin.py -lib/python2.6/site-packages/PIL/BufrStubImagePlugin.py -lib/python2.6/site-packages/PIL/ContainerIO.py -lib/python2.6/site-packages/PIL/CurImagePlugin.py -lib/python2.6/site-packages/PIL/DcxImagePlugin.py -lib/python2.6/site-packages/PIL/EpsImagePlugin.py -lib/python2.6/site-packages/PIL/ExifTags.py -lib/python2.6/site-packages/PIL/FitsStubImagePlugin.py 
-lib/python2.6/site-packages/PIL/FliImagePlugin.py -lib/python2.6/site-packages/PIL/FontFile.py -lib/python2.6/site-packages/PIL/FpxImagePlugin.py -lib/python2.6/site-packages/PIL/GbrImagePlugin.py -lib/python2.6/site-packages/PIL/GdImageFile.py -lib/python2.6/site-packages/PIL/GifImagePlugin.py -lib/python2.6/site-packages/PIL/GimpGradientFile.py -lib/python2.6/site-packages/PIL/GimpPaletteFile.py -lib/python2.6/site-packages/PIL/GribStubImagePlugin.py -lib/python2.6/site-packages/PIL/Hdf5StubImagePlugin.py -lib/python2.6/site-packages/PIL/IcnsImagePlugin.py -lib/python2.6/site-packages/PIL/IcoImagePlugin.py -lib/python2.6/site-packages/PIL/ImImagePlugin.py -lib/python2.6/site-packages/PIL/Image.py -lib/python2.6/site-packages/PIL/ImageChops.py -lib/python2.6/site-packages/PIL/ImageCms.py -lib/python2.6/site-packages/PIL/ImageColor.py -lib/python2.6/site-packages/PIL/ImageDraw.py -lib/python2.6/site-packages/PIL/ImageDraw2.py -lib/python2.6/site-packages/PIL/ImageEnhance.py -lib/python2.6/site-packages/PIL/ImageFile.py -lib/python2.6/site-packages/PIL/ImageFileIO.py -lib/python2.6/site-packages/PIL/ImageFilter.py -lib/python2.6/site-packages/PIL/ImageFont.py -lib/python2.6/site-packages/PIL/ImageGL.py -lib/python2.6/site-packages/PIL/ImageGrab.py -lib/python2.6/site-packages/PIL/ImageMath.py -lib/python2.6/site-packages/PIL/ImageMode.py -lib/python2.6/site-packages/PIL/ImageOps.py -lib/python2.6/site-packages/PIL/ImagePalette.py -lib/python2.6/site-packages/PIL/ImagePath.py -lib/python2.6/site-packages/PIL/ImageQt.py -lib/python2.6/site-packages/PIL/ImageSequence.py -lib/python2.6/site-packages/PIL/ImageShow.py -lib/python2.6/site-packages/PIL/ImageStat.py -lib/python2.6/site-packages/PIL/ImageTk.py -lib/python2.6/site-packages/PIL/ImageTransform.py -lib/python2.6/site-packages/PIL/ImageWin.py -lib/python2.6/site-packages/PIL/ImtImagePlugin.py -lib/python2.6/site-packages/PIL/IptcImagePlugin.py -lib/python2.6/site-packages/PIL/JpegImagePlugin.py 
-lib/python2.6/site-packages/PIL/McIdasImagePlugin.py -lib/python2.6/site-packages/PIL/MicImagePlugin.py -lib/python2.6/site-packages/PIL/MpegImagePlugin.py -lib/python2.6/site-packages/PIL/MspImagePlugin.py -lib/python2.6/site-packages/PIL/OleFileIO.py -lib/python2.6/site-packages/PIL/PIL-1.1.7-py2.6.egg-info/PKG-INFO -lib/python2.6/site-packages/PIL/PIL-1.1.7-py2.6.egg-info/SOURCES.txt -lib/python2.6/site-packages/PIL/PIL-1.1.7-py2.6.egg-info/dependency_links.txt -lib/python2.6/site-packages/PIL/PIL-1.1.7-py2.6.egg-info/installed-files.txt -lib/python2.6/site-packages/PIL/PIL-1.1.7-py2.6.egg-info/top_level.txt -lib/python2.6/site-packages/PIL/PSDraw.py -lib/python2.6/site-packages/PIL/PaletteFile.py -lib/python2.6/site-packages/PIL/PalmImagePlugin.py -lib/python2.6/site-packages/PIL/PcdImagePlugin.py -lib/python2.6/site-packages/PIL/PcfFontFile.py -lib/python2.6/site-packages/PIL/PcxImagePlugin.py -lib/python2.6/site-packages/PIL/PdfImagePlugin.py -lib/python2.6/site-packages/PIL/PixarImagePlugin.py -lib/python2.6/site-packages/PIL/PngImagePlugin.py -lib/python2.6/site-packages/PIL/PpmImagePlugin.py -lib/python2.6/site-packages/PIL/PsdImagePlugin.py -lib/python2.6/site-packages/PIL/SgiImagePlugin.py -lib/python2.6/site-packages/PIL/SpiderImagePlugin.py -lib/python2.6/site-packages/PIL/SunImagePlugin.py -lib/python2.6/site-packages/PIL/TarIO.py -lib/python2.6/site-packages/PIL/TgaImagePlugin.py -lib/python2.6/site-packages/PIL/TiffImagePlugin.py -lib/python2.6/site-packages/PIL/TiffTags.py -lib/python2.6/site-packages/PIL/WalImageFile.py -lib/python2.6/site-packages/PIL/WmfImagePlugin.py -lib/python2.6/site-packages/PIL/XVThumbImagePlugin.py -lib/python2.6/site-packages/PIL/XbmImagePlugin.py -lib/python2.6/site-packages/PIL/XpmImagePlugin.py -lib/python2.6/site-packages/PIL/__init__.py -lib/python2.6/site-packages/PIL/_imaging.so -lib/python2.6/site-packages/PIL/_imagingmath.so -lib/python2.6/site-packages/PIL/_imagingtk.so 
-lib/python2.6/site-packages/easy-install.pth -lib/python2.6/site-packages/lxml-2.3beta1-py2.6.egg-info/PKG-INFO -lib/python2.6/site-packages/lxml-2.3beta1-py2.6.egg-info/SOURCES.txt -lib/python2.6/site-packages/lxml-2.3beta1-py2.6.egg-info/dependency_links.txt -lib/python2.6/site-packages/lxml-2.3beta1-py2.6.egg-info/installed-files.txt -lib/python2.6/site-packages/lxml-2.3beta1-py2.6.egg-info/not-zip-safe -lib/python2.6/site-packages/lxml-2.3beta1-py2.6.egg-info/top_level.txt -lib/python2.6/site-packages/lxml/ElementInclude.py -lib/python2.6/site-packages/lxml/__init__.py -lib/python2.6/site-packages/lxml/_elementpath.py -lib/python2.6/site-packages/lxml/builder.py -lib/python2.6/site-packages/lxml/cssselect.py -lib/python2.6/site-packages/lxml/doctestcompare.py -lib/python2.6/site-packages/lxml/etree.so -lib/python2.6/site-packages/lxml/html/ElementSoup.py -lib/python2.6/site-packages/lxml/html/__init__.py -lib/python2.6/site-packages/lxml/html/_dictmixin.py -lib/python2.6/site-packages/lxml/html/_diffcommand.py -lib/python2.6/site-packages/lxml/html/_html5builder.py -lib/python2.6/site-packages/lxml/html/_setmixin.py -lib/python2.6/site-packages/lxml/html/builder.py -lib/python2.6/site-packages/lxml/html/clean.py -lib/python2.6/site-packages/lxml/html/defs.py -lib/python2.6/site-packages/lxml/html/diff.py -lib/python2.6/site-packages/lxml/html/formfill.py -lib/python2.6/site-packages/lxml/html/html5parser.py -lib/python2.6/site-packages/lxml/html/soupparser.py -lib/python2.6/site-packages/lxml/html/usedoctest.py -lib/python2.6/site-packages/lxml/isoschematron/__init__.py -lib/python2.6/site-packages/lxml/isoschematron/resources/rng/iso-schematron.rng -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl 
-lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl -lib/python2.6/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt -lib/python2.6/site-packages/lxml/objectify.so -lib/python2.6/site-packages/lxml/pyclasslookup.py -lib/python2.6/site-packages/lxml/sax.py -lib/python2.6/site-packages/lxml/usedoctest.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/EGG-INFO/PKG-INFO -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/EGG-INFO/SOURCES.txt -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/EGG-INFO/dependency_links.txt -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/EGG-INFO/entry_points.txt -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/EGG-INFO/not-zip-safe -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/EGG-INFO/top_level.txt -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/__init__.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/backwardcompat.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/basecommand.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/baseparser.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/__init__.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/bundle.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/completion.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/freeze.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/help.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/install.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/search.py 
-lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/uninstall.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/unzip.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/commands/zip.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/exceptions.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/index.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/locations.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/log.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/req.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/runner.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/util.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/vcs/__init__.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/vcs/bazaar.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/vcs/git.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/vcs/mercurial.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/vcs/subversion.py -lib/python2.6/site-packages/pip-0.7.2-py2.6.egg/pip/venv.py -lib/python2.6/site-packages/setuptools-0.6c11-py2.6.egg -lib/python2.6/site-packages/setuptools.pth -lib/python2.6/site.py -lib/python2.6/sre.py -lib/python2.6/sre.pyo -lib/python2.6/sre_compile.py -lib/python2.6/sre_compile.pyo -lib/python2.6/sre_constants.py -lib/python2.6/sre_constants.pyo -lib/python2.6/sre_parse.py -lib/python2.6/sre_parse.pyo -lib/python2.6/stat.py -lib/python2.6/stat.pyo -lib/python2.6/types.py -lib/python2.6/types.pyo -lib/python2.6/warnings.py -lib/python2.6/warnings.pyo \ No newline at end of file +.tox diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..03b6080 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +PYTHON = $(shell test -x bin/python && echo bin/python || echo `which python`) +SETUP = $(PYTHON) ./setup.py + +.PHONY: clean help coverage register sdist upload + +help: + @echo "Please use \`make ' where is one or more of" + @echo " clean delete intermediate work product and 
start fresh" + @echo " coverage run nosetests with coverage" + @echo " readme update README.html from README.rst" + @echo " register update metadata (README.rst) on PyPI" + @echo " sdist generate a source distribution into dist/" + @echo " upload upload distribution tarball to PyPI" + +clean: + find . -type f -name \*.pyc -exec rm {} \; + rm -rf dist .coverage .DS_Store MANIFEST + +coverage: + nosetests --with-coverage --cover-package=docx --cover-erase + +readme: + rst2html README.rst >README.html + open README.html + +register: + $(SETUP) register + +sdist: + $(SETUP) sdist + +upload: + $(SETUP) sdist upload diff --git a/README.markdown b/README.markdown deleted file mode 100644 index cbccf12..0000000 --- a/README.markdown +++ /dev/null @@ -1,81 +0,0 @@ -Python docx -=========== - -## Introduction - -The docx module creates, reads and writes Microsoft Office Word 2007 docx files. - -These are referred to as 'WordML', 'Office Open XML' and 'Open XML' by Microsoft. - -These documents can be opened in Microsoft Office 2007 / 2010, Microsoft Mac Office 2008, Google Docs, OpenOffice.org 3, and Apple iWork 08. - -They also [validate as well formed XML](http://validator.w3.org/check). - -The module was created when I was looking for a Python support for MS Word .doc files, but could only find various hacks involving COM automation, calling .net or Java, or automating OpenOffice or MS Office. - -The docx module has the following features: - -### Making documents - -Features for making documents include: - -- Paragraphs -- Bullets -- Numbered lists -- Document properties (author, company, etc) -- Multiple levels of headings -- Tables -- Section and page breaks -- Images - -
- -### Editing documents - -Thanks to the awesomeness of the lxml module, we can: - -- Search and replace -- Extract plain text of document -- Add and delete items anywhere within the document -- Change document properties -- Run xpath queries against particular locations in the document - useful for retrieving data from user-completed templates. - -# Getting started - -## Making and Modifying Documents - -- Just [download python docx](http://github.com/mikemaccana/python-docx/tarball/master). -- Use **pip** or **easy_install** to fetch the **lxml** and **PIL** modules. -- Then run: - -
example-makedocument.py
- -Congratulations, you just made and then modified a Word document! - -## Extracting Text from a Document - -If you just want to extract the text from a Word file, run: - - example-extracttext.py 'Some word file.docx' 'new file.txt' - -### Ideas & To Do List - -- Further improvements to image handling -- Document health checks -- Egg -- Markdown conversion support - -### We love forks, changes and pull requests! - -- Check out the [HACKING](HACKING.markdown) to add your own changes! -- For this project on github -- Send a pull request via github and we'll add your changes! - -### Want to talk? Need help? - -Email . - -### License - -Licensed under the [MIT license](http://www.opensource.org/licenses/mit-license.php) -Short version: this code is copyrighted to me (Mike MacCana), I give you permission to do what you want with it except remove my name from the credits. See the LICENSE file for specific terms. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..291465a --- /dev/null +++ b/README.rst @@ -0,0 +1,112 @@ +####################### +This Project Has Moved! +####################### + +**Python DocX is now part of Python OpenXML**. There's all kinds of new stuff, including Python 3 support, sister libraries for doing Excel files, and more. Check out the `current Python DocX GitHub `_ and the `current Python DocX docs `_. + +Info below is kept for archival purposes. **Go use the new stuff!** + +Introduction +============ + +The docx module creates, reads and writes Microsoft Office Word 2007 docx +files. + +These are referred to as 'WordML', 'Office Open XML' and 'Open XML' by +Microsoft. + +These documents can be opened in Microsoft Office 2007 / 2010, Microsoft Mac +Office 2008, Google Docs, OpenOffice.org 3, and Apple iWork 08. + +They also `validate as well formed XML <http://validator.w3.org/check>`_. 
+ +The module was created when I was looking for Python support for MS Word +.docx files, but could only find various hacks involving COM automation, +calling .Net or Java, or automating OpenOffice or MS Office. + +The docx module has the following features: + +Making documents +---------------- + +Features for making documents include: + +- Paragraphs +- Bullets +- Numbered lists +- Document properties (author, company, etc) +- Multiple levels of headings +- Tables +- Section and page breaks +- Images + +.. image:: http://github.com/mikemaccana/python-docx/raw/master/screenshot.png + + +Editing documents +----------------- + +Thanks to the awesomeness of the lxml module, we can: + +- Search and replace +- Extract plain text of document +- Add and delete items anywhere within the document +- Change document properties +- Run xpath queries against particular locations in the document - useful for + retrieving data from user-completed templates. + + +Getting started +=============== + +Making and Modifying Documents +------------------------------ + +- Just `download python docx <http://github.com/mikemaccana/python-docx/tarball/master>`_. +- Use **pip** or **easy_install** to fetch the **lxml** and **PIL** modules. +- Then run:: + + example-makedocument.py + + +Congratulations, you just made and then modified a Word document! + + +Extracting Text from a Document +------------------------------- + +If you just want to extract the text from a Word file, run:: + + example-extracttext.py 'Some word file.docx' 'new file.txt' + + +Ideas & To Do List +~~~~~~~~~~~~~~~~~~ + +- Further improvements to image handling +- Document health checks +- Egg +- Markdown conversion support + + +We love forks, changes and pull requests! +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Check out the `HACKING <HACKING.markdown>`_ guide to add your own changes! +- Fork this project on github +- Send a pull request via github and we'll add your changes! + +Want to talk? Need help? 
+~~~~~~~~~~~~~~~~~~~~~~~~ + +Email python-docx@googlegroups.com + + +License +~~~~~~~ + +Licensed under the `MIT license `_ + +Short version: this code is copyrighted to me (Mike MacCana), I give you +permission to do what you want with it except remove my name from the credits. +See the LICENSE file for specific terms. diff --git a/docx.py b/docx.py index 0f41b09..9d3f6e6 100755 --- a/docx.py +++ b/docx.py @@ -1,64 +1,82 @@ -#!/usr/bin/env python2.6 -# -*- coding: utf-8 -*- -''' -Open and modify Microsoft Word 2007 docx files (called 'OpenXML' and 'Office OpenXML' by Microsoft) +# encoding: utf-8 + +""" +Open and modify Microsoft Word 2007 docx files (called 'OpenXML' and +'Office OpenXML' by Microsoft) Part of Python's docx module - http://github.com/mikemaccana/python-docx See LICENSE for licensing information. -''' +""" + +import os +import re +import time +import shutil +import zipfile -import logging from lxml import etree +from os.path import abspath, basename, join + try: from PIL import Image except ImportError: import Image -import zipfile -import shutil -import re -import time -import os -from os.path import join + +try: + from PIL.ExifTags import TAGS +except ImportError: + TAGS = {} + +from exceptions import PendingDeprecationWarning +from warnings import warn + +import logging + log = logging.getLogger(__name__) # Record template directory's location which is just 'template' for a docx # developer or 'site-packages/docx-template' if you have installed docx -template_dir = join(os.path.dirname(__file__),'docx-template') # installed +template_dir = join(os.path.dirname(__file__), 'docx-template') # installed if not os.path.isdir(template_dir): - template_dir = join(os.path.dirname(__file__),'template') # dev + template_dir = join(os.path.dirname(__file__), 'template') # dev # All Word prefixes / namespace matches used in document.xml & core.xml. 
# LXML doesn't actually use prefixes (just the real namespace) , but these # make it easier to copy Word output more easily. nsprefixes = { + 'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main', + 'o': 'urn:schemas-microsoft-com:office:office', + 've': 'http://schemas.openxmlformats.org/markup-compatibility/2006', # Text Content - 'mv':'urn:schemas-microsoft-com:mac:vml', - 'mo':'http://schemas.microsoft.com/office/mac/office/2008/main', - 've':'http://schemas.openxmlformats.org/markup-compatibility/2006', - 'o':'urn:schemas-microsoft-com:office:office', - 'r':'http://schemas.openxmlformats.org/officeDocument/2006/relationships', - 'm':'http://schemas.openxmlformats.org/officeDocument/2006/math', - 'v':'urn:schemas-microsoft-com:vml', - 'w':'http://schemas.openxmlformats.org/wordprocessingml/2006/main', - 'w10':'urn:schemas-microsoft-com:office:word', - 'wne':'http://schemas.microsoft.com/office/word/2006/wordml', + 'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main', + 'w10': 'urn:schemas-microsoft-com:office:word', + 'wne': 'http://schemas.microsoft.com/office/word/2006/wordml', # Drawing - 'wp':'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing', - 'a':'http://schemas.openxmlformats.org/drawingml/2006/main', - 'pic':'http://schemas.openxmlformats.org/drawingml/2006/picture', + 'a': 'http://schemas.openxmlformats.org/drawingml/2006/main', + 'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math', + 'mv': 'urn:schemas-microsoft-com:mac:vml', + 'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture', + 'v': 'urn:schemas-microsoft-com:vml', + 'wp': ('http://schemas.openxmlformats.org/drawingml/2006/wordprocessing' + 'Drawing'), # Properties (core and extended) - 'cp':"http://schemas.openxmlformats.org/package/2006/metadata/core-properties", - 'dc':"http://purl.org/dc/elements/1.1/", - 'dcterms':"http://purl.org/dc/terms/", - 'dcmitype':"http://purl.org/dc/dcmitype/", - 
'xsi':"http://www.w3.org/2001/XMLSchema-instance", - 'ep':'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties', - # Content Types (we're just making up our own namespaces here to save time) - 'ct':'http://schemas.openxmlformats.org/package/2006/content-types', - # Package Relationships (we're just making up our own namespaces here to save time) - 'pr':'http://schemas.openxmlformats.org/package/2006/relationships' - } + 'cp': ('http://schemas.openxmlformats.org/package/2006/metadata/core-pr' + 'operties'), + 'dc': 'http://purl.org/dc/elements/1.1/', + 'ep': ('http://schemas.openxmlformats.org/officeDocument/2006/extended-' + 'properties'), + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + # Content Types + 'ct': 'http://schemas.openxmlformats.org/package/2006/content-types', + # Package Relationships + 'r': ('http://schemas.openxmlformats.org/officeDocument/2006/relationsh' + 'ips'), + 'pr': 'http://schemas.openxmlformats.org/package/2006/relationships', + # Dublin Core document properties + 'dcmitype': 'http://purl.org/dc/dcmitype/', + 'dcterms': 'http://purl.org/dc/terms/'} + def opendocx(file): '''Open a docx file, return a document XML tree''' @@ -67,12 +85,15 @@ def opendocx(file): document = etree.fromstring(xmlcontent) return document + def newdocument(): document = makeelement('document') document.append(makeelement('body')) return document -def makeelement(tagname,tagtext=None,nsprefix='w',attributes=None,attrnsprefix=None): + +def makeelement(tagname, tagtext=None, nsprefix='w', attributes=None, + attrnsprefix=None): '''Create an element & return it''' # Deal with list of nsprefix by making namespacemap namespacemap = None @@ -80,19 +101,21 @@ def makeelement(tagname,tagtext=None,nsprefix='w',attributes=None,attrnsprefix=N namespacemap = {} for prefix in nsprefix: namespacemap[prefix] = nsprefixes[prefix] - nsprefix = nsprefix[0] # FIXME: rest of code below expects a single prefix + # FIXME: rest of code below expects a single 
prefix + nsprefix = nsprefix[0] if nsprefix: - namespace = '{'+nsprefixes[nsprefix]+'}' + namespace = '{%s}' % nsprefixes[nsprefix] else: # For when namespace = None namespace = '' newelement = etree.Element(namespace+tagname, nsmap=namespacemap) # Add attributes with namespaces if attributes: - # If they haven't bothered setting attribute namespace, use an empty string - # (equivalent of no namespace) + # If they haven't bothered setting attribute namespace, use an empty + # string (equivalent of no namespace) if not attrnsprefix: - # Quick hack: it seems every element that has a 'w' nsprefix for its tag uses the same prefix for it's attributes + # Quick hack: it seems every element that has a 'w' nsprefix for + # its tag uses the same prefix for it's attributes if nsprefix == 'w': attributenamespace = namespace else: @@ -101,11 +124,13 @@ def makeelement(tagname,tagtext=None,nsprefix='w',attributes=None,attrnsprefix=N attributenamespace = '{'+nsprefixes[attrnsprefix]+'}' for tagattribute in attributes: - newelement.set(attributenamespace+tagattribute, attributes[tagattribute]) + newelement.set(attributenamespace+tagattribute, + attributes[tagattribute]) if tagtext: newelement.text = tagtext return newelement + def pagebreak(type='page', orient='portrait'): '''Insert a break, default 'page'. See http://openxmldeveloper.org/forums/thread/4075.aspx @@ -113,125 +138,153 @@ def pagebreak(type='page', orient='portrait'): # Need to enumerate different types of page breaks. validtypes = ['page', 'section'] if type not in validtypes: - raise ValueError('Page break style "%s" not implemented. Valid styles: %s.' % (type, validtypes)) + tmpl = 'Page break style "%s" not implemented. Valid styles: %s.' 
+ raise ValueError(tmpl % (type, validtypes)) pagebreak = makeelement('p') if type == 'page': run = makeelement('r') - br = makeelement('br',attributes={'type':type}) + br = makeelement('br', attributes={'type': type}) run.append(br) pagebreak.append(run) elif type == 'section': pPr = makeelement('pPr') sectPr = makeelement('sectPr') if orient == 'portrait': - pgSz = makeelement('pgSz',attributes={'w':'12240','h':'15840'}) + pgSz = makeelement('pgSz', attributes={'w': '12240', 'h': '15840'}) elif orient == 'landscape': - pgSz = makeelement('pgSz',attributes={'h':'12240','w':'15840', 'orient':'landscape'}) + pgSz = makeelement('pgSz', attributes={'h': '12240', 'w': '15840', + 'orient': 'landscape'}) sectPr.append(pgSz) pPr.append(sectPr) pagebreak.append(pPr) return pagebreak -def paragraph(paratext,style='BodyText',breakbefore=False,jc='left'): - '''Make a new paragraph element, containing a run, and some text. - Return the paragraph element. + +def paragraph(paratext, style='BodyText', breakbefore=False, jc='left'): + """ + Return a new paragraph element containing *paratext*. The paragraph's + default style is 'Body Text', but a new style may be set using the + *style* parameter. @param string jc: Paragraph alignment, possible values: left, center, right, both (justified), ... see http://www.schemacentral.com/sc/ooxml/t-w_ST_Jc.html for a full list - If paratext is a list, spawn multiple run/text elements. - Support text styles (paratext must then be a list of lists in the form - /