From 0aa2404b61a2357e4a9ee19ae5923af9a81588b9 Mon Sep 17 00:00:00 2001 From: jamincai Date: Tue, 5 Dec 2023 16:43:08 +0800 Subject: [PATCH 1/3] update --- src/docx/__init__.py | 3 + src/docx/document.py | 8 ++ src/docx/opc/package.py | 14 +++ src/docx/oxml/__init__.py | 7 ++ src/docx/oxml/comments.py | 127 ++++++++++++++++++++++++ src/docx/oxml/document.py | 2 +- src/docx/oxml/text/paragraph.py | 33 ++++++ src/docx/oxml/text/run.py | 33 +++++- src/docx/parts/comments.py | 26 +++++ src/docx/parts/document.py | 19 ++++ src/docx/templates/default-comments.xml | 2 + src/docx/text/comment.py | 23 +++++ src/docx/text/paragraph.py | 17 ++++ src/docx/text/run.py | 22 ++++ 14 files changed, 334 insertions(+), 2 deletions(-) create mode 100644 src/docx/oxml/comments.py create mode 100644 src/docx/parts/comments.py create mode 100644 src/docx/templates/default-comments.xml create mode 100644 src/docx/text/comment.py diff --git a/src/docx/__init__.py b/src/docx/__init__.py index b214045d1..585dc584c 100644 --- a/src/docx/__init__.py +++ b/src/docx/__init__.py @@ -31,6 +31,7 @@ from docx.parts.numbering import NumberingPart from docx.parts.settings import SettingsPart from docx.parts.styles import StylesPart +from docx.parts.comments import CommentsPart def part_class_selector(content_type: str, reltype: str) -> Type[Part] | None: @@ -47,6 +48,7 @@ def part_class_selector(content_type: str, reltype: str) -> Type[Part] | None: PartFactory.part_type_for[CT.WML_NUMBERING] = NumberingPart PartFactory.part_type_for[CT.WML_SETTINGS] = SettingsPart PartFactory.part_type_for[CT.WML_STYLES] = StylesPart +PartFactory.part_type_for[CT.WML_COMMENTS] = CommentsPart del ( CT, @@ -58,5 +60,6 @@ def part_class_selector(content_type: str, reltype: str) -> Type[Part] | None: PartFactory, SettingsPart, StylesPart, + CommentsPart, part_class_selector, ) diff --git a/src/docx/document.py b/src/docx/document.py index 4deb8aa8e..da46a0772 100644 --- a/src/docx/document.py +++ b/src/docx/document.py @@ -113,6 +113,14 @@ def add_table(self, rows: int, cols: int, style: str | _TableStyle | None = None def core_properties(self): """A |CoreProperties| object providing Dublin Core properties of document.""" return self._part.core_properties + + @property + def comments_part(self): + """ + A |Comments| object providing read/write access to the core + properties of this document. + """ + return self.part.comments_part @property def inline_shapes(self): diff --git a/src/docx/opc/package.py b/src/docx/opc/package.py index b5bdc0e7c..380661ae3 100644 --- a/src/docx/opc/package.py +++ b/src/docx/opc/package.py @@ -6,6 +6,7 @@ from docx.opc.parts.coreprops import CorePropertiesPart from docx.opc.pkgreader import PackageReader from docx.opc.pkgwriter import PackageWriter +from docx.parts.comments import CommentsPart from docx.opc.rel import Relationships from docx.opc.shared import lazyproperty @@ -162,6 +163,19 @@ def _core_properties_part(self): core_properties_part = CorePropertiesPart.default(self) self.relate_to(core_properties_part, RT.CORE_PROPERTIES) return core_properties_part + + @property + def _comments_part(self): + """ + |CommentsPart| object related to this package. Creates + a default Comments part if one is not present. + """ + try: + return self.part_related_by(RT.COMMENTS) + except KeyError: + comments_part = CommentsPart.default(self) + self.relate_to(comments_part, RT.COMMENTS) + return comments_part class Unmarshaller: diff --git a/src/docx/oxml/__init__.py b/src/docx/oxml/__init__.py index 621ef279a..c703985ed 100644 --- a/src/docx/oxml/__init__.py +++ b/src/docx/oxml/__init__.py @@ -75,6 +75,13 @@ register_element_cls("w:r", CT_R) register_element_cls("w:t", CT_Text) +from .comments import CT_Comments,CT_Com, CT_CRE, CT_CRS, CT_CRef +register_element_cls('w:comments', CT_Comments) +register_element_cls('w:comment', CT_Com) +register_element_cls('w:commentRangeStart', CT_CRS) +register_element_cls('w:commentRangeEnd', CT_CRE) +register_element_cls('w:commentReference', CT_CRef) + # --------------------------------------------------------------------------- # header/footer-related mappings diff --git a/src/docx/oxml/comments.py b/src/docx/oxml/comments.py new file mode 100644 index 000000000..1baefb7d6 --- /dev/null +++ b/src/docx/oxml/comments.py @@ -0,0 +1,127 @@ +""" +Custom element classes related to the comments part +""" + +from docx.oxml import OxmlElement +from docx.oxml.simpletypes import ST_DecimalNumber, ST_String +from docx.opc.constants import NAMESPACE +from docx.text.paragraph import Paragraph +from docx.text.run import Run +from docx.oxml.xmlchemy import ( + BaseOxmlElement, OneAndOnlyOne, RequiredAttribute, ZeroOrMore, ZeroOrOne +) + +class CT_Com(BaseOxmlElement): + """ + A ```` element, a container for Comment properties + """ + initials = RequiredAttribute('w:initials', ST_String) + _id = RequiredAttribute('w:id', ST_DecimalNumber) + date = RequiredAttribute('w:date', ST_String) + author = RequiredAttribute('w:author', ST_String) + + p = ZeroOrOne('w:p', successors=('w:comment',)) + + @classmethod + def new(cls, initials, comm_id, date, author): + """ + Return a new ```` element having _id of *comm_id* and having + the passed params as meta data + """ + comment = OxmlElement('w:comment') + comment.initials = initials + comment.date = date + comment._id = comm_id + comment.author = author + return comment + + def _add_p(self, text): + _p = OxmlElement('w:p') + _r = _p.add_r() + run = Run(_r,self) + run.text = text + self._insert_p(_p) + return _p + + @property + def meta(self): + return [self.author, self.initials, self.date] + + @property + def paragraph(self): + return Paragraph(self.p, self) + + +class CT_Comments(BaseOxmlElement): + """ + A ```` element, a container for Comments properties + """ + comment = ZeroOrMore ('w:comment', successors=('w:comments',)) + + def add_comment(self,author, initials, date): + _next_id = self._next_commentId + comment = CT_Com.new(initials, _next_id, date, author) + comment = self._insert_comment(comment) + + return comment + + @property + def _next_commentId(self): + ids = self.xpath('./w:comment/@w:id') + len(ids) + _ids = [int(_str) for _str in ids] + _ids.sort() + + try: + return _ids[-1] + 2 + except: + return 0 + + def get_comment_by_id(self, _id): + namesapce = NAMESPACE().WML_MAIN + for c in self.findall('.//w:comment',{'w':namesapce}): + if c._id == _id: + return c + return None + + +class CT_CRS(BaseOxmlElement): + """ + A ```` element + """ + _id = RequiredAttribute('w:id', ST_DecimalNumber) + + @classmethod + def new(cls, _id): + commentRangeStart = OxmlElement('w:commentRangeStart') + commentRangeStart._id =_id + + return commentRangeStart + +class CT_CRE(BaseOxmlElement): + """ + A ``w:commentRangeEnd`` element + """ + _id = RequiredAttribute('w:id', ST_DecimalNumber) + + + @classmethod + def new(cls, _id): + commentRangeEnd = OxmlElement('w:commentRangeEnd') + commentRangeEnd._id =_id + return commentRangeEnd + + +class CT_CRef(BaseOxmlElement): + """ + w:commentReference + """ + _id = RequiredAttribute('w:id', ST_DecimalNumber) + + @classmethod + def new (cls, _id): + commentReference = OxmlElement('w:commentReference') + commentReference._id =_id + return commentReference + + diff --git a/src/docx/oxml/document.py b/src/docx/oxml/document.py index cc27f5aa9..9ab7a11a8 100644 --- a/src/docx/oxml/document.py +++ b/src/docx/oxml/document.py @@ -28,7 +28,7 @@ def sectPr_lst(self) -> List[CT_SectPr]: `w:sectPr` elements appear in document order. The last one is always `w:body/w:sectPr`, all preceding are `w:p/w:pPr/w:sectPr`. """ - xpath = "./w:body/w:p/w:pPr/w:sectPr | ./w:body/w:sectPr" + xpath = "./w:body/w:p/w:pPr/w:sectPr | ./w:body/w:sectPr | ./w:body/w:sdt/w:sdtContent/w:p/w:pPr/w:sectPr" return self.xpath(xpath) diff --git a/src/docx/oxml/text/paragraph.py b/src/docx/oxml/text/paragraph.py index f771dd74f..80a1e270b 100644 --- a/src/docx/oxml/text/paragraph.py +++ b/src/docx/oxml/text/paragraph.py @@ -35,6 +35,31 @@ def add_p_before(self) -> CT_P: new_p = cast(CT_P, OxmlElement("w:p")) self.addprevious(new_p) return new_p + + def link_comment(self, _id, rangeStart=0, rangeEnd=0): + rStart = OxmlElement('w:commentRangeStart') + rStart._id = _id + rEnd = OxmlElement('w:commentRangeEnd') + rEnd._id = _id + if rangeStart == 0 and rangeEnd == 0: + self.insert(0,rStart) + self.append(rEnd) + else: + self.insert(rangeStart,rStart) + if rangeEnd == len(self.getchildren() ) - 1 : + self.append(rEnd) + else: + self.insert(rangeEnd+1, rEnd) + + def add_comm(self, author, comment_part, initials, dtime, comment_text, rangeStart, rangeEnd): + + comment = comment_part.add_comment(author, initials, dtime) + comment._add_p(comment_text) + _r = self.add_r() + _r.add_comment_reference(comment._id) + self.link_comment(comment._id, rangeStart= rangeStart, rangeEnd=rangeEnd) + + return comment @property def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None: @@ -86,6 +111,14 @@ def style(self) -> str | None: if pPr is None: return None return pPr.style + + @property + def comment_id(self): + _id = self.xpath('./w:commentRangeStart/@w:id') + if len(_id) > 1 or len(_id) == 0: + return None + else: + return int(_id[0]) @style.setter def style(self, style: str | None): diff --git a/src/docx/oxml/text/run.py b/src/docx/oxml/text/run.py index f17d33845..419f1fb51 100644 --- a/src/docx/oxml/text/run.py +++ b/src/docx/oxml/text/run.py @@ -8,8 +8,9 @@ from docx.oxml.ns import qn from docx.oxml.simpletypes import ST_BrClear, ST_BrType from docx.oxml.text.font import CT_RPr -from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne +from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne, RequiredAttribute from docx.shared import TextAccumulator +from docx.oxml import OxmlElement if TYPE_CHECKING: from docx.oxml.shape import CT_Anchor, CT_Inline @@ -51,6 +52,30 @@ def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing: drawing = self._add_drawing() drawing.append(inline_or_anchor) return drawing + + + def add_comm(self, author, comment_part, initials, dtime, comment_text): + comment = comment_part.add_comment(author, initials, dtime) + comment._add_p(comment_text) + # _r = self.add_r() + self.add_comment_reference(comment._id) + self.link_comment(comment._id) + + return comment + + def link_comment(self, _id): + rStart = OxmlElement('w:commentRangeStart') + rStart._id = _id + rEnd = OxmlElement('w:commentRangeEnd') + rEnd._id = _id + self.addprevious(rStart) + self.addnext(rEnd) + + def add_comment_reference(self, _id): + reference = OxmlElement('w:commentReference') + reference._id = _id + self.append(reference) + return reference def clear_content(self) -> None: """Remove all child elements except a `w:rPr` element if present.""" @@ -58,6 +83,12 @@ def clear_content(self) -> None: for e in self.xpath("./*[not(self::w:rPr)]"): self.remove(e) + def add_comment_reference(self, _id): + reference = OxmlElement('w:commentReference') + reference._id = _id + self.append(reference) + return reference + @property def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]: """Text of run, possibly punctuated by `w:lastRenderedPageBreak` elements.""" diff --git a/src/docx/parts/comments.py b/src/docx/parts/comments.py new file mode 100644 index 000000000..b3deb65ab --- /dev/null +++ b/src/docx/parts/comments.py @@ -0,0 +1,26 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import os + +from docx.opc.constants import CONTENT_TYPE as CT +from docx.opc.packuri import PackURI + +from docx.oxml import parse_xml +from docx.opc.part import XmlPart + +class CommentsPart(XmlPart): + """Definition of Comments Part""" + + @classmethod + def default(cls, package): + partname = PackURI("/word/comments.xml") + content_type = CT.WML_COMMENTS + element = parse_xml(cls._default_comments_xml()) + return cls(partname, content_type, element, package) + + @classmethod + def _default_comments_xml(cls): + path = os.path.join(os.path.split(__file__)[0], '..', 'templates', 'default-comments.xml') + with open(path, 'rb') as f: + xml_bytes = f.read() + return xml_bytes diff --git a/src/docx/parts/document.py b/src/docx/parts/document.py index a157764b9..35782146e 100644 --- a/src/docx/parts/document.py +++ b/src/docx/parts/document.py @@ -10,6 +10,7 @@ from docx.parts.hdrftr import FooterPart, HeaderPart from docx.parts.numbering import NumberingPart from docx.parts.settings import SettingsPart +from docx.parts.comments import CommentsPart from docx.parts.story import StoryPart from docx.parts.styles import StylesPart from docx.shape import InlineShapes @@ -145,3 +146,21 @@ def _styles_part(self) -> StylesPart: styles_part = StylesPart.default(package) self.relate_to(styles_part, RT.STYLES) return styles_part + + + @lazyproperty + def comments_part(self): + """ + A |Comments| object providing read/write access to the core + properties of this document. + """ + # return self.package._comments_part + + @property + def _comments_part(self): + try: + return self.part_related_by(RT.COMMENTS) + except KeyError: + comments_part = CommentsPart.default(self) + self.relate_to(comments_part, RT.COMMENTS) + return comments_part diff --git a/src/docx/templates/default-comments.xml b/src/docx/templates/default-comments.xml new file mode 100644 index 000000000..4ceb12ea4 --- /dev/null +++ b/src/docx/templates/default-comments.xml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/src/docx/text/comment.py b/src/docx/text/comment.py new file mode 100644 index 000000000..cfc3b3dd4 --- /dev/null +++ b/src/docx/text/comment.py @@ -0,0 +1,23 @@ +from docx.shared import Parented + +class Comment(Parented): + """[summary] + + :param Parented: [description] + :type Parented: [type] + """ + def __init__(self, com, parent): + super(Comment, self).__init__(parent) + self._com = self._element = self.element = com + + @property + def paragraph(self): + return self.element.paragraph + + @property + def text(self): + return self.element.paragraph.text + + @text.setter + def text(self, text): + self.element.paragraph.text = text \ No newline at end of file diff --git a/src/docx/text/paragraph.py b/src/docx/text/paragraph.py index 0a5d67674..1488fe88a 100644 --- a/src/docx/text/paragraph.py +++ b/src/docx/text/paragraph.py @@ -16,6 +16,9 @@ from docx.text.parfmt import ParagraphFormat from docx.text.run import Run +from datetime import datetime +import re + if TYPE_CHECKING: from docx.enum.text import WD_PARAGRAPH_ALIGNMENT from docx.oxml.text.paragraph import CT_P @@ -46,6 +49,15 @@ def add_run( if style: run.style = style return run + + def add_comment(self, text, author='python-docx', initials='pd', dtime=None ,rangeStart=0, rangeEnd=0, comment_part=None): + if comment_part is None: + comment_part = self.part._comments_part.element + if dtime is None: + dtime = str( datetime.now() ).replace(' ', 'T') + comment = self._p.add_comm(author, comment_part, initials, dtime, text, rangeStart, rangeEnd) + + return comment @property def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None: @@ -152,6 +164,11 @@ def style(self, style_or_name: str | ParagraphStyle | None): style_id = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.PARAGRAPH) self._p.style = style_id + @property + def comments(self): + runs_comments = [run.comments for run in self.runs] + return [comment for comments in runs_comments for comment in comments] + @property def text(self) -> str: """The textual content of this paragraph. diff --git a/src/docx/text/run.py b/src/docx/text/run.py index 44c41c0fe..38ff2254b 100644 --- a/src/docx/text/run.py +++ b/src/docx/text/run.py @@ -15,6 +15,11 @@ from docx.styles.style import CharacterStyle from docx.text.font import Font from docx.text.pagebreak import RenderedPageBreak +from datetime import datetime + +from docx.oxml.ns import qn +from docx.opc.part import * +from .comment import Comment if TYPE_CHECKING: from docx.enum.text import WD_UNDERLINE @@ -94,6 +99,15 @@ def add_text(self, text: str): """ t = self._r.add_t(text) return _Text(t) + + + def add_comment(self, text, author='python-docx', initials='pd', dtime=None): + comment_part = self.part._comments_part.element + if dtime is None: + dtime = str(datetime.now()).replace(' ', 'T') + comment = self._r.add_comm(author, comment_part, initials, dtime, text) + + return comment @property def bold(self) -> bool | None: @@ -240,6 +254,14 @@ def underline(self) -> bool | WD_UNDERLINE | None: def underline(self, value: bool): self.font.underline = value + @property + def comments(self): + comment_part = self._parent._parent.part._comments_part.element + comment_refs = self._element.findall(qn('w:commentReference')) + ids = [int(ref.get(qn('w:id'))) for ref in comment_refs] + coms = [com for com in comment_part if com._id in ids] + return [Comment(com, comment_part) for com in coms] + class _Text: """Proxy object wrapping `` element.""" From e6512ade603248bd67d89868addf4d044ad4da1d Mon Sep 17 00:00:00 2001 From: jamincai Date: Wed, 6 Dec 2023 09:54:04 +0800 Subject: [PATCH 2/3] update --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d35c790c7..a731ac450 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,10 +33,10 @@ readme = "README.md" requires-python = ">=3.7" [project.urls] -Changelog = "https://github.com/python-openxml/python-docx/blob/master/HISTORY.rst" +Changelog = "https://github.com/caijamin/python-docx/blob/master/HISTORY.rst" Documentation = "https://python-docx.readthedocs.org/en/latest/" -Homepage = "https://github.com/python-openxml/python-docx" -Repository = "https://github.com/python-openxml/python-docx" +Homepage = "https://github.com/caijamin/python-docx" +Repository = "https://github.com/caijamin/python-docx" [tool.black] target-version = ["py37", "py38", "py39", "py310", "py311"] From 39a8e2791b52548f2333a39e69f2f6535404f80f Mon Sep 17 00:00:00 2001 From: jamincai Date: Tue, 12 Mar 2024 17:49:28 +0800 Subject: [PATCH 3/3] update --- src/docx/oxml/ns.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/docx/oxml/ns.py b/src/docx/oxml/ns.py index 3238864e9..bfa7c5970 100644 --- a/src/docx/oxml/ns.py +++ b/src/docx/oxml/ns.py @@ -21,6 +21,8 @@ "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing", "xml": "http://www.w3.org/XML/1998/namespace", "xsi": "http://www.w3.org/2001/XMLSchema-instance", + "mc":"http://schemas.openxmlformats.org/markup-compatibility/2006", + "v":"urn:schemas-microsoft-com:vml", } pfxmap = {value: key for key, value in nsmap.items()}