From b59ed726511f0e67bf304400621e212fc8cedc7b Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 30 Nov 2020 13:47:57 +0000 Subject: [PATCH] Fix UTF-8 replacement chars in git send-email git-format-patch doesn't set the charset param of the Content-Type header field. The default value is "ascii", which forbids UTF-8. However, git-format-patch message bodies may include UTF-8. This seems to be deliberate to make the output of git-format-patch more readable for humans. get_content() tries to decode the body according to the charset parameter of the Content-Type header field. Instead of avoiding get_content() as a workaround (it's easy to forget about it and step on a landmine), fix up the Content-Type header field to include the proper charset param. Closes https://todo.sr.ht/~sircmpwn/git.sr.ht/327 --- gitsrht/blueprints/email.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/gitsrht/blueprints/email.py b/gitsrht/blueprints/email.py index d498a19..7e80e54 100644 --- a/gitsrht/blueprints/email.py +++ b/gitsrht/blueprints/email.py @@ -157,6 +157,16 @@ def prepare_patchset(repo, git_repo, cover_letter=None, extra_headers=False, mbox = mailbox.mbox(ntf.name, factory=factory) emails = list(mbox) + # git-format-patch doesn't set the charset attribute of the + # Content-Type header field. The Python stdlib assumes ASCII and chokes + # on UTF-8. 
+ for msg in emails: + # replace_header doesn't allow setting params, so we have to unset + # the header field and re-add it + t = msg.get_content_type() + del msg["Content-Type"] + msg.add_header("Content-Type", t, charset="utf-8") + if cover_letter: subject = emails[0]["Subject"] del emails[0]["Subject"] @@ -174,10 +184,10 @@ def prepare_patchset(repo, git_repo, cover_letter=None, extra_headers=False, if not commentary: continue commentary = "\n".join(wrapper.wrap(commentary)) - body = msg.get_payload(decode=True).decode() + body = msg.get_content() body = commentary_re.sub(r"---\n" + commentary.replace( "\\", r"\\") + r"\n\n\g", body, count=1) - msg.set_payload(body) + msg.set_content(body) if extra_headers: msgid = make_msgid().split("@") -- 2.38.4