Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions markdownify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,30 @@ def chomp(text):
return (prefix, suffix, text)


def escape_link_destination(url):
"""
Make a URL safe to place inside Markdown ``(...)`` link/image syntax.

Destinations that contain whitespace or parentheses break the inline link
syntax, so wrap them in angle brackets -- CommonMark allows spaces and
parentheses inside ``<...>`` -- and backslash-escape any ``<``/``>``.
"""
if url and re.search(r'[\s()<>]', url):
escaped = url.replace('\\', r'\\').replace('<', r'\<').replace('>', r'\>')
return '<%s>' % escaped
return url


def escape_link_text(text):
"""
Escape characters that would break out of Markdown ``[...]`` link/image
label syntax. Used for image ``alt`` text taken verbatim from HTML, which
would otherwise allow the label to be closed early (e.g. an ``alt`` of
``](http://attacker)`` could inject an attacker-controlled destination).
"""
return text.replace('\\', r'\\').replace('[', r'\[').replace(']', r'\]')


def abstract_inline_conversion(markup_fn):
"""
This abstracts all simple inline tags like b, em, del, ...
Expand Down Expand Up @@ -453,7 +477,7 @@ def convert_a(self, el, text, parent_tags):
if self.options['default_title'] and not title:
title = href
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
return '%s[%s](%s%s)%s' % (prefix, text, escape_link_destination(href), title_part, suffix) if href else text

convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])

Expand Down Expand Up @@ -588,7 +612,7 @@ def convert_img(self, el, text, parent_tags):
and el.parent.name not in self.options['keep_inline_images_in']):
return alt

return '![%s](%s%s)' % (alt, src, title_part)
return '![%s](%s%s)' % (escape_link_text(alt), escape_link_destination(src), title_part)

def convert_video(self, el, text, parent_tags):
if ('_inline' in parent_tags
Expand All @@ -601,11 +625,12 @@ def convert_video(self, el, text, parent_tags):
src = sources[0].attrs.get('src', None) or ''
poster = el.attrs.get('poster', None) or ''
if src and poster:
return '[![%s](%s)](%s)' % (text, poster, src)
return '[![%s](%s)](%s)' % (
text, escape_link_destination(poster), escape_link_destination(src))
if src:
return '[%s](%s)' % (text, src)
return '[%s](%s)' % (text, escape_link_destination(src))
if poster:
return '![%s](%s)' % (text, poster)
return '![%s](%s)' % (text, escape_link_destination(poster))
return text

def convert_list(self, el, text, parent_tags):
Expand Down
26 changes: 26 additions & 0 deletions tests/test_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,32 @@ def test_video():
assert md('<video>text</video>') == 'text'


def test_a_escapes_destination():
# See #261: a destination containing spaces or parentheses must be wrapped
# in angle brackets so it is not truncated by the inline link syntax.
assert md('<a href="/a)b">click</a>') == '[click](</a)b>)'
assert md('<a href="/a b">click</a>') == '[click](</a b>)'
# Unaffected destinations are left untouched.
assert md('<a href="/normal">click</a>') == '[click](/normal)'


def test_img_escapes_link_syntax():
# See #261: alt text is taken verbatim from HTML and must not be able to
# break out of the [...] label (which could otherwise inject a destination).
assert md('<img src="/a" alt="]">') == '![\\]](/a)'
assert md('<img src="/safe" alt="](http://attacker)">') == '![\\](http://attacker)](/safe)'
# A src containing spaces or parentheses is wrapped in angle brackets.
assert md('<img src="/a b" alt="x">') == '![x](</a b>)'
# Unaffected images are left untouched.
assert md('<img src="/normal.png" alt="cat">') == '![cat](/normal.png)'


def test_video_escapes_destination():
# See #261: video src/poster destinations are escaped like other links.
assert md('<video src="/v )x">text</video>') == '[text](</v )x>)'
assert md('<video poster="/p )x">text</video>') == '![text](</p )x>)'


def test_kbd():
inline_tests('kbd', '`')

Expand Down