Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0c4b856b9c | ||
|
|
e9cc01938a | ||
|
|
aceced68eb | ||
|
|
3b049cdb9c | ||
|
|
b747378b52 | ||
|
|
ee73d89879 | ||
|
|
5563161c86 | ||
|
|
28e447d9ae | ||
|
|
89d14f4487 | ||
|
|
5f9243d91d | ||
|
|
d0f688d2e4 | ||
|
|
5ac08522be | ||
|
|
78afcc173e | ||
|
|
b132a6f5b3 | ||
|
|
0abe0a29e8 | ||
|
|
4932df631f | ||
|
|
8696e2bde1 | ||
|
|
ee53d85c41 | ||
|
|
53ba0daa77 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,5 +1,7 @@
|
|||||||
*.pyc
|
*.pyc
|
||||||
*.egg
|
*.egg
|
||||||
|
.eggs/
|
||||||
|
*.egg-info/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
/.env
|
/.env
|
||||||
/dist
|
/dist
|
||||||
|
|||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright 2012-2018 Matthew Tretter
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
32
README.rst
32
README.rst
@@ -29,6 +29,38 @@ Specify tags to exclude (blacklist):
|
|||||||
md('<b>Yay</b> <a href="http://github.com">GitHub</a>', convert=['b']) # > '**Yay** GitHub'
|
md('<b>Yay</b> <a href="http://github.com">GitHub</a>', convert=['b']) # > '**Yay** GitHub'
|
||||||
|
|
||||||
|
|
||||||
|
Options
|
||||||
|
=======
|
||||||
|
|
||||||
|
Markdownify supports the following options:
|
||||||
|
|
||||||
|
strip
|
||||||
|
A list of tags to strip (blacklist). This option can't be used with the
|
||||||
|
``convert`` option.
|
||||||
|
|
||||||
|
convert
|
||||||
|
A list of tags to convert (whitelist). This option can't be used with the
|
||||||
|
``strip`` option.
|
||||||
|
|
||||||
|
autolinks
|
||||||
|
A boolean indicating whether the "automatic link" style should be used when
|
||||||
|
an ``a`` tag's contents match its href. Defaults to ``True``.
|
||||||
|
|
||||||
|
heading_style
|
||||||
|
Defines how headings should be converted. Accepted values are ``ATX``,
|
||||||
|
``ATX_CLOSED``, ``SETEXT``, and ``UNDERLINED`` (which is an alias for
|
||||||
|
``SETEXT``). Defaults to ``UNDERLINED``.
|
||||||
|
|
||||||
|
bullets
|
||||||
|
An iterable (string, list, or tuple) of bullet styles to be used. If the
|
||||||
|
iterable only contains one item, it will be used regardless of how deeply
|
||||||
|
lists are nested. Otherwise, the bullet will alternate based on nesting
|
||||||
|
level. Defaults to ``'*+-'``.
|
||||||
|
|
||||||
|
Options may be specified as kwargs to the ``markdownify`` function, or as a
|
||||||
|
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
||||||
|
|
||||||
|
|
||||||
Development
|
Development
|
||||||
===========
|
===========
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from bs4 import BeautifulSoup, NavigableString
|
from bs4 import BeautifulSoup, NavigableString
|
||||||
import re
|
import re
|
||||||
|
import six
|
||||||
|
|
||||||
|
|
||||||
convert_heading_re = re.compile(r'convert_h(\d+)')
|
convert_heading_re = re.compile(r'convert_h(\d+)')
|
||||||
@@ -22,6 +23,19 @@ def escape(text):
|
|||||||
return text.replace('_', r'\_')
|
return text.replace('_', r'\_')
|
||||||
|
|
||||||
|
|
||||||
|
def chomp(text):
|
||||||
|
"""
|
||||||
|
If the text in an inline tag like b, a, or em contains a leading or trailing
|
||||||
|
space, strip the string and return a space as suffix or prefix, if needed.
|
||||||
|
This function is used to prevent conversions like
|
||||||
|
<b> foo</b> => ** foo**
|
||||||
|
"""
|
||||||
|
prefix = ' ' if text and text[0] == ' ' else ''
|
||||||
|
suffix = ' ' if text and text[-1] == ' ' else ''
|
||||||
|
text = text.strip()
|
||||||
|
return (prefix, suffix, text)
|
||||||
|
|
||||||
|
|
||||||
def _todict(obj):
|
def _todict(obj):
|
||||||
return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
|
return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
|
||||||
|
|
||||||
@@ -52,7 +66,7 @@ class MarkdownConverter(object):
|
|||||||
# want a full document. Therefore, we'll mark our fragment with an id,
|
# want a full document. Therefore, we'll mark our fragment with an id,
|
||||||
# create the document, and extract the element with the id.
|
# create the document, and extract the element with the id.
|
||||||
html = wrapped % html
|
html = wrapped % html
|
||||||
soup = BeautifulSoup(html)
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
|
return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
|
||||||
|
|
||||||
def process_tag(self, node, children_only=False):
|
def process_tag(self, node, children_only=False):
|
||||||
@@ -61,7 +75,7 @@ class MarkdownConverter(object):
|
|||||||
# Convert the children first
|
# Convert the children first
|
||||||
for el in node.children:
|
for el in node.children:
|
||||||
if isinstance(el, NavigableString):
|
if isinstance(el, NavigableString):
|
||||||
text += self.process_text(unicode(el))
|
text += self.process_text(six.text_type(el))
|
||||||
else:
|
else:
|
||||||
text += self.process_tag(el)
|
text += self.process_tag(el)
|
||||||
|
|
||||||
@@ -109,13 +123,16 @@ class MarkdownConverter(object):
|
|||||||
return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
|
return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
|
||||||
|
|
||||||
def convert_a(self, el, text):
|
def convert_a(self, el, text):
|
||||||
|
prefix, suffix, text = chomp(text)
|
||||||
|
if not text:
|
||||||
|
return ''
|
||||||
href = el.get('href')
|
href = el.get('href')
|
||||||
title = el.get('title')
|
title = el.get('title')
|
||||||
if self.options['autolinks'] and text == href and not title:
|
if self.options['autolinks'] and text == href and not title:
|
||||||
# Shortcut syntax
|
# Shortcut syntax
|
||||||
return '<%s>' % href
|
return '<%s>' % href
|
||||||
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
|
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
|
||||||
return '[%s](%s%s)' % (text or '', href, title_part) if href else text or ''
|
return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
|
||||||
|
|
||||||
def convert_b(self, el, text):
|
def convert_b(self, el, text):
|
||||||
return self.convert_strong(el, text)
|
return self.convert_strong(el, text)
|
||||||
@@ -127,7 +144,10 @@ class MarkdownConverter(object):
|
|||||||
return ' \n'
|
return ' \n'
|
||||||
|
|
||||||
def convert_em(self, el, text):
|
def convert_em(self, el, text):
|
||||||
return '*%s*' % text if text else ''
|
prefix, suffix, text = chomp(text)
|
||||||
|
if not text:
|
||||||
|
return ''
|
||||||
|
return '%s*%s*%s' % (prefix, text, suffix)
|
||||||
|
|
||||||
def convert_hn(self, n, el, text):
|
def convert_hn(self, n, el, text):
|
||||||
style = self.options['heading_style']
|
style = self.options['heading_style']
|
||||||
@@ -151,8 +171,9 @@ class MarkdownConverter(object):
|
|||||||
break
|
break
|
||||||
el = el.parent
|
el = el.parent
|
||||||
if nested:
|
if nested:
|
||||||
text = '\n' + self.indent(text, 1)
|
# remove trailing newline if nested
|
||||||
return text
|
return '\n' + self.indent(text, 1).rstrip()
|
||||||
|
return '\n' + text + '\n'
|
||||||
|
|
||||||
convert_ul = convert_list
|
convert_ul = convert_list
|
||||||
convert_ol = convert_list
|
convert_ol = convert_list
|
||||||
@@ -175,7 +196,10 @@ class MarkdownConverter(object):
|
|||||||
return '%s\n\n' % text if text else ''
|
return '%s\n\n' % text if text else ''
|
||||||
|
|
||||||
def convert_strong(self, el, text):
|
def convert_strong(self, el, text):
|
||||||
return '**%s**' % text if text else ''
|
prefix, suffix, text = chomp(text)
|
||||||
|
if not text:
|
||||||
|
return ''
|
||||||
|
return '%s**%s**%s' % (prefix, text, suffix)
|
||||||
|
|
||||||
def convert_img(self, el, text):
|
def convert_img(self, el, text):
|
||||||
alt = el.attrs.get('alt', None) or ''
|
alt = el.attrs.get('alt', None) or ''
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
pkgmeta = dict(
|
|
||||||
__title__='markdownify',
|
|
||||||
__author__='Matthew Tretter',
|
|
||||||
__version__='0.4.0',
|
|
||||||
)
|
|
||||||
|
|
||||||
globals().update(pkgmeta)
|
|
||||||
__all__ = pkgmeta.keys()
|
|
||||||
13
setup.py
13
setup.py
@@ -7,10 +7,11 @@ from setuptools.command.test import test as TestCommand, Command
|
|||||||
|
|
||||||
read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
|
read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
|
||||||
|
|
||||||
|
pkgmeta = {
|
||||||
pkgmeta = {}
|
'__title__': 'markdownify',
|
||||||
execfile(os.path.join(os.path.dirname(__file__), 'markdownify', 'pkgmeta.py'),
|
'__author__': 'Matthew Tretter',
|
||||||
pkgmeta)
|
'__version__': '0.5.0',
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class PyTest(TestCommand):
|
class PyTest(TestCommand):
|
||||||
@@ -75,13 +76,13 @@ setup(
|
|||||||
'pytest',
|
'pytest',
|
||||||
],
|
],
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'beautifulsoup4',
|
'beautifulsoup4', 'six'
|
||||||
],
|
],
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Environment :: Web Environment',
|
'Environment :: Web Environment',
|
||||||
'Framework :: Django',
|
'Framework :: Django',
|
||||||
'Intended Audience :: Developers',
|
'Intended Audience :: Developers',
|
||||||
'License :: OSI Approved :: BSD License',
|
'License :: OSI Approved :: MIT License',
|
||||||
'Operating System :: OS Independent',
|
'Operating System :: OS Independent',
|
||||||
'Programming Language :: Python :: 2.5',
|
'Programming Language :: Python :: 2.5',
|
||||||
'Programming Language :: Python :: 2.6',
|
'Programming Language :: Python :: 2.6',
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from markdownify import markdownify as md, ATX, ATX_CLOSED
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
nested_uls = re.sub('\s+', '', """
|
nested_uls = re.sub(r'\s+', '', """
|
||||||
<ul>
|
<ul>
|
||||||
<li>1
|
<li>1
|
||||||
<ul>
|
<ul>
|
||||||
@@ -22,10 +22,28 @@ nested_uls = re.sub('\s+', '', """
|
|||||||
</ul>""")
|
</ul>""")
|
||||||
|
|
||||||
|
|
||||||
|
def test_chomp():
|
||||||
|
assert md(' <b></b> ') == ' '
|
||||||
|
assert md(' <b> </b> ') == ' '
|
||||||
|
assert md(' <b> </b> ') == ' '
|
||||||
|
assert md(' <b> </b> ') == ' '
|
||||||
|
assert md(' <b>s </b> ') == ' **s** '
|
||||||
|
assert md(' <b> s</b> ') == ' **s** '
|
||||||
|
assert md(' <b> s </b> ') == ' **s** '
|
||||||
|
assert md(' <b> s </b> ') == ' **s** '
|
||||||
|
|
||||||
|
|
||||||
def test_a():
|
def test_a():
|
||||||
assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'
|
assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'
|
||||||
|
|
||||||
|
|
||||||
|
def test_a_spaces():
|
||||||
|
assert md('foo <a href="http://google.com">Google</a> bar') == 'foo [Google](http://google.com) bar'
|
||||||
|
assert md('foo<a href="http://google.com"> Google</a> bar') == 'foo [Google](http://google.com) bar'
|
||||||
|
assert md('foo <a href="http://google.com">Google </a>bar') == 'foo [Google](http://google.com) bar'
|
||||||
|
assert md('foo <a href="http://google.com"></a> bar') == 'foo bar'
|
||||||
|
|
||||||
|
|
||||||
def test_a_with_title():
|
def test_a_with_title():
|
||||||
text = md('<a href="http://google.com" title="The "Goog"">Google</a>')
|
text = md('<a href="http://google.com" title="The "Goog"">Google</a>')
|
||||||
assert text == r'[Google](http://google.com "The \"Goog\"")'
|
assert text == r'[Google](http://google.com "The \"Goog\"")'
|
||||||
@@ -45,6 +63,13 @@ def test_b():
|
|||||||
assert md('<b>Hello</b>') == '**Hello**'
|
assert md('<b>Hello</b>') == '**Hello**'
|
||||||
|
|
||||||
|
|
||||||
|
def test_b_spaces():
|
||||||
|
assert md('foo <b>Hello</b> bar') == 'foo **Hello** bar'
|
||||||
|
assert md('foo<b> Hello</b> bar') == 'foo **Hello** bar'
|
||||||
|
assert md('foo <b>Hello </b>bar') == 'foo **Hello** bar'
|
||||||
|
assert md('foo <b></b> bar') == 'foo bar'
|
||||||
|
|
||||||
|
|
||||||
def test_blockquote():
|
def test_blockquote():
|
||||||
assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'
|
assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'
|
||||||
|
|
||||||
@@ -62,6 +87,13 @@ def test_em():
|
|||||||
assert md('<em>Hello</em>') == '*Hello*'
|
assert md('<em>Hello</em>') == '*Hello*'
|
||||||
|
|
||||||
|
|
||||||
|
def test_em_spaces():
|
||||||
|
assert md('foo <em>Hello</em> bar') == 'foo *Hello* bar'
|
||||||
|
assert md('foo<em> Hello</em> bar') == 'foo *Hello* bar'
|
||||||
|
assert md('foo <em>Hello </em>bar') == 'foo *Hello* bar'
|
||||||
|
assert md('foo <em></em> bar') == 'foo bar'
|
||||||
|
|
||||||
|
|
||||||
def test_h1():
|
def test_h1():
|
||||||
assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
|
assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
|
||||||
|
|
||||||
@@ -90,7 +122,7 @@ def test_i():
|
|||||||
|
|
||||||
|
|
||||||
def test_ol():
|
def test_ol():
|
||||||
assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
|
assert md('<ol><li>a</li><li>b</li></ol>') == '\n1. a\n2. b\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_p():
|
def test_p():
|
||||||
@@ -102,7 +134,11 @@ def test_strong():
|
|||||||
|
|
||||||
|
|
||||||
def test_ul():
|
def test_ul():
|
||||||
assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
|
assert md('<ul><li>a</li><li>b</li></ul>') == '\n* a\n* b\n\n'
|
||||||
|
|
||||||
|
|
||||||
|
def test_inline_ul():
|
||||||
|
assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n\n* a\n* b\n\nbar\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_nested_uls():
|
def test_nested_uls():
|
||||||
@@ -110,11 +146,11 @@ def test_nested_uls():
|
|||||||
Nested ULs should alternate bullet characters.
|
Nested ULs should alternate bullet characters.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert md(nested_uls) == '* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t+ b\n\t+ c\n\t\n* 2\n* 3\n'
|
assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_bullets():
|
def test_bullets():
|
||||||
assert md(nested_uls, bullets='-') == '- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t- b\n\t- c\n\t\n- 2\n- 3\n'
|
assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_img():
|
def test_img():
|
||||||
|
|||||||
Reference in New Issue
Block a user