Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cb73590623 | ||
|
|
22180a166d | ||
|
|
16d8a0e1f7 | ||
|
|
4aa6cf2a24 | ||
|
|
828e116530 | ||
|
|
62e9f0de02 |
23
README.rst
23
README.rst
@@ -100,6 +100,29 @@ Options may be specified as kwargs to the ``markdownify`` function, or as a
|
||||
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
||||
|
||||
|
||||
Creating Custom Converters
|
||||
=========================
|
||||
|
||||
If you have a special use case that calls for a special conversion, you can
|
||||
always inherit from ``MarkdownConverter`` and override the method you want to
|
||||
change:
|
||||
|
||||
.. code:: python
|
||||
|
||||
from markdownify import MarkdownConverter
|
||||
|
||||
class ImageBlockConverter(MarkdownConverter):
|
||||
"""
|
||||
Create a custom MarkdownConverter that adds two newlines after an image
|
||||
"""
|
||||
def convert_img(self, el, text, convert_as_inline):
|
||||
return super().convert_img(el, text, convert_as_inline) + '\n\n'
|
||||
|
||||
# Create shorthand method for conversion
|
||||
def md(html, **options):
|
||||
return ImageBlockConverter(**options).convert(html)
|
||||
|
||||
|
||||
Development
|
||||
===========
|
||||
|
||||
|
||||
@@ -142,22 +142,26 @@ class MarkdownConverter(object):
|
||||
return text
|
||||
|
||||
def process_text(self, el):
|
||||
text = six.text_type(el)
|
||||
text = six.text_type(el) or ''
|
||||
|
||||
# don't remove any whitespace when handling pre or code in pre
|
||||
if (el.parent.name == 'pre'
|
||||
or (el.parent.name == 'code' and el.parent.parent.name == 'pre')):
|
||||
return escape(text or '')
|
||||
if not (el.parent.name == 'pre'
|
||||
or (el.parent.name == 'code'
|
||||
and el.parent.parent.name == 'pre')):
|
||||
text = whitespace_re.sub(' ', text)
|
||||
|
||||
cleaned_text = escape(whitespace_re.sub(' ', text or ''))
|
||||
if el.parent.name != 'code':
|
||||
text = escape(text)
|
||||
|
||||
# remove trailing whitespace if any of the following conditions is true:
|
||||
# - current text node is the last node in li
|
||||
# - current text node is followed by an embedded list
|
||||
if el.parent.name == 'li' and (not el.next_sibling or el.next_sibling.name in ['ul', 'ol']):
|
||||
return cleaned_text.rstrip()
|
||||
if (el.parent.name == 'li'
|
||||
and (not el.next_sibling
|
||||
or el.next_sibling.name in ['ul', 'ol'])):
|
||||
text = text.rstrip()
|
||||
|
||||
return cleaned_text
|
||||
return text
|
||||
|
||||
def __getattr__(self, attr):
|
||||
# Handle headings
|
||||
|
||||
2
setup.py
2
setup.py
@@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
|
||||
pkgmeta = {
|
||||
'__title__': 'markdownify',
|
||||
'__author__': 'Matthew Tretter',
|
||||
'__version__': '0.9.0',
|
||||
'__version__': '0.9.1',
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ def test_br():
|
||||
|
||||
def test_code():
|
||||
inline_tests('code', '`')
|
||||
assert md('<code>this_should_not_escape</code>') == '`this_should_not_escape`'
|
||||
|
||||
|
||||
def test_del():
|
||||
@@ -131,8 +132,6 @@ def test_hn_nested_simple_tag():
|
||||
|
||||
|
||||
def test_hn_nested_img():
|
||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == ''
|
||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == ''
|
||||
image_attributes_to_markdown = [
|
||||
("", ""),
|
||||
("alt='Alt Text'", "Alt Text"),
|
||||
|
||||
18
tests/test_custom_converter.py
Normal file
18
tests/test_custom_converter.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from markdownify import MarkdownConverter
|
||||
|
||||
|
||||
class ImageBlockConverter(MarkdownConverter):
|
||||
"""
|
||||
Create a custom MarkdownConverter that adds two newlines after an image
|
||||
"""
|
||||
def convert_img(self, el, text, convert_as_inline):
|
||||
return super().convert_img(el, text, convert_as_inline) + '\n\n'
|
||||
|
||||
|
||||
def test_img():
|
||||
# Create shorthand method for conversion
|
||||
def md(html, **options):
|
||||
return ImageBlockConverter(**options).convert(html)
|
||||
|
||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '\n\n'
|
||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '\n\n'
|
||||
Reference in New Issue
Block a user