Compare commits
1 Commits
develop
...
chrispy/su
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d0a14a2a7 |
14
README.rst
14
README.rst
@@ -157,12 +157,16 @@ strip_document
|
|||||||
within the document are unaffected.
|
within the document are unaffected.
|
||||||
Defaults to ``STRIP``.
|
Defaults to ``STRIP``.
|
||||||
|
|
||||||
beautiful_soup_parser
|
bs4_options
|
||||||
Specify the Beautiful Soup parser to be used for interpreting HTML markup. Parsers such
|
Specify additional configuration options for the ``BeautifulSoup`` object
|
||||||
as `html5lib`, `lxml` or even a custom parser as long as it is installed on the execution
|
used to interpret the HTML markup. String and list values (such as ``lxml``)
|
||||||
environment. Defaults to ``html.parser``.
|
are passed as the ``features`` argument to control parser
|
||||||
|
selection. Dictionary values (such as ``{"from_encoding": "iso-8859-8"}``)
|
||||||
|
are treated as full kwargs to be used for the BeautifulSoup constructor,
|
||||||
|
allowing specification of any parameter. For parameter details, see the
|
||||||
|
BeautifulSoup_ documentation.
|
||||||
|
|
||||||
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/
|
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||||
|
|
||||||
Options may be specified as kwargs to the ``markdownify`` function, or as a
|
Options may be specified as kwargs to the ``markdownify`` function, or as a
|
||||||
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
||||||
|
|||||||
@@ -154,7 +154,7 @@ def _next_block_content_sibling(el):
|
|||||||
class MarkdownConverter(object):
|
class MarkdownConverter(object):
|
||||||
class DefaultOptions:
|
class DefaultOptions:
|
||||||
autolinks = True
|
autolinks = True
|
||||||
beautiful_soup_parser = 'html.parser'
|
bs4_options = 'html.parser'
|
||||||
bullets = '*+-' # An iterable of bullet types.
|
bullets = '*+-' # An iterable of bullet types.
|
||||||
code_language = ''
|
code_language = ''
|
||||||
code_language_callback = None
|
code_language_callback = None
|
||||||
@@ -188,11 +188,15 @@ class MarkdownConverter(object):
|
|||||||
raise ValueError('You may specify either tags to strip or tags to'
|
raise ValueError('You may specify either tags to strip or tags to'
|
||||||
' convert, but not both.')
|
' convert, but not both.')
|
||||||
|
|
||||||
|
# If a string or list is passed to bs4_options, assume it is a 'features' specification
|
||||||
|
if not isinstance(self.options['bs4_options'], dict):
|
||||||
|
self.options['bs4_options'] = {'features': self.options['bs4_options']}
|
||||||
|
|
||||||
# Initialize the conversion function cache
|
# Initialize the conversion function cache
|
||||||
self.convert_fn_cache = {}
|
self.convert_fn_cache = {}
|
||||||
|
|
||||||
def convert(self, html):
|
def convert(self, html):
|
||||||
soup = BeautifulSoup(html, self.options['beautiful_soup_parser'])
|
soup = BeautifulSoup(html, **self.options['bs4_options'])
|
||||||
return self.convert_soup(soup)
|
return self.convert_soup(soup)
|
||||||
|
|
||||||
def convert_soup(self, soup):
|
def convert_soup(self, soup):
|
||||||
|
|||||||
@@ -32,3 +32,9 @@ def test_strip_document():
|
|||||||
assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
|
assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
|
||||||
assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
|
assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
|
||||||
assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
|
assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
|
||||||
|
|
||||||
|
|
||||||
|
def bs4_options():
|
||||||
|
assert markdownify("<p>Hello</p>", bs4_options="html.parser") == "Hello"
|
||||||
|
assert markdownify("<p>Hello</p>", bs4_options=["html.parser"]) == "Hello"
|
||||||
|
assert markdownify("<p>Hello</p>", bs4_options={"features": "html.parser"}) == "Hello"
|
||||||
|
|||||||
Reference in New Issue
Block a user