Compare commits
1 Commit
master
...
chrispy/su
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2d0a14a2a7 |
14
README.rst
14
README.rst
@@ -157,12 +157,16 @@ strip_document
|
||||
within the document are unaffected.
|
||||
Defaults to ``STRIP``.
|
||||
|
||||
beautiful_soup_parser
|
||||
Specify the Beautiful Soup parser to be used for interpreting HTML markup. Parsers such
|
||||
as ``html5lib`` or ``lxml``, or even a custom parser, may be used as long as they are installed in the execution
|
||||
environment. Defaults to ``html.parser``.
|
||||
bs4_options
|
||||
Specify additional configuration options for the ``BeautifulSoup`` object
|
||||
used to interpret the HTML markup. String and list values (such as ``lxml``)
|
||||
are treated as ``features`` parameter arguments to control parser
|
||||
selection. Dictionary values (such as ``{"from_encoding": "iso-8859-8"}``)
|
||||
are treated as full kwargs to be used for the BeautifulSoup constructor,
|
||||
allowing specification of any parameter. For parameter details, see the
|
||||
BeautifulSoup_ documentation.
|
||||
|
||||
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/
|
||||
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||
|
||||
Options may be specified as kwargs to the ``markdownify`` function, or as a
|
||||
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
||||
|
||||
@@ -154,7 +154,7 @@ def _next_block_content_sibling(el):
|
||||
class MarkdownConverter(object):
|
||||
class DefaultOptions:
|
||||
autolinks = True
|
||||
beautiful_soup_parser = 'html.parser'
|
||||
bs4_options = 'html.parser'
|
||||
bullets = '*+-' # An iterable of bullet types.
|
||||
code_language = ''
|
||||
code_language_callback = None
|
||||
@@ -188,11 +188,15 @@ class MarkdownConverter(object):
|
||||
raise ValueError('You may specify either tags to strip or tags to'
|
||||
' convert, but not both.')
|
||||
|
||||
# If a string or list is passed to bs4_options, assume it is a 'features' specification
|
||||
if not isinstance(self.options['bs4_options'], dict):
|
||||
self.options['bs4_options'] = {'features': self.options['bs4_options']}
|
||||
|
||||
# Initialize the conversion function cache
|
||||
self.convert_fn_cache = {}
|
||||
|
||||
def convert(self, html):
|
||||
soup = BeautifulSoup(html, self.options['beautiful_soup_parser'])
|
||||
soup = BeautifulSoup(html, **self.options['bs4_options'])
|
||||
return self.convert_soup(soup)
|
||||
|
||||
def convert_soup(self, soup):
|
||||
|
||||
@@ -32,3 +32,9 @@ def test_strip_document():
|
||||
assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
|
||||
assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
|
||||
assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
|
||||
|
||||
|
||||
def test_bs4_options():
    """Check that ``bs4_options`` accepts a string, a list, or a kwargs dict.

    The function carries the ``test_`` prefix so that pytest's default
    discovery collects it; without the prefix the assertions never run.
    """
    # A bare string is wrapped as the BeautifulSoup ``features`` argument.
    assert markdownify("<p>Hello</p>", bs4_options="html.parser") == "Hello"
    # A list is likewise wrapped as ``features``.
    assert markdownify("<p>Hello</p>", bs4_options=["html.parser"]) == "Hello"
    # A dict is expanded as-is into BeautifulSoup constructor kwargs.
    assert markdownify("<p>Hello</p>", bs4_options={"features": "html.parser"}) == "Hello"
|
||||
|
||||
Reference in New Issue
Block a user