Compare commits
19 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c47709c21c | ||
|
|
fbc1353593 | ||
|
|
85ef82e083 | ||
|
|
f7053e46ab | ||
|
|
7edbc5a22b | ||
|
|
76e5edb357 | ||
|
|
48724e7002 | ||
|
|
9b1412aa5b | ||
|
|
75ab3064dd | ||
|
|
016251e915 | ||
|
|
0e1a849346 | ||
|
|
e29de4e753 | ||
|
|
2d654a6b7e | ||
|
|
26566891a7 | ||
|
|
13183f9925 | ||
|
|
7908f1492a | ||
|
|
618747c18c | ||
|
|
5122c973c1 | ||
|
|
ac5736f0a3 |
21
.github/workflows/python-app.yml
vendored
21
.github/workflows/python-app.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python 3.8
|
- name: Set up Python 3.8
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
@@ -30,3 +30,22 @@ jobs:
|
|||||||
- name: Build
|
- name: Build
|
||||||
run: |
|
run: |
|
||||||
python -m build -nwsx .
|
python -m build -nwsx .
|
||||||
|
|
||||||
|
types:
|
||||||
|
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Set up Python 3.8
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: 3.8
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install --upgrade setuptools setuptools_scm wheel build tox mypy types-beautifulsoup4
|
||||||
|
- name: Check types
|
||||||
|
run: |
|
||||||
|
mypy .
|
||||||
|
mypy --strict tests/types.py
|
||||||
|
|||||||
2
.github/workflows/python-publish.yml
vendored
2
.github/workflows/python-publish.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
|
|||||||
19
README.rst
19
README.rst
@@ -110,7 +110,7 @@ code_language_callback
|
|||||||
When the HTML code contains ``pre`` tags that in some way provide the code
|
When the HTML code contains ``pre`` tags that in some way provide the code
|
||||||
language, for example as class, this callback can be used to extract the
|
language, for example as class, this callback can be used to extract the
|
||||||
language from the tag and prefix it to the converted ``pre`` tag.
|
language from the tag and prefix it to the converted ``pre`` tag.
|
||||||
The callback gets one single argument, an BeautifylSoup object, and returns
|
The callback gets one single argument, a BeautifulSoup object, and returns
|
||||||
a string containing the code language, or ``None``.
|
a string containing the code language, or ``None``.
|
||||||
An example to use the class name as code language could be::
|
An example to use the class name as code language could be::
|
||||||
|
|
||||||
@@ -157,6 +157,23 @@ strip_document
|
|||||||
within the document are unaffected.
|
within the document are unaffected.
|
||||||
Defaults to ``STRIP``.
|
Defaults to ``STRIP``.
|
||||||
|
|
||||||
|
strip_pre
|
||||||
|
Controls whether leading/trailing blank lines are removed from ``<pre>``
|
||||||
|
tags. Supported values are ``STRIP`` (all leading/trailing blank lines),
|
||||||
|
``STRIP_ONE`` (one leading/trailing blank line), and ``None`` (neither).
|
||||||
|
Defaults to ``STRIP``.
|
||||||
|
|
||||||
|
bs4_options
|
||||||
|
Specify additional configuration options for the ``BeautifulSoup`` object
|
||||||
|
used to interpret the HTML markup. String and list values (such as ``lxml``
|
||||||
|
or ``html5lib``) are treated as ``features`` arguments to control parser
|
||||||
|
selection. Dictionary values (such as ``{"from_encoding": "iso-8859-8"}``)
|
||||||
|
are treated as full kwargs to be used for the BeautifulSoup constructor,
|
||||||
|
allowing specification of any parameter. For parameter details, see the
|
||||||
|
Beautiful Soup documentation at:
|
||||||
|
|
||||||
|
.. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/bs4/doc/
|
||||||
|
|
||||||
Options may be specified as kwargs to the ``markdownify`` function, or as a
|
Options may be specified as kwargs to the ``markdownify`` function, or as a
|
||||||
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
nested ``Options`` class in ``MarkdownConverter`` subclasses.
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,10 @@ re_whitespace = re.compile(r'[\t ]+')
|
|||||||
re_all_whitespace = re.compile(r'[\t \r\n]+')
|
re_all_whitespace = re.compile(r'[\t \r\n]+')
|
||||||
re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
|
re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
|
||||||
re_html_heading = re.compile(r'h(\d+)')
|
re_html_heading = re.compile(r'h(\d+)')
|
||||||
|
re_pre_lstrip1 = re.compile(r'^ *\n')
|
||||||
|
re_pre_rstrip1 = re.compile(r'\n *$')
|
||||||
|
re_pre_lstrip = re.compile(r'^[ \n]*\n')
|
||||||
|
re_pre_rstrip = re.compile(r'[ \n]*$')
|
||||||
|
|
||||||
# Pattern for creating convert_<tag> function names from tag names
|
# Pattern for creating convert_<tag> function names from tag names
|
||||||
re_make_convert_fn_name = re.compile(r'[\[\]:-]')
|
re_make_convert_fn_name = re.compile(r'[\[\]:-]')
|
||||||
@@ -37,6 +41,9 @@ re_escape_misc_hashes = re.compile(r'(\s|^)(#{1,6}(?:\s|$))')
|
|||||||
# confused with a list item
|
# confused with a list item
|
||||||
re_escape_misc_list_items = re.compile(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))')
|
re_escape_misc_list_items = re.compile(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))')
|
||||||
|
|
||||||
|
# Find consecutive backtick sequences in a string
|
||||||
|
re_backtick_runs = re.compile(r'`+')
|
||||||
|
|
||||||
# Heading styles
|
# Heading styles
|
||||||
ATX = 'atx'
|
ATX = 'atx'
|
||||||
ATX_CLOSED = 'atx_closed'
|
ATX_CLOSED = 'atx_closed'
|
||||||
@@ -51,10 +58,25 @@ BACKSLASH = 'backslash'
|
|||||||
ASTERISK = '*'
|
ASTERISK = '*'
|
||||||
UNDERSCORE = '_'
|
UNDERSCORE = '_'
|
||||||
|
|
||||||
# Document strip styles
|
# Document/pre strip styles
|
||||||
LSTRIP = 'lstrip'
|
LSTRIP = 'lstrip'
|
||||||
RSTRIP = 'rstrip'
|
RSTRIP = 'rstrip'
|
||||||
STRIP = 'strip'
|
STRIP = 'strip'
|
||||||
|
STRIP_ONE = 'strip_one'
|
||||||
|
|
||||||
|
|
||||||
|
def strip1_pre(text):
|
||||||
|
"""Strip one leading and trailing newline from a <pre> string."""
|
||||||
|
text = re_pre_lstrip1.sub('', text)
|
||||||
|
text = re_pre_rstrip1.sub('', text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def strip_pre(text):
|
||||||
|
"""Strip all leading and trailing newlines from a <pre> string."""
|
||||||
|
text = re_pre_lstrip.sub('', text)
|
||||||
|
text = re_pre_rstrip.sub('', text)
|
||||||
|
return text
|
||||||
|
|
||||||
|
|
||||||
def chomp(text):
|
def chomp(text):
|
||||||
@@ -106,6 +128,7 @@ def should_remove_whitespace_inside(el):
|
|||||||
return el.name in ('p', 'blockquote',
|
return el.name in ('p', 'blockquote',
|
||||||
'article', 'div', 'section',
|
'article', 'div', 'section',
|
||||||
'ol', 'ul', 'li',
|
'ol', 'ul', 'li',
|
||||||
|
'dl', 'dt', 'dd',
|
||||||
'table', 'thead', 'tbody', 'tfoot',
|
'table', 'thead', 'tbody', 'tfoot',
|
||||||
'tr', 'td', 'th')
|
'tr', 'td', 'th')
|
||||||
|
|
||||||
@@ -153,6 +176,7 @@ def _next_block_content_sibling(el):
|
|||||||
class MarkdownConverter(object):
|
class MarkdownConverter(object):
|
||||||
class DefaultOptions:
|
class DefaultOptions:
|
||||||
autolinks = True
|
autolinks = True
|
||||||
|
bs4_options = 'html.parser'
|
||||||
bullets = '*+-' # An iterable of bullet types.
|
bullets = '*+-' # An iterable of bullet types.
|
||||||
code_language = ''
|
code_language = ''
|
||||||
code_language_callback = None
|
code_language_callback = None
|
||||||
@@ -166,6 +190,7 @@ class MarkdownConverter(object):
|
|||||||
newline_style = SPACES
|
newline_style = SPACES
|
||||||
strip = None
|
strip = None
|
||||||
strip_document = STRIP
|
strip_document = STRIP
|
||||||
|
strip_pre = STRIP
|
||||||
strong_em_symbol = ASTERISK
|
strong_em_symbol = ASTERISK
|
||||||
sub_symbol = ''
|
sub_symbol = ''
|
||||||
sup_symbol = ''
|
sup_symbol = ''
|
||||||
@@ -186,11 +211,15 @@ class MarkdownConverter(object):
|
|||||||
raise ValueError('You may specify either tags to strip or tags to'
|
raise ValueError('You may specify either tags to strip or tags to'
|
||||||
' convert, but not both.')
|
' convert, but not both.')
|
||||||
|
|
||||||
|
# If a string or list is passed to bs4_options, assume it is a 'features' specification
|
||||||
|
if not isinstance(self.options['bs4_options'], dict):
|
||||||
|
self.options['bs4_options'] = {'features': self.options['bs4_options']}
|
||||||
|
|
||||||
# Initialize the conversion function cache
|
# Initialize the conversion function cache
|
||||||
self.convert_fn_cache = {}
|
self.convert_fn_cache = {}
|
||||||
|
|
||||||
def convert(self, html):
|
def convert(self, html):
|
||||||
soup = BeautifulSoup(html, 'html.parser')
|
soup = BeautifulSoup(html, **self.options['bs4_options'])
|
||||||
return self.convert_soup(soup)
|
return self.convert_soup(soup)
|
||||||
|
|
||||||
def convert_soup(self, soup):
|
def convert_soup(self, soup):
|
||||||
@@ -361,16 +390,20 @@ class MarkdownConverter(object):
|
|||||||
if not self.should_convert_tag(tag_name):
|
if not self.should_convert_tag(tag_name):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Handle headings with _convert_hn() function
|
# Look for an explicitly defined conversion function by tag name first
|
||||||
|
convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub("_", tag_name)
|
||||||
|
convert_fn = getattr(self, convert_fn_name, None)
|
||||||
|
if convert_fn:
|
||||||
|
return convert_fn
|
||||||
|
|
||||||
|
# If tag is any heading, handle with convert_hN() function
|
||||||
match = re_html_heading.match(tag_name)
|
match = re_html_heading.match(tag_name)
|
||||||
if match:
|
if match:
|
||||||
n = int(match.group(1))
|
n = int(match.group(1)) # get value of N from <hN>
|
||||||
return lambda el, text, parent_tags: self._convert_hn(n, el, text, parent_tags)
|
return lambda el, text, parent_tags: self.convert_hN(n, el, text, parent_tags)
|
||||||
|
|
||||||
# For other tags, look up their conversion function by tag name
|
# No conversion function was found
|
||||||
convert_fn_name = "convert_%s" % re_make_convert_fn_name.sub('_', tag_name)
|
return None
|
||||||
convert_fn = getattr(self, convert_fn_name, None)
|
|
||||||
return convert_fn
|
|
||||||
|
|
||||||
def should_convert_tag(self, tag):
|
def should_convert_tag(self, tag):
|
||||||
"""Given a tag name, return whether to convert based on strip/convert options."""
|
"""Given a tag name, return whether to convert based on strip/convert options."""
|
||||||
@@ -442,7 +475,7 @@ class MarkdownConverter(object):
|
|||||||
|
|
||||||
def convert_br(self, el, text, parent_tags):
|
def convert_br(self, el, text, parent_tags):
|
||||||
if '_inline' in parent_tags:
|
if '_inline' in parent_tags:
|
||||||
return ""
|
return ' '
|
||||||
|
|
||||||
if self.options['newline_style'].lower() == BACKSLASH:
|
if self.options['newline_style'].lower() == BACKSLASH:
|
||||||
return '\\\n'
|
return '\\\n'
|
||||||
@@ -450,10 +483,24 @@ class MarkdownConverter(object):
|
|||||||
return ' \n'
|
return ' \n'
|
||||||
|
|
||||||
def convert_code(self, el, text, parent_tags):
|
def convert_code(self, el, text, parent_tags):
|
||||||
if 'pre' in parent_tags:
|
if '_noformat' in parent_tags:
|
||||||
return text
|
return text
|
||||||
converter = abstract_inline_conversion(lambda self: '`')
|
|
||||||
return converter(self, el, text, parent_tags)
|
prefix, suffix, text = chomp(text)
|
||||||
|
if not text:
|
||||||
|
return ''
|
||||||
|
|
||||||
|
# Find the maximum number of consecutive backticks in the text, then
|
||||||
|
# delimit the code span with one more backtick than that
|
||||||
|
max_backticks = max((len(match) for match in re.findall(re_backtick_runs, text)), default=0)
|
||||||
|
markup_delimiter = '`' * (max_backticks + 1)
|
||||||
|
|
||||||
|
# If the maximum number of backticks is greater than zero, add a space
|
||||||
|
# to avoid interpretation of inside backticks as literals
|
||||||
|
if max_backticks > 0:
|
||||||
|
text = " " + text + " "
|
||||||
|
|
||||||
|
return '%s%s%s%s%s' % (prefix, markup_delimiter, text, markup_delimiter, suffix)
|
||||||
|
|
||||||
convert_del = abstract_inline_conversion(lambda self: '~~')
|
convert_del = abstract_inline_conversion(lambda self: '~~')
|
||||||
|
|
||||||
@@ -489,6 +536,11 @@ class MarkdownConverter(object):
|
|||||||
|
|
||||||
return '%s\n' % text
|
return '%s\n' % text
|
||||||
|
|
||||||
|
# definition lists are formatted as follows:
|
||||||
|
# https://pandoc.org/MANUAL.html#definition-lists
|
||||||
|
# https://michelf.ca/projects/php-markdown/extra/#def-list
|
||||||
|
convert_dl = convert_div
|
||||||
|
|
||||||
def convert_dt(self, el, text, parent_tags):
|
def convert_dt(self, el, text, parent_tags):
|
||||||
# remove newlines from term text
|
# remove newlines from term text
|
||||||
text = (text or '').strip()
|
text = (text or '').strip()
|
||||||
@@ -501,14 +553,14 @@ class MarkdownConverter(object):
|
|||||||
# TODO - format consecutive <dt> elements as directly adjacent lines):
|
# TODO - format consecutive <dt> elements as directly adjacent lines):
|
||||||
# https://michelf.ca/projects/php-markdown/extra/#def-list
|
# https://michelf.ca/projects/php-markdown/extra/#def-list
|
||||||
|
|
||||||
return '\n%s\n' % text
|
return '\n\n%s\n' % text
|
||||||
|
|
||||||
def _convert_hn(self, n, el, text, parent_tags):
|
def convert_hN(self, n, el, text, parent_tags):
|
||||||
""" Method name prefixed with _ to prevent <hn> to call this """
|
# convert_hN() converts <hN> tags, where N is any integer
|
||||||
if '_inline' in parent_tags:
|
if '_inline' in parent_tags:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# prevent MemoryErrors in case of very large n
|
# Markdown does not support heading depths of n > 6
|
||||||
n = max(1, min(6, n))
|
n = max(1, min(6, n))
|
||||||
|
|
||||||
style = self.options['heading_style'].lower()
|
style = self.options['heading_style'].lower()
|
||||||
@@ -538,6 +590,24 @@ class MarkdownConverter(object):
|
|||||||
|
|
||||||
return '![%s](%s%s)' % (alt, src, title_part)
|
return '![%s](%s%s)' % (alt, src, title_part)
|
||||||
|
|
||||||
|
def convert_video(self, el, text, parent_tags):
|
||||||
|
if ('_inline' in parent_tags
|
||||||
|
and el.parent.name not in self.options['keep_inline_images_in']):
|
||||||
|
return text
|
||||||
|
src = el.attrs.get('src', None) or ''
|
||||||
|
if not src:
|
||||||
|
sources = el.find_all('source', attrs={'src': True})
|
||||||
|
if sources:
|
||||||
|
src = sources[0].attrs.get('src', None) or ''
|
||||||
|
poster = el.attrs.get('poster', None) or ''
|
||||||
|
if src and poster:
|
||||||
|
return '[![%s](%s)](%s)' % (text, poster, src)
|
||||||
|
if src:
|
||||||
|
return '[%s](%s)' % (text, src)
|
||||||
|
if poster:
|
||||||
|
return '![%s](%s)' % (text, poster)
|
||||||
|
return text
|
||||||
|
|
||||||
def convert_list(self, el, text, parent_tags):
|
def convert_list(self, el, text, parent_tags):
|
||||||
|
|
||||||
# Converting a list to inline is undefined.
|
# Converting a list to inline is undefined.
|
||||||
@@ -623,8 +693,20 @@ class MarkdownConverter(object):
|
|||||||
if self.options['code_language_callback']:
|
if self.options['code_language_callback']:
|
||||||
code_language = self.options['code_language_callback'](el) or code_language
|
code_language = self.options['code_language_callback'](el) or code_language
|
||||||
|
|
||||||
|
if self.options['strip_pre'] == STRIP:
|
||||||
|
text = strip_pre(text) # remove all leading/trailing newlines
|
||||||
|
elif self.options['strip_pre'] == STRIP_ONE:
|
||||||
|
text = strip1_pre(text) # remove one leading/trailing newline
|
||||||
|
elif self.options['strip_pre'] is None:
|
||||||
|
pass # leave leading and trailing newlines as-is
|
||||||
|
else:
|
||||||
|
raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])
|
||||||
|
|
||||||
return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
|
return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
|
||||||
|
|
||||||
|
def convert_q(self, el, text, parent_tags):
|
||||||
|
return '"' + text + '"'
|
||||||
|
|
||||||
def convert_script(self, el, text, parent_tags):
|
def convert_script(self, el, text, parent_tags):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@@ -653,13 +735,13 @@ class MarkdownConverter(object):
|
|||||||
def convert_td(self, el, text, parent_tags):
|
def convert_td(self, el, text, parent_tags):
|
||||||
colspan = 1
|
colspan = 1
|
||||||
if 'colspan' in el.attrs and el['colspan'].isdigit():
|
if 'colspan' in el.attrs and el['colspan'].isdigit():
|
||||||
colspan = int(el['colspan'])
|
colspan = max(1, min(1000, int(el['colspan'])))
|
||||||
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
|
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
|
||||||
|
|
||||||
def convert_th(self, el, text, parent_tags):
|
def convert_th(self, el, text, parent_tags):
|
||||||
colspan = 1
|
colspan = 1
|
||||||
if 'colspan' in el.attrs and el['colspan'].isdigit():
|
if 'colspan' in el.attrs and el['colspan'].isdigit():
|
||||||
colspan = int(el['colspan'])
|
colspan = max(1, min(1000, int(el['colspan'])))
|
||||||
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
|
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
|
||||||
|
|
||||||
def convert_tr(self, el, text, parent_tags):
|
def convert_tr(self, el, text, parent_tags):
|
||||||
@@ -677,6 +759,12 @@ class MarkdownConverter(object):
|
|||||||
)
|
)
|
||||||
overline = ''
|
overline = ''
|
||||||
underline = ''
|
underline = ''
|
||||||
|
full_colspan = 0
|
||||||
|
for cell in cells:
|
||||||
|
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
|
||||||
|
full_colspan += max(1, min(1000, int(cell['colspan'])))
|
||||||
|
else:
|
||||||
|
full_colspan += 1
|
||||||
if ((is_headrow
|
if ((is_headrow
|
||||||
or (is_head_row_missing
|
or (is_head_row_missing
|
||||||
and self.options['table_infer_header']))
|
and self.options['table_infer_header']))
|
||||||
@@ -685,12 +773,6 @@ class MarkdownConverter(object):
|
|||||||
# - is headline or
|
# - is headline or
|
||||||
# - headline is missing and header inference is enabled
|
# - headline is missing and header inference is enabled
|
||||||
# print headline underline
|
# print headline underline
|
||||||
full_colspan = 0
|
|
||||||
for cell in cells:
|
|
||||||
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
|
|
||||||
full_colspan += int(cell["colspan"])
|
|
||||||
else:
|
|
||||||
full_colspan += 1
|
|
||||||
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
|
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
|
||||||
elif ((is_head_row_missing
|
elif ((is_head_row_missing
|
||||||
and not self.options['table_infer_header'])
|
and not self.options['table_infer_header'])
|
||||||
@@ -703,8 +785,8 @@ class MarkdownConverter(object):
|
|||||||
# - the parent is table or
|
# - the parent is table or
|
||||||
# - the parent is tbody at the beginning of a table.
|
# - the parent is tbody at the beginning of a table.
|
||||||
# print empty headline above this row
|
# print empty headline above this row
|
||||||
overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n'
|
overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n'
|
||||||
overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
|
overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
|
||||||
return overline + '|' + text + '\n' + underline
|
return overline + '|' + text + '\n' + underline
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
77
markdownify/__init__.pyi
Normal file
77
markdownify/__init__.pyi
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
from _typeshed import Incomplete
|
||||||
|
from typing import Callable, Union
|
||||||
|
|
||||||
|
ATX: str
|
||||||
|
ATX_CLOSED: str
|
||||||
|
UNDERLINED: str
|
||||||
|
SETEXT = UNDERLINED
|
||||||
|
SPACES: str
|
||||||
|
BACKSLASH: str
|
||||||
|
ASTERISK: str
|
||||||
|
UNDERSCORE: str
|
||||||
|
LSTRIP: str
|
||||||
|
RSTRIP: str
|
||||||
|
STRIP: str
|
||||||
|
STRIP_ONE: str
|
||||||
|
|
||||||
|
|
||||||
|
def markdownify(
|
||||||
|
html: str,
|
||||||
|
autolinks: bool = ...,
|
||||||
|
bs4_options: str = ...,
|
||||||
|
bullets: str = ...,
|
||||||
|
code_language: str = ...,
|
||||||
|
code_language_callback: Union[Callable[[Incomplete], Union[str, None]], None] = ...,
|
||||||
|
convert: Union[list[str], None] = ...,
|
||||||
|
default_title: bool = ...,
|
||||||
|
escape_asterisks: bool = ...,
|
||||||
|
escape_underscores: bool = ...,
|
||||||
|
escape_misc: bool = ...,
|
||||||
|
heading_style: str = ...,
|
||||||
|
keep_inline_images_in: list[str] = ...,
|
||||||
|
newline_style: str = ...,
|
||||||
|
strip: Union[list[str], None] = ...,
|
||||||
|
strip_document: Union[str, None] = ...,
|
||||||
|
strip_pre: str = ...,
|
||||||
|
strong_em_symbol: str = ...,
|
||||||
|
sub_symbol: str = ...,
|
||||||
|
sup_symbol: str = ...,
|
||||||
|
table_infer_header: bool = ...,
|
||||||
|
wrap: bool = ...,
|
||||||
|
wrap_width: int = ...,
|
||||||
|
) -> str: ...
|
||||||
|
|
||||||
|
|
||||||
|
class MarkdownConverter:
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
autolinks: bool = ...,
|
||||||
|
bs4_options: str = ...,
|
||||||
|
bullets: str = ...,
|
||||||
|
code_language: str = ...,
|
||||||
|
code_language_callback: Union[Callable[[Incomplete], Union[str, None]], None] = ...,
|
||||||
|
convert: Union[list[str], None] = ...,
|
||||||
|
default_title: bool = ...,
|
||||||
|
escape_asterisks: bool = ...,
|
||||||
|
escape_underscores: bool = ...,
|
||||||
|
escape_misc: bool = ...,
|
||||||
|
heading_style: str = ...,
|
||||||
|
keep_inline_images_in: list[str] = ...,
|
||||||
|
newline_style: str = ...,
|
||||||
|
strip: Union[list[str], None] = ...,
|
||||||
|
strip_document: Union[str, None] = ...,
|
||||||
|
strip_pre: str = ...,
|
||||||
|
strong_em_symbol: str = ...,
|
||||||
|
sub_symbol: str = ...,
|
||||||
|
sup_symbol: str = ...,
|
||||||
|
table_infer_header: bool = ...,
|
||||||
|
wrap: bool = ...,
|
||||||
|
wrap_width: int = ...,
|
||||||
|
) -> None:
|
||||||
|
...
|
||||||
|
|
||||||
|
def convert(self, html: str) -> str:
|
||||||
|
...
|
||||||
|
|
||||||
|
def convert_soup(self, soup: Incomplete) -> str:
|
||||||
|
...
|
||||||
9
markdownify/main.py
Normal file → Executable file
9
markdownify/main.py
Normal file → Executable file
@@ -55,7 +55,9 @@ def main(argv=sys.argv[1:]):
|
|||||||
parser.add_argument('--no-escape-underscores', dest='escape_underscores',
|
parser.add_argument('--no-escape-underscores', dest='escape_underscores',
|
||||||
action='store_false',
|
action='store_false',
|
||||||
help="Do not escape '_' to '\\_' in text.")
|
help="Do not escape '_' to '\\_' in text.")
|
||||||
parser.add_argument('-i', '--keep-inline-images-in', nargs='*',
|
parser.add_argument('-i', '--keep-inline-images-in',
|
||||||
|
default=[],
|
||||||
|
nargs='*',
|
||||||
help="Images are converted to their alt-text when the images are "
|
help="Images are converted to their alt-text when the images are "
|
||||||
"located inside headlines or table cells. If some inline images "
|
"located inside headlines or table cells. If some inline images "
|
||||||
"should be converted to markdown images instead, this option can "
|
"should be converted to markdown images instead, this option can "
|
||||||
@@ -68,6 +70,11 @@ def main(argv=sys.argv[1:]):
|
|||||||
parser.add_argument('-w', '--wrap', action='store_true',
|
parser.add_argument('-w', '--wrap', action='store_true',
|
||||||
help="Wrap all text paragraphs at --wrap-width characters.")
|
help="Wrap all text paragraphs at --wrap-width characters.")
|
||||||
parser.add_argument('--wrap-width', type=int, default=80)
|
parser.add_argument('--wrap-width', type=int, default=80)
|
||||||
|
parser.add_argument('--bs4-options',
|
||||||
|
default='html.parser',
|
||||||
|
help="Specifies the parser that BeautifulSoup should use to parse "
|
||||||
|
"the HTML markup. Examples include 'html5.parser', 'lxml', and "
|
||||||
|
"'html5lib'.")
|
||||||
|
|
||||||
args = parser.parse_args(argv)
|
args = parser.parse_args(argv)
|
||||||
print(markdownify(**vars(args)))
|
print(markdownify(**vars(args)))
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "markdownify"
|
name = "markdownify"
|
||||||
version = "1.0.0"
|
version = "1.2.0"
|
||||||
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
|
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
|
||||||
description = "Convert HTML to markdown."
|
description = "Convert HTML to markdown."
|
||||||
readme = "README.rst"
|
readme = "README.rst"
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
Test whitelisting/blacklisting of specific tags.
|
Test whitelisting/blacklisting of specific tags.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from markdownify import markdownify, LSTRIP, RSTRIP, STRIP
|
from markdownify import markdownify, LSTRIP, RSTRIP, STRIP, STRIP_ONE
|
||||||
from .utils import md
|
from .utils import md
|
||||||
|
|
||||||
|
|
||||||
@@ -32,3 +32,16 @@ def test_strip_document():
|
|||||||
assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
|
assert markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
|
||||||
assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
|
assert markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
|
||||||
assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
|
assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_pre():
|
||||||
|
assert markdownify("<pre> \n \n Hello \n \n </pre>") == "```\n Hello\n```"
|
||||||
|
assert markdownify("<pre> \n \n Hello \n \n </pre>", strip_pre=STRIP) == "```\n Hello\n```"
|
||||||
|
assert markdownify("<pre> \n \n Hello \n \n </pre>", strip_pre=STRIP_ONE) == "```\n \n Hello \n \n```"
|
||||||
|
assert markdownify("<pre> \n \n Hello \n \n </pre>", strip_pre=None) == "```\n \n \n Hello \n \n \n```"
|
||||||
|
|
||||||
|
|
||||||
|
def bs4_options():
|
||||||
|
assert markdownify("<p>Hello</p>", bs4_options="html.parser") == "Hello"
|
||||||
|
assert markdownify("<p>Hello</p>", bs4_options=["html.parser"]) == "Hello"
|
||||||
|
assert markdownify("<p>Hello</p>", bs4_options={"features": "html.parser"}) == "Hello"
|
||||||
|
|||||||
@@ -79,6 +79,8 @@ def test_blockquote_nested():
|
|||||||
def test_br():
|
def test_br():
|
||||||
assert md('a<br />b<br />c') == 'a \nb \nc'
|
assert md('a<br />b<br />c') == 'a \nb \nc'
|
||||||
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
|
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
|
||||||
|
assert md('<h1>foo<br />bar</h1>', heading_style=ATX) == '\n\n# foo bar\n\n'
|
||||||
|
assert md('<td>foo<br />bar</td>', heading_style=ATX) == ' foo bar |'
|
||||||
|
|
||||||
|
|
||||||
def test_code():
|
def test_code():
|
||||||
@@ -99,16 +101,19 @@ def test_code():
|
|||||||
assert md('<code>foo<s> bar </s>baz</code>') == '`foo bar baz`'
|
assert md('<code>foo<s> bar </s>baz</code>') == '`foo bar baz`'
|
||||||
assert md('<code>foo<sup>bar</sup>baz</code>', sup_symbol='^') == '`foobarbaz`'
|
assert md('<code>foo<sup>bar</sup>baz</code>', sup_symbol='^') == '`foobarbaz`'
|
||||||
assert md('<code>foo<sub>bar</sub>baz</code>', sub_symbol='^') == '`foobarbaz`'
|
assert md('<code>foo<sub>bar</sub>baz</code>', sub_symbol='^') == '`foobarbaz`'
|
||||||
|
assert md('foo<code>`bar`</code>baz') == 'foo`` `bar` ``baz'
|
||||||
|
assert md('foo<code>``bar``</code>baz') == 'foo``` ``bar`` ```baz'
|
||||||
|
assert md('foo<code> `bar` </code>baz') == 'foo `` `bar` `` baz'
|
||||||
|
|
||||||
|
|
||||||
def test_dl():
|
def test_dl():
|
||||||
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\nterm\n: definition\n'
|
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\n\nterm\n: definition\n\n'
|
||||||
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\nte rm\n: definition\n'
|
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\n\nte rm\n: definition\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\nterm\n: definition-p1\n\n definition-p2\n'
|
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\n\nterm\n: definition-p1\n\n definition-p2\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\nterm\n: definition 1\n: definition 2\n'
|
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\n\nterm\n: definition 1\n: definition 2\n\n'
|
||||||
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n'
|
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\n\nterm 1\n: definition 1\n\nterm 2\n: definition 2\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\nterm\n: > line 1\n >\n > line 2\n'
|
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\n\nterm\n: > line 1\n >\n > line 2\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
|
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\n\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_del():
|
def test_del():
|
||||||
@@ -162,7 +167,8 @@ def test_hn():
|
|||||||
assert md('<h5>Hello</h5>') == '\n\n##### Hello\n\n'
|
assert md('<h5>Hello</h5>') == '\n\n##### Hello\n\n'
|
||||||
assert md('<h6>Hello</h6>') == '\n\n###### Hello\n\n'
|
assert md('<h6>Hello</h6>') == '\n\n###### Hello\n\n'
|
||||||
assert md('<h10>Hello</h10>') == md('<h6>Hello</h6>')
|
assert md('<h10>Hello</h10>') == md('<h6>Hello</h6>')
|
||||||
assert md('<hn>Hello</hn>') == md('Hello')
|
assert md('<h0>Hello</h0>') == md('<h1>Hello</h1>')
|
||||||
|
assert md('<hx>Hello</hx>') == md('Hello')
|
||||||
|
|
||||||
|
|
||||||
def test_hn_chained():
|
def test_hn_chained():
|
||||||
@@ -243,6 +249,14 @@ def test_img():
|
|||||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
|
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
|
||||||
|
|
||||||
|
|
||||||
|
def test_video():
|
||||||
|
assert md('<video src="/path/to/video.mp4" poster="/path/to/img.jpg">text</video>') == '[![text](/path/to/img.jpg)](/path/to/video.mp4)'
|
||||||
|
assert md('<video src="/path/to/video.mp4">text</video>') == '[text](/path/to/video.mp4)'
|
||||||
|
assert md('<video><source src="/path/to/video.mp4"/>text</video>') == '[text](/path/to/video.mp4)'
|
||||||
|
assert md('<video poster="/path/to/img.jpg">text</video>') == '![text](/path/to/img.jpg)'
|
||||||
|
assert md('<video>text</video>') == 'text'
|
||||||
|
|
||||||
|
|
||||||
def test_kbd():
|
def test_kbd():
|
||||||
inline_tests('kbd', '`')
|
inline_tests('kbd', '`')
|
||||||
|
|
||||||
@@ -294,6 +308,11 @@ def test_pre():
|
|||||||
assert md("<p>foo</p>\n<pre>bar</pre>\n</p>baz</p>", sub_symbol="^") == "\n\nfoo\n\n```\nbar\n```\n\nbaz"
|
assert md("<p>foo</p>\n<pre>bar</pre>\n</p>baz</p>", sub_symbol="^") == "\n\nfoo\n\n```\nbar\n```\n\nbaz"
|
||||||
|
|
||||||
|
|
||||||
|
def test_q():
|
||||||
|
assert md('foo <q>quote</q> bar') == 'foo "quote" bar'
|
||||||
|
assert md('foo <q cite="https://example.com">quote</q> bar') == 'foo "quote" bar'
|
||||||
|
|
||||||
|
|
||||||
def test_script():
|
def test_script():
|
||||||
assert md('foo <script>var foo=42;</script> bar') == 'foo bar'
|
assert md('foo <script>var foo=42;</script> bar') == 'foo bar'
|
||||||
|
|
||||||
@@ -354,4 +373,4 @@ def test_spaces():
|
|||||||
assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
|
assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
|
||||||
assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
|
assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
|
||||||
assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
|
assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
|
||||||
assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo \n```\n\nbar'
|
assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo\n```\n\nbar'
|
||||||
|
|||||||
@@ -12,7 +12,15 @@ class UnitTestConverter(MarkdownConverter):
|
|||||||
|
|
||||||
def convert_custom_tag(self, el, text, parent_tags):
|
def convert_custom_tag(self, el, text, parent_tags):
|
||||||
"""Ensure conversion function is found for tags with special characters in name"""
|
"""Ensure conversion function is found for tags with special characters in name"""
|
||||||
return "FUNCTION USED: %s" % text
|
return "convert_custom_tag(): %s" % text
|
||||||
|
|
||||||
|
def convert_h1(self, el, text, parent_tags):
|
||||||
|
"""Ensure explicit heading conversion function is used"""
|
||||||
|
return "convert_h1: %s" % (text)
|
||||||
|
|
||||||
|
def convert_hN(self, n, el, text, parent_tags):
|
||||||
|
"""Ensure general heading conversion function is used"""
|
||||||
|
return "convert_hN(%d): %s" % (n, text)
|
||||||
|
|
||||||
|
|
||||||
def test_custom_conversion_functions():
|
def test_custom_conversion_functions():
|
||||||
@@ -23,7 +31,11 @@ def test_custom_conversion_functions():
|
|||||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />text') == '\n\ntext'
|
assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />text') == '\n\ntext'
|
||||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" />text') == '\n\ntext'
|
assert md('<img src="/path/to/img.jpg" alt="Alt text" />text') == '\n\ntext'
|
||||||
|
|
||||||
assert md("<custom-tag>text</custom-tag>") == "FUNCTION USED: text"
|
assert md("<custom-tag>text</custom-tag>") == "convert_custom_tag(): text"
|
||||||
|
|
||||||
|
assert md("<h1>text</h1>") == "convert_h1: text"
|
||||||
|
|
||||||
|
assert md("<h3>text</h3>") == "convert_hN(3): text"
|
||||||
|
|
||||||
|
|
||||||
def test_soup():
|
def test_soup():
|
||||||
|
|||||||
@@ -267,6 +267,23 @@ table_with_undefined_colspan = """<table>
|
|||||||
</tr>
|
</tr>
|
||||||
</table>"""
|
</table>"""
|
||||||
|
|
||||||
|
table_with_colspan_missing_head = """<table>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2">Name</td>
|
||||||
|
<td>Age</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Jill</td>
|
||||||
|
<td>Smith</td>
|
||||||
|
<td>50</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Eve</td>
|
||||||
|
<td>Jackson</td>
|
||||||
|
<td>94</td>
|
||||||
|
</tr>
|
||||||
|
</table>"""
|
||||||
|
|
||||||
|
|
||||||
def test_table():
|
def test_table():
|
||||||
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
@@ -283,6 +300,7 @@ def test_table():
|
|||||||
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
|
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
|
||||||
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
||||||
|
assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_table_infer_header():
|
def test_table_infer_header():
|
||||||
@@ -300,3 +318,4 @@ def test_table_infer_header():
|
|||||||
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
|
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
|
||||||
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
||||||
|
assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
|
|||||||
70
tests/types.py
Normal file
70
tests/types.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
from markdownify import markdownify, ASTERISK, BACKSLASH, LSTRIP, RSTRIP, SPACES, STRIP, UNDERLINED, UNDERSCORE, MarkdownConverter
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
markdownify("<p>Hello</p>") == "Hello" # test default of STRIP
|
||||||
|
markdownify("<p>Hello</p>", strip_document=LSTRIP) == "Hello\n\n"
|
||||||
|
markdownify("<p>Hello</p>", strip_document=RSTRIP) == "\n\nHello"
|
||||||
|
markdownify("<p>Hello</p>", strip_document=STRIP) == "Hello"
|
||||||
|
markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
|
||||||
|
|
||||||
|
# default options
|
||||||
|
MarkdownConverter(
|
||||||
|
autolinks=True,
|
||||||
|
bs4_options='html.parser',
|
||||||
|
bullets='*+-',
|
||||||
|
code_language='',
|
||||||
|
code_language_callback=None,
|
||||||
|
convert=None,
|
||||||
|
default_title=False,
|
||||||
|
escape_asterisks=True,
|
||||||
|
escape_underscores=True,
|
||||||
|
escape_misc=False,
|
||||||
|
heading_style=UNDERLINED,
|
||||||
|
keep_inline_images_in=[],
|
||||||
|
newline_style=SPACES,
|
||||||
|
strip=None,
|
||||||
|
strip_document=STRIP,
|
||||||
|
strip_pre=STRIP,
|
||||||
|
strong_em_symbol=ASTERISK,
|
||||||
|
sub_symbol='',
|
||||||
|
sup_symbol='',
|
||||||
|
table_infer_header=False,
|
||||||
|
wrap=False,
|
||||||
|
wrap_width=80,
|
||||||
|
).convert("")
|
||||||
|
|
||||||
|
# custom options
|
||||||
|
MarkdownConverter(
|
||||||
|
strip_document=None,
|
||||||
|
bullets="-",
|
||||||
|
escape_asterisks=True,
|
||||||
|
escape_underscores=True,
|
||||||
|
escape_misc=True,
|
||||||
|
autolinks=True,
|
||||||
|
default_title=True,
|
||||||
|
newline_style=BACKSLASH,
|
||||||
|
sup_symbol='^',
|
||||||
|
sub_symbol='^',
|
||||||
|
keep_inline_images_in=['h3'],
|
||||||
|
wrap=True,
|
||||||
|
wrap_width=80,
|
||||||
|
strong_em_symbol=UNDERSCORE,
|
||||||
|
code_language='python',
|
||||||
|
code_language_callback=None
|
||||||
|
).convert("")
|
||||||
|
|
||||||
|
html = '<b>test</b>'
|
||||||
|
soup = BeautifulSoup(html, 'html.parser')
|
||||||
|
MarkdownConverter().convert_soup(soup) == '**test**'
|
||||||
|
|
||||||
|
|
||||||
|
def callback(el: BeautifulSoup) -> Union[str, None]:
|
||||||
|
return el['class'][0] if el.has_attr('class') else None
|
||||||
|
|
||||||
|
|
||||||
|
MarkdownConverter(code_language_callback=callback).convert("")
|
||||||
|
MarkdownConverter(code_language_callback=lambda el: None).convert("")
|
||||||
|
|
||||||
|
markdownify('<pre class="python">test\n foo\nbar</pre>', code_language_callback=callback)
|
||||||
|
markdownify('<pre class="python">test\n foo\nbar</pre>', code_language_callback=lambda el: None)
|
||||||
Reference in New Issue
Block a user