Compare commits

..

2 Commits

Author SHA1 Message Date
AlexVonB
e249e58818 fix github action to work with tox 2022-08-28 21:39:26 +02:00
AlexVonB
2840653e29 switching to tox 2022-08-28 21:38:06 +02:00
14 changed files with 87 additions and 337 deletions

View File

@@ -23,10 +23,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install --upgrade setuptools setuptools_scm wheel build tox
pip install tox
- name: Lint and test
run: |
tox
- name: Build
run: |
python -m build -nwsx .

View File

@@ -21,11 +21,11 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install --upgrade setuptools setuptools_scm wheel build twine
pip install setuptools wheel twine
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
python -m build -nwsx .
python setup.py sdist bdist_wheel
twine upload dist/*

View File

@@ -1,2 +1 @@
include README.rst
prune tests

View File

@@ -1,8 +1,8 @@
|build| |version| |license| |downloads|
.. |build| image:: https://img.shields.io/github/actions/workflow/status/matthewwithanm/python-markdownify/python-app.yml?branch=develop
.. |build| image:: https://img.shields.io/github/workflow/status/matthewwithanm/python-markdownify/Python%20application/develop
:alt: GitHub Workflow Status
:target: https://github.com/matthewwithanm/python-markdownify/actions/workflows/python-app.yml?query=workflow%3A%22Python+application%22
:target: https://github.com/matthewwithanm/python-markdownify/actions?query=workflow%3A%22Python+application%22
.. |version| image:: https://img.shields.io/pypi/v/markdownify
:alt: Pypi version
@@ -87,11 +87,7 @@ strong_em_symbol
sub_symbol, sup_symbol
Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an
empty string, because this is non-standard behavior. Could be something like
``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. If the value starts
with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is
inserted after the ``<`` in the string used after the text; this allows
specifying ``<sub>`` to use raw HTML in the output for subscripts, for
example.
``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.
newline_style
Defines the style of marking linebreaks (``<br>``) in markdown. The default
@@ -127,11 +123,6 @@ escape_underscores
If set to ``False``, do not escape ``_`` to ``\_`` in text.
Defaults to ``True``.
escape_misc
If set to ``False``, do not escape miscellaneous punctuation characters
that sometimes have Markdown significance in text.
Defaults to ``True``.
keep_inline_images_in
Images are converted to their alt-text when the images are located inside
headlines or table cells. If some inline images should be converted to
@@ -165,12 +156,7 @@ Creating Custom Converters
If you have a special usecase that calls for a special conversion, you can
always inherit from ``MarkdownConverter`` and override the method you want to
change.
The function that handles a HTML tag named ``abc`` is called
``convert_abc(self, el, text, convert_as_inline)`` and returns a string
containing the converted HTML tag.
The ``MarkdownConverter`` object will handle the conversion based on the
function names:
change:
.. code:: python
@@ -187,24 +173,9 @@ function names:
def md(html, **options):
return ImageBlockConverter(**options).convert(html)
.. code:: python
from markdownify import MarkdownConverter
class IgnoreParagraphsConverter(MarkdownConverter):
"""
Create a custom MarkdownConverter that ignores paragraphs
"""
def convert_p(self, el, text, convert_as_inline):
return ''
# Create shorthand method for conversion
def md(html, **options):
return IgnoreParagraphsConverter(**options).convert(html)
Command Line Interface
======================
=====================
Use ``markdownify example.html > example.md`` or pipe input from stdin
(``cat example.html | markdownify > example.md``).

View File

@@ -43,22 +43,15 @@ def abstract_inline_conversion(markup_fn):
"""
This abstracts all simple inline tags like b, em, del, ...
Returns a function that wraps the chomped text in a pair of the string
that is returned by markup_fn, with '/' inserted in the string used after
the text if it looks like an HTML tag. markup_fn is necessary to allow for
that is returned by markup_fn. markup_fn is necessary to allow for
references to self.strong_em_symbol etc.
"""
def implementation(self, el, text, convert_as_inline):
markup_prefix = markup_fn(self)
if markup_prefix.startswith('<') and markup_prefix.endswith('>'):
markup_suffix = '</' + markup_prefix[1:]
else:
markup_suffix = markup_prefix
if el.find_parent(['pre', 'code', 'kbd', 'samp']):
return text
markup = markup_fn(self)
prefix, suffix, text = chomp(text)
if not text:
return ''
return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix)
return '%s%s%s%s%s' % (prefix, markup, text, markup, suffix)
return implementation
@@ -76,7 +69,6 @@ class MarkdownConverter(object):
default_title = False
escape_asterisks = True
escape_underscores = True
escape_misc = True
heading_style = UNDERLINED
keep_inline_images_in = []
newline_style = SPACES
@@ -160,12 +152,13 @@ class MarkdownConverter(object):
def process_text(self, el):
text = six.text_type(el) or ''
# normalize whitespace if we're not inside a preformatted element
if not el.find_parent('pre'):
# dont remove any whitespace when handling pre or code in pre
if not (el.parent.name == 'pre'
or (el.parent.name == 'code'
and el.parent.parent.name == 'pre')):
text = whitespace_re.sub(' ', text)
# escape special characters if we're not inside a preformatted or code element
if not el.find_parent(['pre', 'code', 'kbd', 'samp']):
if el.parent.name != 'code' and el.parent.name != 'pre':
text = self.escape(text)
# remove trailing whitespaces if any of the following condition is true:
@@ -207,9 +200,6 @@ class MarkdownConverter(object):
def escape(self, text):
if not text:
return ''
if self.options['escape_misc']:
text = re.sub(r'([\\&<`[>~#=+|-])', r'\\\1', text)
text = re.sub(r'([0-9])([.)])', r'\1\\\2', text)
if self.options['escape_asterisks']:
text = text.replace('*', r'\*')
if self.options['escape_underscores']:
@@ -248,7 +238,7 @@ class MarkdownConverter(object):
if convert_as_inline:
return text
return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''
return '\n' + (line_beginning_re.sub('> ', text) + '\n\n') if text else ''
def convert_br(self, el, text, convert_as_inline):
if convert_as_inline:
@@ -276,7 +266,7 @@ class MarkdownConverter(object):
return text
style = self.options['heading_style'].lower()
text = text.strip()
text = text.rstrip()
if style == UNDERLINED and n <= 2:
line = '=' if n == 1 else '-'
return self.underline(text, line)
@@ -326,7 +316,7 @@ class MarkdownConverter(object):
def convert_li(self, el, text, convert_as_inline):
parent = el.parent
if parent is not None and parent.name == 'ol':
if parent.get("start") and str(parent.get("start")).isnumeric():
if parent.get("start"):
start = int(parent.get("start"))
else:
start = 1
@@ -361,12 +351,6 @@ class MarkdownConverter(object):
return '\n```%s\n%s\n```\n' % (code_language, text)
def convert_script(self, el, text, convert_as_inline):
return ''
def convert_style(self, el, text, convert_as_inline):
return ''
convert_s = convert_del
convert_strong = convert_b
@@ -380,42 +364,20 @@ class MarkdownConverter(object):
def convert_table(self, el, text, convert_as_inline):
return '\n\n' + text + '\n'
def convert_caption(self, el, text, convert_as_inline):
return text + '\n'
def convert_figcaption(self, el, text, convert_as_inline):
return '\n\n' + text + '\n\n'
def convert_td(self, el, text, convert_as_inline):
colspan = 1
if 'colspan' in el.attrs and el['colspan'].isdigit():
colspan = int(el['colspan'])
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
return ' ' + text + ' |'
def convert_th(self, el, text, convert_as_inline):
colspan = 1
if 'colspan' in el.attrs and el['colspan'].isdigit():
colspan = int(el['colspan'])
return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
return ' ' + text + ' |'
def convert_tr(self, el, text, convert_as_inline):
cells = el.find_all(['td', 'th'])
is_headrow = (
all([cell.name == 'th' for cell in cells])
or (not el.previous_sibling and not el.parent.name == 'tbody')
or (not el.previous_sibling and el.parent.name == 'tbody' and len(el.parent.parent.find_all(['thead'])) < 1)
)
is_headrow = all([cell.name == 'th' for cell in cells])
overline = ''
underline = ''
if is_headrow and not el.previous_sibling:
# first row and is headline: print headline underline
full_colspan = 0
for cell in cells:
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
full_colspan += int(cell["colspan"])
else:
full_colspan += 1
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
underline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
elif (not el.previous_sibling
and (el.parent.name == 'table'
or (el.parent.name == 'tbody'

View File

@@ -3,8 +3,7 @@
import argparse
import sys
from markdownify import markdownify, ATX, ATX_CLOSED, UNDERLINED, \
SPACES, BACKSLASH, ASTERISK, UNDERSCORE
from markdownify import markdownify
def main(argv=sys.argv[1:]):
@@ -29,23 +28,16 @@ def main(argv=sys.argv[1:]):
parser.add_argument('--default-title', action='store_false',
help="A boolean to enable setting the title of a link to its "
"href, if no title is given.")
parser.add_argument('--heading-style', default=UNDERLINED,
choices=(ATX, ATX_CLOSED, UNDERLINED),
parser.add_argument('--heading-style',
choices=('ATX', 'ATX_CLOSED', 'SETEXT', 'UNDERLINED'),
help="Defines how headings should be converted.")
parser.add_argument('-b', '--bullets', default='*+-',
help="A string of bullet styles to use; the bullet will "
"alternate based on nesting level.")
parser.add_argument('--strong-em-symbol', default=ASTERISK,
choices=(ASTERISK, UNDERSCORE),
help="Use * or _ to convert strong and italics text"),
parser.add_argument('--sub-symbol', default='',
help="Define the chars that surround '<sub>'.")
parser.add_argument('--sup-symbol', default='',
help="Define the chars that surround '<sup>'.")
parser.add_argument('--newline-style', default=SPACES,
choices=(SPACES, BACKSLASH),
help="Defines the style of <br> conversions: two spaces "
"or backslash at the and of the line thet should break.")
parser.add_argument('--code-language', default='',
help="Defines the language that should be assumed for all "
"'<pre>' sections.")

View File

@@ -1,45 +0,0 @@
[build-system]
requires = ["setuptools>=61.2", "setuptools_scm[toml]>=3.4.3"]
build-backend = "setuptools.build_meta"
[project]
name = "markdownify"
version = "0.13.1"
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
description = "Convert HTML to markdown."
readme = "README.rst"
classifiers = [
"Environment :: Web Environment",
"Framework :: Django",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 2.5",
"Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Topic :: Utilities",
]
dependencies = [
"beautifulsoup4>=4.9,<5",
"six>=1.15,<2"
]
[project.urls]
Homepage = "http://github.com/matthewwithanm/python-markdownify"
Download = "http://github.com/matthewwithanm/python-markdownify/tarball/master"
[project.scripts]
markdownify = "markdownify.main:main"
[tool.setuptools]
zip-safe = false
include-package-data = true
[tool.setuptools.packages.find]
include = ["markdownify", "markdownify.*"]
namespaces = false
[tool.setuptools_scm]

52
setup.py Normal file
View File

@@ -0,0 +1,52 @@
#/usr/bin/env python
import codecs
import os
from setuptools import setup, find_packages
read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
pkgmeta = {
'__title__': 'markdownify',
'__author__': 'Matthew Tretter',
'__version__': '0.11.2',
}
read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
setup(
name='markdownify',
description='Convert HTML to markdown.',
long_description=read(os.path.join(os.path.dirname(__file__), 'README.rst')),
version=pkgmeta['__version__'],
author=pkgmeta['__author__'],
author_email='m@tthewwithanm.com',
url='http://github.com/matthewwithanm/python-markdownify',
download_url='http://github.com/matthewwithanm/python-markdownify/tarball/master',
packages=find_packages(),
zip_safe=False,
include_package_data=True,
install_requires=[
'beautifulsoup4>=4.9,<5',
'six>=1.15,<2',
],
classifiers=[
'Environment :: Web Environment',
'Framework :: Django',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2.5',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Topic :: Utilities'
],
entry_points={
'console_scripts': [
'markdownify = markdownify.main:main'
]
}
)

View File

@@ -1,10 +0,0 @@
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
name = "python-shell";
buildInputs = with pkgs; [
python38
python38Packages.tox
python38Packages.setuptools
python38Packages.virtualenv
];
}

View File

@@ -52,12 +52,6 @@ def test_b_spaces():
def test_blockquote():
assert md('<blockquote>Hello</blockquote>') == '\n> Hello\n\n'
assert md('<blockquote>\nHello\n</blockquote>') == '\n> Hello\n\n'
def test_blockquote_with_nested_paragraph():
assert md('<blockquote><p>Hello</p></blockquote>') == '\n> Hello\n\n'
assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n> \n> Hello again\n\n'
def test_blockquote_with_paragraph():
@@ -66,7 +60,7 @@ def test_blockquote_with_paragraph():
def test_blockquote_nested():
text = md('<blockquote>And she was like <blockquote>Hello</blockquote></blockquote>')
assert text == '\n> And she was like \n> > Hello\n\n'
assert text == '\n> And she was like \n> > Hello\n> \n> \n\n'
def test_br():
@@ -74,29 +68,9 @@ def test_br():
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
def test_caption():
assert md('TEXT<figure><figcaption>Caption</figcaption><span>SPAN</span></figure>') == 'TEXT\n\nCaption\n\nSPAN'
assert md('<figure><span>SPAN</span><figcaption>Caption</figcaption></figure>TEXT') == 'SPAN\n\nCaption\n\nTEXT'
def test_code():
inline_tests('code', '`')
assert md('<code>*this_should_not_escape*</code>') == '`*this_should_not_escape*`'
assert md('<kbd>*this_should_not_escape*</kbd>') == '`*this_should_not_escape*`'
assert md('<samp>*this_should_not_escape*</samp>') == '`*this_should_not_escape*`'
assert md('<code><span>*this_should_not_escape*</span></code>') == '`*this_should_not_escape*`'
assert md('<code>this should\t\tnormalize</code>') == '`this should normalize`'
assert md('<code><span>this should\t\tnormalize</span></code>') == '`this should normalize`'
assert md('<code>foo<b>bar</b>baz</code>') == '`foobarbaz`'
assert md('<kbd>foo<i>bar</i>baz</kbd>') == '`foobarbaz`'
assert md('<samp>foo<del> bar </del>baz</samp>') == '`foo bar baz`'
assert md('<samp>foo <del>bar</del> baz</samp>') == '`foo bar baz`'
assert md('<code>foo<em> bar </em>baz</code>') == '`foo bar baz`'
assert md('<code>foo<code> bar </code>baz</code>') == '`foo bar baz`'
assert md('<code>foo<strong> bar </strong>baz</code>') == '`foo bar baz`'
assert md('<code>foo<s> bar </s>baz</code>') == '`foo bar baz`'
assert md('<code>foo<sup>bar</sup>baz</code>', sup_symbol='^') == '`foobarbaz`'
assert md('<code>foo<sub>bar</sub>baz</code>', sub_symbol='^') == '`foobarbaz`'
assert md('<code>this_should_not_escape</code>') == '`this_should_not_escape`'
def test_del():
@@ -111,14 +85,6 @@ def test_em():
inline_tests('em', '*')
def test_header_with_space():
assert md('<h3>\n\nHello</h3>') == '### Hello\n\n'
assert md('<h4>\n\nHello</h4>') == '#### Hello\n\n'
assert md('<h5>\n\nHello</h5>') == '##### Hello\n\n'
assert md('<h5>\n\nHello\n\n</h5>') == '##### Hello\n\n'
assert md('<h5>\n\nHello \n\n</h5>') == '##### Hello\n\n'
def test_h1():
assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
@@ -221,29 +187,7 @@ def test_p():
def test_pre():
assert md('<pre>test\n foo\nbar</pre>') == '\n```\ntest\n foo\nbar\n```\n'
assert md('<pre><code>test\n foo\nbar</code></pre>') == '\n```\ntest\n foo\nbar\n```\n'
assert md('<pre>*this_should_not_escape*</pre>') == '\n```\n*this_should_not_escape*\n```\n'
assert md('<pre><span>*this_should_not_escape*</span></pre>') == '\n```\n*this_should_not_escape*\n```\n'
assert md('<pre>\t\tthis should\t\tnot normalize</pre>') == '\n```\n\t\tthis should\t\tnot normalize\n```\n'
assert md('<pre><span>\t\tthis should\t\tnot normalize</span></pre>') == '\n```\n\t\tthis should\t\tnot normalize\n```\n'
assert md('<pre>foo<b>\nbar\n</b>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<i>\nbar\n</i>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo\n<i>bar</i>\nbaz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<i>\n</i>baz</pre>') == '\n```\nfoo\nbaz\n```\n'
assert md('<pre>foo<del>\nbar\n</del>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<em>\nbar\n</em>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<code>\nbar\n</code>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<strong>\nbar\n</strong>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<s>\nbar\n</s>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<sup>\nbar\n</sup>baz</pre>', sup_symbol='^') == '\n```\nfoo\nbar\nbaz\n```\n'
assert md('<pre>foo<sub>\nbar\n</sub>baz</pre>', sub_symbol='^') == '\n```\nfoo\nbar\nbaz\n```\n'
def test_script():
assert md('foo <script>var foo=42;</script> bar') == 'foo bar'
def test_style():
assert md('foo <style>h1 { font-size: larger }</style> bar') == 'foo bar'
assert md('<pre>this_should_not_escape</pre>') == '\n```\nthis_should_not_escape\n```\n'
def test_s():
@@ -268,13 +212,11 @@ def test_strong_em_symbol():
def test_sub():
assert md('<sub>foo</sub>') == 'foo'
assert md('<sub>foo</sub>', sub_symbol='~') == '~foo~'
assert md('<sub>foo</sub>', sub_symbol='<sub>') == '<sub>foo</sub>'
def test_sup():
assert md('<sup>foo</sup>') == 'foo'
assert md('<sup>foo</sup>', sup_symbol='^') == '^foo^'
assert md('<sup>foo</sup>', sup_symbol='<sup>') == '<sup>foo</sup>'
def test_lang():

View File

@@ -12,7 +12,7 @@ def test_underscore():
def test_xml_entities():
assert md('&amp;') == r'\&'
assert md('&amp;') == '&'
def test_named_entities():
@@ -25,23 +25,4 @@ def test_hexadecimal_entities():
def test_single_escaping_entities():
assert md('&amp;amp;') == r'\&amp;'
def text_misc():
assert md('\\*') == r'\\\*'
assert md('<foo>') == r'\<foo\>'
assert md('# foo') == r'\# foo'
assert md('> foo') == r'\> foo'
assert md('~~foo~~') == r'\~\~foo\~\~'
assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
assert md('---\n') == '\\-\\-\\-\n'
assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
assert md('`x`') == r'\`x\`'
assert md('[text](link)') == r'\[text](link)'
assert md('1. x') == r'1\. x'
assert md('not a number. x') == r'not a number. x'
assert md('1) x') == r'1\) x'
assert md('not a number) x') == r'not a number) x'
assert md('|not table|') == r'\|not table\|'
assert md(r'\ <foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'
assert md('&amp;amp;') == '&amp;'

View File

@@ -43,9 +43,6 @@ nested_ols = """
def test_ol():
assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '3. a\n4. b\n'
assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
def test_nested_ols():

View File

@@ -57,26 +57,6 @@ table_with_paragraphs = """<table>
</tr>
</table>"""
table_with_linebreaks = """<table>
<tr>
<th>Firstname</th>
<th>Lastname</th>
<th>Age</th>
</tr>
<tr>
<td>Jill</td>
<td>Smith
Jackson</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson
Smith</td>
<td>94</td>
</tr>
</table>"""
table_with_header_column = """<table>
<tr>
@@ -119,28 +99,6 @@ table_head_body = """<table>
</tbody>
</table>"""
table_head_body_missing_head = """<table>
<thead>
<tr>
<td>Firstname</td>
<td>Lastname</td>
<td>Age</td>
</tr>
</thead>
<tbody>
<tr>
<td>Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</tbody>
</table>"""
table_missing_text = """<table>
<thead>
<tr>
@@ -201,54 +159,13 @@ table_body = """<table>
</tbody>
</table>"""
table_with_caption = """TEXT<table><caption>Caption</caption>
<tbody><tr><td>Firstname</td>
<td>Lastname</td>
<td>Age</td>
</tr>
</tbody>
</table>"""
table_with_colspan = """<table>
<tr>
<th colspan="2">Name</th>
<th>Age</th>
</tr>
<tr>
<td colspan="1">Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
table_with_undefined_colspan = """<table>
<tr>
<th colspan="undefined">Name</th>
<th>Age</th>
</tr>
<tr>
<td colspan="-1">Jill</td>
<td>Smith</td>
</tr>
</table>"""
def test_table():
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_caption) == 'TEXT\n\nCaption\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_body) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'

View File

@@ -2,14 +2,9 @@
envlist = py38
[testenv]
passenv = PYTHONPATH
deps =
pytest==8
flake8
restructuredtext_lint
Pygments
commands =
pytest
commands =
flake8 --ignore=E501,W503 markdownify tests
restructuredtext-lint README.rst
pytest