Merge branch 'develop' into para-newlines-92-98

2024-09-30 18:05:32 +00:00
parent 60d86663d7 964d89fa8a
commit 4399ee75db
11 changed files with 88 additions and 65 deletions
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -23,7 +23,10 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install tox
+        pip install --upgrade setuptools setuptools_scm wheel build tox
    - name: Lint and test
      run: |
        tox
+    - name: Build
+      run: |
+        python -m build -nwsx .
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -21,11 +21,11 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install setuptools wheel twine
+        pip install --upgrade setuptools setuptools_scm wheel build twine
    - name: Build and publish
      env:
        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
      run: |
-        python setup.py sdist bdist_wheel
+        python -m build -nwsx .
        twine upload dist/*
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,2 @@
 include README.rst
+prune tests
--- a/README.rst
+++ b/README.rst
@@ -87,7 +87,11 @@ strong_em_symbol
 sub_symbol, sup_symbol
  Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an
  empty string, because this is non-standard behavior. Could be something like
-  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.
+  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.  If the value starts
+  with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is
+  inserted after the ``<`` in the string used after the text; this allows
+  specifying ``<sub>`` to use raw HTML in the output for subscripts, for
+  example.

 newline_style
  Defines the style of marking linebreaks (``<br>``) in markdown. The default
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -43,17 +43,22 @@ def abstract_inline_conversion(markup_fn):
    """
    This abstracts all simple inline tags like b, em, del, ...
    Returns a function that wraps the chomped text in a pair of the string
-    that is returned by markup_fn. markup_fn is necessary to allow for
+    that is returned by markup_fn, with '/' inserted in the string used after
+    the text if it looks like an HTML tag. markup_fn is necessary to allow for
    references to self.strong_em_symbol etc.
    """
    def implementation(self, el, text, convert_as_inline):
-        markup = markup_fn(self)
+        markup_prefix = markup_fn(self)
+        if markup_prefix.startswith('<') and markup_prefix.endswith('>'):
+            markup_suffix = '</' + markup_prefix[1:]
+        else:
+            markup_suffix = markup_prefix
        if el.find_parent(['pre', 'code', 'kbd', 'samp']):
            return text
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
-        return '%s%s%s%s%s' % (prefix, markup, text, markup, suffix)
+        return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix)
    return implementation


@@ -327,7 +332,7 @@ class MarkdownConverter(object):
    def convert_li(self, el, text, convert_as_inline):
        parent = el.parent
        if parent is not None and parent.name == 'ol':
-            if parent.get("start"):
+            if parent.get("start") and str(parent.get("start")).isnumeric():
                start = int(parent.get("start"))
            else:
                start = 1
@@ -389,13 +394,13 @@ class MarkdownConverter(object):

    def convert_td(self, el, text, convert_as_inline):
        colspan = 1
-        if 'colspan' in el.attrs:
+        if 'colspan' in el.attrs and el['colspan'].isdigit():
            colspan = int(el['colspan'])
        return ' ' + text.strip().replace("\n", " ") + ' |' * colspan

    def convert_th(self, el, text, convert_as_inline):
        colspan = 1
-        if 'colspan' in el.attrs:
+        if 'colspan' in el.attrs and el['colspan'].isdigit():
            colspan = int(el['colspan'])
        return ' ' + text.strip().replace("\n", " ") + ' |' * colspan

@@ -412,7 +417,7 @@ class MarkdownConverter(object):
            # first row and is headline: print headline underline
            full_colspan = 0
            for cell in cells:
-                if "colspan" in cell.attrs:
+                if 'colspan' in cell.attrs and cell['colspan'].isdigit():
                    full_colspan += int(cell["colspan"])
                else:
                    full_colspan += 1
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,45 @@
+[build-system]
+requires = ["setuptools>=61.2", "setuptools_scm[toml]>=3.4.3"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "markdownify"
+version = "0.13.1"
+authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
+description = "Convert HTML to markdown."
+readme = "README.rst"
+classifiers = [
+    "Environment :: Web Environment",
+    "Framework :: Django",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 2.5",
+    "Programming Language :: Python :: 2.6",
+    "Programming Language :: Python :: 2.7",
+    "Programming Language :: Python :: 3.6",
+    "Programming Language :: Python :: 3.7",
+    "Programming Language :: Python :: 3.8",
+    "Topic :: Utilities",
+]
+dependencies = [
+    "beautifulsoup4>=4.9,<5",
+    "six>=1.15,<2"
+]
+
+[project.urls]
+Homepage = "http://github.com/matthewwithanm/python-markdownify"
+Download = "http://github.com/matthewwithanm/python-markdownify/tarball/master"
+
+[project.scripts]
+markdownify = "markdownify.main:main"
+
+[tool.setuptools]
+zip-safe = false
+include-package-data = true
+
+[tool.setuptools.packages.find]
+include = ["markdownify", "markdownify.*"]
+namespaces = false
+
+[tool.setuptools_scm]
--- a/setup.py
+++ b/setup.py
@@ -1,52 +0,0 @@
-#/usr/bin/env python
-import codecs
-import os
-from setuptools import setup, find_packages
-
-
-read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
-
-pkgmeta = {
-    '__title__': 'markdownify',
-    '__author__': 'Matthew Tretter',
-    '__version__': '0.12.1',
-}
-
-read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
-
-setup(
-    name='markdownify',
-    description='Convert HTML to markdown.',
-    long_description=read(os.path.join(os.path.dirname(__file__), 'README.rst')),
-    version=pkgmeta['__version__'],
-    author=pkgmeta['__author__'],
-    author_email='m@tthewwithanm.com',
-    url='http://github.com/matthewwithanm/python-markdownify',
-    download_url='http://github.com/matthewwithanm/python-markdownify/tarball/master',
-    packages=find_packages(),
-    zip_safe=False,
-    include_package_data=True,
-    install_requires=[
-        'beautifulsoup4>=4.9,<5',
-        'six>=1.15,<2',
-    ],
-    classifiers=[
-        'Environment :: Web Environment',
-        'Framework :: Django',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: MIT License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python :: 2.5',
-        'Programming Language :: Python :: 2.6',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Topic :: Utilities'
-    ],
-    entry_points={
-        'console_scripts': [
-            'markdownify = markdownify.main:main'
-        ]
-    }
-)
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -271,11 +271,13 @@ def test_strong_em_symbol():
 def test_sub():
    assert md('<sub>foo</sub>') == 'foo'
    assert md('<sub>foo</sub>', sub_symbol='~') == '~foo~'
+    assert md('<sub>foo</sub>', sub_symbol='<sub>') == '<sub>foo</sub>'


 def test_sup():
    assert md('<sup>foo</sup>') == 'foo'
    assert md('<sup>foo</sup>', sup_symbol='^') == '^foo^'
+    assert md('<sup>foo</sup>', sup_symbol='<sup>') == '<sup>foo</sup>'


 def test_lang():
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -44,6 +44,9 @@ def test_ol():
    assert md('<ol><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
    assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '\n\n3. a\n4. b\n'
    assert md('foo<ol start="3"><li>a</li><li>b</li></ol>bar') == 'foo\n\n3. a\n4. b\n\nbar'
+    assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
+    assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
+    assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'


 def test_nested_ols():
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -215,7 +215,7 @@ table_with_colspan = """<table>
        <th>Age</th>
    </tr>
    <tr>
-        <td>Jill</td>
+        <td colspan="1">Jill</td>
        <td>Smith</td>
        <td>50</td>
    </tr>
@@ -226,6 +226,17 @@ table_with_colspan = """<table>
    </tr>
 </table>"""

+table_with_undefined_colspan = """<table>
+    <tr>
+        <th colspan="undefined">Name</th>
+        <th>Age</th>
+    </tr>
+    <tr>
+        <td colspan="-1">Jill</td>
+        <td>Smith</td>
+    </tr>
+</table>"""
+

 def test_table():
    assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
@@ -240,3 +251,4 @@ def test_table():
    assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_with_caption) == 'TEXT\n\nCaption\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
    assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@ envlist = py38
 [testenv]
 passenv = PYTHONPATH
 deps =
-	pytest
+	pytest==8
 	flake8
 	restructuredtext_lint
 	Pygments