From c1672aee444d4fa8c76a7be37b0746ce769d2631 Mon Sep 17 00:00:00 2001 From: samypr100 <3933065+samypr100@users.noreply.github.com> Date: Sun, 23 Jun 2024 06:59:14 -0400 Subject: [PATCH 01/10] Update MANIFEST.in to exclude tests during packaging (#125) --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 9561fb1..70656c8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ include README.rst +prune tests From 2ec33384de85d0906b4b40a59f1a3650846150cb Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 23 Jun 2024 13:17:20 +0200 Subject: [PATCH 02/10] handle un-parsable colspan values fixes #126 --- markdownify/__init__.py | 6 +++--- tests/test_tables.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index eaa6ded..6a983d9 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -383,13 +383,13 @@ class MarkdownConverter(object): def convert_td(self, el, text, convert_as_inline): colspan = 1 - if 'colspan' in el.attrs: + if 'colspan' in el.attrs and el['colspan'].isdigit(): colspan = int(el['colspan']) return ' ' + text.strip().replace("\n", " ") + ' |' * colspan def convert_th(self, el, text, convert_as_inline): colspan = 1 - if 'colspan' in el.attrs: + if 'colspan' in el.attrs and el['colspan'].isdigit(): colspan = int(el['colspan']) return ' ' + text.strip().replace("\n", " ") + ' |' * colspan @@ -406,7 +406,7 @@ class MarkdownConverter(object): # first row and is headline: print headline underline full_colspan = 0 for cell in cells: - if "colspan" in cell.attrs: + if 'colspan' in cell.attrs and cell['colspan'].isdigit(): full_colspan += int(cell["colspan"]) else: full_colspan += 1 diff --git a/tests/test_tables.py b/tests/test_tables.py index 9120c29..594e5bf 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -215,7 +215,7 @@ table_with_colspan = """ - + @@ -226,6 +226,17 @@ table_with_colspan = """
Age
JillJill Smith 50
""" +table_with_undefined_colspan = """ + + + + + + + + +
NameAge
JillSmith
""" + def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' @@ -240,3 +251,4 @@ def test_table(): assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_caption) == 'TEXT\n\nCaption\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' From 7861b330cd05c0c19fc496530f02922d5493c568 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Sun, 23 Jun 2024 11:28:05 +0000 Subject: [PATCH 03/10] Special-case use of HTML tags for converting `<sub>` / `<sup>` (#119) Allow different strings before / after `<sub>` / `<sup>` content In particular, this allows setting `sub_symbol='<sub>'`, `sup_symbol='<sup>'`, to use raw HTML in the output when converting subscripts and superscripts. --- README.rst | 6 +++++- markdownify/__init__.py | 9 +++++++-- tests/test_conversions.py | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 35d58fd..55ea7cf 100644 --- a/README.rst +++ b/README.rst @@ -87,7 +87,11 @@ strong_em_symbol sub_symbol, sup_symbol Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an empty string, because this is non-standard behavior. Could be something like - ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. + ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. If the value starts + with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is + inserted after the ``<`` in the string used after the text; this allows + specifying ``<sub>`` to use raw HTML in the output for subscripts, for + example. newline_style Defines the style of marking linebreaks (``
``) in markdown. The default diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 6a983d9..d7bd780 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -43,17 +43,22 @@ def abstract_inline_conversion(markup_fn): """ This abstracts all simple inline tags like b, em, del, ... Returns a function that wraps the chomped text in a pair of the string - that is returned by markup_fn. markup_fn is necessary to allow for + that is returned by markup_fn, with '/' inserted in the string used after + the text if it looks like an HTML tag. markup_fn is necessary to allow for references to self.strong_em_symbol etc. """ def implementation(self, el, text, convert_as_inline): markup = markup_fn(self) + if markup.startswith('<') and markup.endswith('>'): + markup_after = 'foo
') == 'foo' assert md('foo', sub_symbol='~') == '~foo~' + assert md('foo', sub_symbol='') == 'foo' def test_sup(): assert md('foo') == 'foo' assert md('foo', sup_symbol='^') == '^foo^' + assert md('foo', sup_symbol='') == 'foo' def test_lang(): From 50b4640db2d7f88b44c20f947e705ba59f1b9fe0 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 23 Jun 2024 13:30:08 +0200 Subject: [PATCH 04/10] better naming for markup variables --- markdownify/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index d7bd780..2f71cad 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -48,17 +48,17 @@ def abstract_inline_conversion(markup_fn): references to self.strong_em_symbol etc. """ def implementation(self, el, text, convert_as_inline): - markup = markup_fn(self) - if markup.startswith('<') and markup.endswith('>'): - markup_after = ''): + markup_suffix = ' Date: Sun, 23 Jun 2024 20:28:53 +0800 Subject: [PATCH 05/10] handle ol start value is not number (#127) Co-authored-by: Mico --- markdownify/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 2f71cad..cd66a39 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -326,7 +326,7 @@ class MarkdownConverter(object): def convert_li(self, el, text, convert_as_inline): parent = el.parent if parent is not None and parent.name == 'ol': - if parent.get("start"): + if parent.get("start") and str(parent.get("start")).isnumeric(): start = int(parent.get("start")) else: start = 1 From 0a5c89aa493ae0cdc090305ba14ef7fa1c6f13c4 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 23 Jun 2024 14:30:07 +0200 Subject: [PATCH 06/10] added test for ol start check --- tests/test_lists.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_lists.py b/tests/test_lists.py index 5a04430..35eee13 100644 --- a/tests/test_lists.py +++ 
b/tests/test_lists.py @@ -43,6 +43,9 @@ nested_ols = """ def test_ol(): assert md('
  1. a
  2. b
') == '1. a\n2. b\n' assert md('
  1. a
  2. b
') == '3. a\n4. b\n' + assert md('
  1. a
  2. b
') == '1. a\n2. b\n' + assert md('
  1. a
  2. b
') == '1. a\n2. b\n' + assert md('
  1. a
  2. b
') == '1. a\n2. b\n' def test_nested_ols(): From 75a678dab9d7cec2c18b58489ea4a66b6f794908 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 14 Jul 2024 21:02:49 +0200 Subject: [PATCH 07/10] fix pytest version to 8 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 9eb8750..54ba143 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = py38 [testenv] passenv = PYTHONPATH deps = - pytest + pytest==8 flake8 restructuredtext_lint Pygments From f6c8daf8a58948c88256a09a60085e28e628564e Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 14 Jul 2024 21:19:23 +0200 Subject: [PATCH 08/10] bump to v0.13.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9a26468..9a703d0 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() pkgmeta = { '__title__': 'markdownify', '__author__': 'Matthew Tretter', - '__version__': '0.12.1', + '__version__': '0.13.0', } read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() From 46dc1a002db6899759f0cc80162e365229593375 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 14 Jul 2024 22:38:29 +0200 Subject: [PATCH 09/10] Migrated the metadata into PEP 621-compliant pyproject.toml (#138) * Move the metadata from `setup.py` into `setup.cfg`. Added `pyproject.toml`. Removed `setup.py` - it is no longer needed. Got rid of tests erroneously finding their way into the wheel. * Started populating version automatically from git tags using `setuptools_scm`. * Migrated the metadata into `PEP 621`-compliant `pyproject.toml`, got rid of `setup.cfg`. 
* test build in develop and pull requests * use static version instead of dynamic git tag info --------- Co-authored-by: KOLANICH --- .github/workflows/python-app.yml | 5 ++- .github/workflows/python-publish.yml | 4 +-- pyproject.toml | 45 ++++++++++++++++++++++++ setup.py | 52 ---------------------------- 4 files changed, 51 insertions(+), 55 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index eb64947..481ade5 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,7 +23,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox + pip install --upgrade setuptools setuptools_scm wheel build tox - name: Lint and test run: | tox + - name: Build + run: | + python -m build -nwsx . diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 9e3a349..c337bab 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -21,11 +21,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install --upgrade setuptools setuptools_scm wheel build twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | - python setup.py sdist bdist_wheel + python -m build -nwsx . twine upload dist/* diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..69dd27e --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,45 @@ +[build-system] +requires = ["setuptools>=61.2", "setuptools_scm[toml]>=3.4.3"] +build-backend = "setuptools.build_meta" + +[project] +name = "markdownify" +version = "0.13.0" +authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}] +description = "Convert HTML to markdown." 
+readme = "README.rst" +classifiers = [ + "Environment :: Web Environment", + "Framework :: Django", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 2.5", + "Programming Language :: Python :: 2.6", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Utilities", +] +dependencies = [ + "beautifulsoup4>=4.9,<5", + "six>=1.15,<2" +] + +[project.urls] +Homepage = "http://github.com/matthewwithanm/python-markdownify" +Download = "http://github.com/matthewwithanm/python-markdownify/tarball/master" + +[project.scripts] +markdownify = "markdownify.main:main" + +[tool.setuptools] +zip-safe = false +include-package-data = true + +[tool.setuptools.packages.find] +include = ["markdownify", "markdownify.*"] +namespaces = false + +[tool.setuptools_scm] diff --git a/setup.py b/setup.py deleted file mode 100644 index 9a703d0..0000000 --- a/setup.py +++ /dev/null @@ -1,52 +0,0 @@ -#/usr/bin/env python -import codecs -import os -from setuptools import setup, find_packages - - -read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() - -pkgmeta = { - '__title__': 'markdownify', - '__author__': 'Matthew Tretter', - '__version__': '0.13.0', -} - -read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() - -setup( - name='markdownify', - description='Convert HTML to markdown.', - long_description=read(os.path.join(os.path.dirname(__file__), 'README.rst')), - version=pkgmeta['__version__'], - author=pkgmeta['__author__'], - author_email='m@tthewwithanm.com', - url='http://github.com/matthewwithanm/python-markdownify', - download_url='http://github.com/matthewwithanm/python-markdownify/tarball/master', - packages=find_packages(), - zip_safe=False, - include_package_data=True, - install_requires=[ - 'beautifulsoup4>=4.9,<5', - 
'six>=1.15,<2', - ], - classifiers=[ - 'Environment :: Web Environment', - 'Framework :: Django', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2.5', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Utilities' - ], - entry_points={ - 'console_scripts': [ - 'markdownify = markdownify.main:main' - ] - } -) From 964d89fa8ace65181402f69ca2482d83b84600f8 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 14 Jul 2024 22:40:02 +0200 Subject: [PATCH 10/10] bump to version v0.13.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 69dd27e..c0d1ce6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "markdownify" -version = "0.13.0" +version = "0.13.1" authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}] description = "Convert HTML to markdown." readme = "README.rst"