diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml new file mode 100644 index 0000000..41240f8 --- /dev/null +++ b/.github/workflows/python-app.yml @@ -0,0 +1,33 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: [ develop ] + pull_request: + branches: [ develop ] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8==2.5.4 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + python setup.py lint + - name: Test with pytest + run: | + python setup.py test diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..1a03a7b --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python setup.py sdist bdist_wheel + twine 
upload dist/* diff --git a/.gitignore b/.gitignore index ae9fdc5..8817941 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,10 @@ *.pyc *.egg +.eggs/ +*.egg-info/ .DS_Store /.env /dist /MANIFEST /venv +build/ diff --git a/README.rst b/README.rst index 4e93b92..4d21411 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,21 @@ +|build| |version| |license| |downloads| + +.. |build| image:: https://img.shields.io/github/workflow/status/matthewwithanm/python-markdownify/Python%20application/develop + :alt: GitHub Workflow Status + :target: https://github.com/matthewwithanm/python-markdownify/actions?query=workflow%3A%22Python+application%22 + +.. |version| image:: https://img.shields.io/pypi/v/markdownify + :alt: Pypi version + :target: https://pypi.org/project/markdownify/ + +.. |license| image:: https://img.shields.io/pypi/l/markdownify + :alt: License + :target: https://github.com/matthewwithanm/python-markdownify/blob/develop/LICENSE + +.. |downloads| image:: https://pepy.tech/badge/markdownify + :alt: Pypi Downloads + :target: https://pepy.tech/project/markdownify + Installation ============ diff --git a/markdownify/__init__.py b/markdownify/__init__.py index f0fe118..aa5f283 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -23,6 +23,19 @@ def escape(text): return text.replace('_', r'\_') +def chomp(text): + """ + If the text in an inline tag like b, a, or em contains a leading or trailing + space, strip the string and return a space as prefix or suffix, if needed. 
+ This function is used to prevent conversions like + foo => ** foo** + """ + prefix = ' ' if text and text[0] == ' ' else '' + suffix = ' ' if text and text[-1] == ' ' else '' + text = text.strip() + return (prefix, suffix, text) + + def _todict(obj): return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_')) @@ -115,13 +128,16 @@ class MarkdownConverter(object): return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else '' def convert_a(self, el, text): + prefix, suffix, text = chomp(text) + if not text: + return '' href = el.get('href') title = el.get('title') if self.options['autolinks'] and text == href and not title: # Shortcut syntax return '<%s>' % href title_part = ' "%s"' % title.replace('"', r'\"') if title else '' - return '[%s](%s%s)' % (text or '', href, title_part) if href else text or '' + return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text def convert_b(self, el, text): return self.convert_strong(el, text) @@ -133,7 +149,10 @@ class MarkdownConverter(object): return ' \n' def convert_em(self, el, text): - return '*%s*' % text if text else '' + prefix, suffix, text = chomp(text) + if not text: + return '' + return '%s*%s*%s' % (prefix, text, suffix) def convert_hn(self, n, el, text): style = self.options['heading_style'] @@ -160,7 +179,8 @@ class MarkdownConverter(object): break el = el.parent if nested: - text = '\n' + self.indent(text, 1).rstrip() + # remove trailing newline if nested + return '\n' + self.indent(text, 1).rstrip() return text + ('\n' if before_paragraph else '') convert_ul = convert_list @@ -169,7 +189,11 @@ class MarkdownConverter(object): def convert_li(self, el, text): parent = el.parent if parent is not None and parent.name == 'ol': - bullet = '%s.' % (parent.index(el) + 1) + if parent.get("start"): + start = int(parent.get("start")) + else: + start = 1 + bullet = '%s.' 
% (start + parent.index(el)) else: depth = -1 while el: @@ -184,7 +208,10 @@ class MarkdownConverter(object): return '%s\n\n' % text if text else '' def convert_strong(self, el, text): - return '**%s**' % text if text else '' + prefix, suffix, text = chomp(text) + if not text: + return '' + return '%s**%s**%s' % (prefix, text, suffix) def convert_img(self, el, text): alt = el.attrs.get('alt', None) or '' diff --git a/setup.py b/setup.py index ea57c27..06ab404 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() pkgmeta = { '__title__': 'markdownify', '__author__': 'Matthew Tretter', - '__version__': '0.4.1', + '__version__': '0.5.2', } diff --git a/tests/test_conversions.py b/tests/test_conversions.py index d27b008..3a75907 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,5 +1,4 @@ from markdownify import markdownify as md, ATX, ATX_CLOSED -import re nested_uls = """ @@ -41,10 +40,28 @@ nested_ols = """ """ +def test_chomp(): + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' s ') == ' **s** ' + assert md(' s ') == ' **s** ' + assert md(' s ') == ' **s** ' + assert md(' s ') == ' **s** ' + + def test_a(): assert md('Google') == '[Google](http://google.com)' +def test_a_spaces(): + assert md('foo Google bar') == 'foo [Google](http://google.com) bar' + assert md('foo Google bar') == 'foo [Google](http://google.com) bar' + assert md('foo Google bar') == 'foo [Google](http://google.com) bar' + assert md('foo bar') == 'foo bar' + + def test_a_with_title(): text = md('Google') assert text == r'[Google](http://google.com "The \"Goog\"")' @@ -64,6 +81,13 @@ def test_b(): assert md('Hello') == '**Hello**' +def test_b_spaces(): + assert md('foo Hello bar') == 'foo **Hello** bar' + assert md('foo Hello bar') == 'foo **Hello** bar' + assert md('foo Hello bar') == 'foo **Hello** bar' + assert md('foo bar') == 'foo bar' + + 
def test_blockquote(): assert md('
Hello').strip() == '> Hello' @@ -81,6 +105,13 @@ def test_em(): assert md('Hello') == '*Hello*' +def test_em_spaces(): + assert md('foo Hello bar') == 'foo *Hello* bar' + assert md('foo Hello bar') == 'foo *Hello* bar' + assert md('foo Hello bar') == 'foo *Hello* bar' + assert md('foo bar') == 'foo bar' + + def test_h1(): assert md('
hello
') == 'hello\n\n' @@ -123,21 +157,23 @@ def test_strong(): def test_ul(): - assert md('foo
bar
') == 'foo\n\n* a\n* b\n\nbar\n\n' + def test_nested_uls(): """ Nested ULs should alternate bullet characters. """ - assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n' + assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n' def test_bullets(): - assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n' + assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n\n' def test_img():