Merge remote-tracking branch 'upstream/develop' into ordered-list

# Conflicts:
#	markdownify/__init__.py
#	tests/test_conversions.py
2020-08-26 19:41:43 +02:00
parent d23596706d 987a2a9cae
commit 4f00d638d2
7 changed files with 160 additions and 12 deletions
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -0,0 +1,33 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Python application
+
+on:
+  push:
+    branches: [ develop ]
+  pull_request:
+    branches: [ develop ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.6
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.6
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8==2.5.4 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        python setup.py lint
+    - name: Test with pytest
+      run: |
+        python setup.py test
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,31 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [created]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install setuptools wheel twine
+    - name: Build and publish
+      env:
+        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+      run: |
+        python setup.py sdist bdist_wheel
+        twine upload dist/*
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,10 @@
 *.pyc
 *.egg
+.eggs/
+*.egg-info/
 .DS_Store
 /.env
 /dist
 /MANIFEST
 /venv
+build/
--- a/README.rst
+++ b/README.rst
@@ -1,3 +1,21 @@
+|build| |version| |license| |downloads|
+
+.. |build| image:: https://img.shields.io/github/workflow/status/matthewwithanm/python-markdownify/Python%20application/develop
+    :alt: GitHub Workflow Status
+    :target: https://github.com/matthewwithanm/python-markdownify/actions?query=workflow%3A%22Python+application%22
+
+.. |version| image:: https://img.shields.io/pypi/v/markdownify
+    :alt: Pypi version
+    :target: https://pypi.org/project/markdownify/
+
+.. |license| image:: https://img.shields.io/pypi/l/markdownify
+    :alt: License
+    :target: https://github.com/matthewwithanm/python-markdownify/blob/develop/LICENSE
+
+.. |downloads| image:: https://pepy.tech/badge/markdownify
+    :alt: Pypi Downloads
+    :target: https://pepy.tech/project/markdownify
+
 Installation
 ============

--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -23,6 +23,19 @@ def escape(text):
    return text.replace('_', r'\_')


+def chomp(text):
+    """
+    If the text in an inline tag like b, a, or em contains a leading or trailing
+    space, strip the string and return a space as suffix or prefix, if needed.
+    This function is used to prevent conversions like
+        <b> foo</b> => ** foo**
+    """
+    prefix = ' ' if text and text[0] == ' ' else ''
+    suffix = ' ' if text and text[-1] == ' ' else ''
+    text = text.strip()
+    return (prefix, suffix, text)
+
+
 def _todict(obj):
    return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))

@@ -115,13 +128,16 @@ class MarkdownConverter(object):
        return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''

    def convert_a(self, el, text):
+        prefix, suffix, text = chomp(text)
+        if not text:
+            return ''
        href = el.get('href')
        title = el.get('title')
        if self.options['autolinks'] and text == href and not title:
            # Shortcut syntax
            return '<%s>' % href
        title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        return '[%s](%s%s)' % (text or '', href, title_part) if href else text or ''
+        return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text

    def convert_b(self, el, text):
        return self.convert_strong(el, text)
@@ -133,7 +149,10 @@ class MarkdownConverter(object):
        return '  \n'

    def convert_em(self, el, text):
-        return '*%s*' % text if text else ''
+        prefix, suffix, text = chomp(text)
+        if not text:
+            return ''
+        return '%s*%s*%s' % (prefix, text, suffix)

    def convert_hn(self, n, el, text):
        style = self.options['heading_style']
@@ -160,7 +179,8 @@ class MarkdownConverter(object):
                break
            el = el.parent
        if nested:
-            text = '\n' + self.indent(text, 1).rstrip()
+            # remove trailing newline if nested
+            return '\n' + self.indent(text, 1).rstrip()
        return text + ('\n' if before_paragraph else '')

    convert_ul = convert_list
@@ -169,7 +189,11 @@ class MarkdownConverter(object):
    def convert_li(self, el, text):
        parent = el.parent
        if parent is not None and parent.name == 'ol':
-            bullet = '%s.' % (parent.index(el) + 1)
+            if parent.get("start"):
+                start = int(parent.get("start"))
+            else:
+                start = 1
+            bullet = '%s.' % (start + parent.index(el))
        else:
            depth = -1
            while el:
@@ -184,7 +208,10 @@ class MarkdownConverter(object):
        return '%s\n\n' % text if text else ''

    def convert_strong(self, el, text):
-        return '**%s**' % text if text else ''
+        prefix, suffix, text = chomp(text)
+        if not text:
+            return ''
+        return '%s**%s**%s' % (prefix, text, suffix)

    def convert_img(self, el, text):
        alt = el.attrs.get('alt', None) or ''
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
 pkgmeta = {
    '__title__': 'markdownify',
    '__author__': 'Matthew Tretter',
-    '__version__': '0.4.1',
+    '__version__': '0.5.2',
 }


--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,5 +1,4 @@
 from markdownify import markdownify as md, ATX, ATX_CLOSED
-import re


 nested_uls = """
@@ -41,10 +40,28 @@ nested_ols = """
    </ul>"""


+def test_chomp():
+    assert md(' <b></b> ') == '  '
+    assert md(' <b> </b> ') == '  '
+    assert md(' <b>  </b> ') == '  '
+    assert md(' <b>   </b> ') == '  '
+    assert md(' <b>s </b> ') == ' **s**  '
+    assert md(' <b> s</b> ') == '  **s** '
+    assert md(' <b> s </b> ') == '  **s**  '
+    assert md(' <b>  s  </b> ') == '  **s**  '
+
+
 def test_a():
    assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'


+def test_a_spaces():
+    assert md('foo <a href="http://google.com">Google</a> bar') == 'foo [Google](http://google.com) bar'
+    assert md('foo<a href="http://google.com"> Google</a> bar') == 'foo [Google](http://google.com) bar'
+    assert md('foo <a href="http://google.com">Google </a>bar') == 'foo [Google](http://google.com) bar'
+    assert md('foo <a href="http://google.com"></a> bar') == 'foo  bar'
+
+
 def test_a_with_title():
    text = md('<a href="http://google.com" title="The &quot;Goog&quot;">Google</a>')
    assert text == r'[Google](http://google.com "The \"Goog\"")'
@@ -64,6 +81,13 @@ def test_b():
    assert md('<b>Hello</b>') == '**Hello**'


+def test_b_spaces():
+    assert md('foo <b>Hello</b> bar') == 'foo **Hello** bar'
+    assert md('foo<b> Hello</b> bar') == 'foo **Hello** bar'
+    assert md('foo <b>Hello </b>bar') == 'foo **Hello** bar'
+    assert md('foo <b></b> bar') == 'foo  bar'
+
+
 def test_blockquote():
    assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'

@@ -81,6 +105,13 @@ def test_em():
    assert md('<em>Hello</em>') == '*Hello*'


+def test_em_spaces():
+    assert md('foo <em>Hello</em> bar') == 'foo *Hello* bar'
+    assert md('foo<em> Hello</em> bar') == 'foo *Hello* bar'
+    assert md('foo <em>Hello </em>bar') == 'foo *Hello* bar'
+    assert md('foo <em></em> bar') == 'foo  bar'
+
+
 def test_h1():
    assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'

@@ -109,11 +140,14 @@ def test_i():


 def test_ol():
-    assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol><li>a</li><li>b</li></ol>') == '\n1. a\n2. b\n\n'
+    assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '\n3. a\n4. b\n\n'
+

 def test_nested_ols():
    assert md(nested_ols) == '1. 1 \n\t1. a \n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'

+
 def test_p():
    assert md('<p>hello</p>') == 'hello\n\n'

@@ -123,21 +157,23 @@ def test_strong():


 def test_ul():
-    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
-    
+    assert md('<ul><li>a</li><li>b</li></ul>') == '\n* a\n* b\n\n'
+
+
 def test_inline_ul():
    assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n* a\n* b\n\nbar\n\n'

+
 def test_nested_uls():
    """
    Nested ULs should alternate bullet characters.

    """
-    assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
+    assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'


 def test_bullets():
-    assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
+    assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n\n'


 def test_img():