Bump to 0.5.0

Merge branch 'develop'
cleaning up changes with help of linter
2020-08-09 21:22:15 +02:00 · 2020-08-09 21:20:44 +02:00 · 2020-08-09 21:17:39 +02:00 · 2020-08-09 21:13:33 +02:00 · 2020-08-09 21:11:16 +02:00 · 2020-08-09 20:24:23 +02:00
7 changed files with 134 additions and 26 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 *.pyc
 *.egg
 .eggs/
 *.egg-info/
 .DS_Store
 /.env
 /dist
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
 The MIT License (MIT)
 Copyright 2012-2018 Matthew Tretter
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.rst
+++ b/README.rst
@@ -29,6 +29,38 @@ Specify tags to exclude (blacklist):
    md('<b>Yay</b> <a href="http://github.com">GitHub</a>', convert=['b'])  # > '**Yay** GitHub'
 Options
 =======
 Markdownify supports the following options:
 strip
  A list of tags to strip (blacklist). This option can't be used with the
  ``convert`` option.
 convert
  A list of tags to convert (whitelist). This option can't be used with the
  ``strip`` option.
 autolinks
  A boolean indicating whether the "automatic link" style should be used when
  an ``a`` tag's contents match its href. Defaults to ``True``.
 heading_style
  Defines how headings should be converted. Accepted values are ``ATX``,
  ``ATX_CLOSED``, ``SETEXT``, and ``UNDERLINED`` (which is an alias for
  ``SETEXT``). Defaults to ``UNDERLINED``.
 bullets
  An iterable (string, list, or tuple) of bullet styles to be used. If the
  iterable only contains one item, it will be used regardless of how deeply
  lists are nested. Otherwise, the bullet will alternate based on nesting
  level. Defaults to ``'*+-'``.
 Options may be specified as kwargs to the ``markdownify`` function, or as a
 nested ``Options`` class in ``MarkdownConverter`` subclasses.
 Development
 ===========
--- a/markdownify/init.py
+++ b/markdownify/init.py
@@ -1,5 +1,6 @@
 from bs4 import BeautifulSoup, NavigableString
 import re
 import six
 convert_heading_re = re.compile(r'convert_h(\d+)')
@@ -22,6 +23,19 @@ def escape(text):
    return text.replace('_', r'\_')
 def chomp(text):
    """
    If the text in an inline tag like b, a, or em contains a leading or trailing
    space, strip the string and return a space as suffix or prefix, if needed.
    This function is used to prevent conversions like
        <b> foo</b> => ** foo**
    """
    prefix = ' ' if text and text[0] == ' ' else ''
    suffix = ' ' if text and text[-1] == ' ' else ''
    text = text.strip()
    return (prefix, suffix, text)
 def _todict(obj):
    return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
@@ -52,7 +66,7 @@ class MarkdownConverter(object):
        # want a full document. Therefore, we'll mark our fragment with an id,
        # create the document, and extract the element with the id.
        html = wrapped % html
-        soup = BeautifulSoup(html)
+        soup = BeautifulSoup(html, 'html.parser')
        return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
    def process_tag(self, node, children_only=False):
@@ -61,7 +75,7 @@ class MarkdownConverter(object):
        # Convert the children first
        for el in node.children:
            if isinstance(el, NavigableString):
-                text += self.process_text(unicode(el))
+                text += self.process_text(six.text_type(el))
            else:
                text += self.process_tag(el)
@@ -109,13 +123,16 @@ class MarkdownConverter(object):
        return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
    def convert_a(self, el, text):
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
        href = el.get('href')
        title = el.get('title')
        if self.options['autolinks'] and text == href and not title:
            # Shortcut syntax
            return '<%s>' % href
        title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        return '[%s](%s%s)' % (text or '', href, title_part) if href else text or ''
+        return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text
    def convert_b(self, el, text):
        return self.convert_strong(el, text)
@@ -127,7 +144,10 @@ class MarkdownConverter(object):
        return '  \n'
    def convert_em(self, el, text):
-        return '*%s*' % text if text else ''
+        prefix, suffix, text = chomp(text)
        if not text:
            return ''
        return '%s*%s*%s' % (prefix, text, suffix)
    def convert_hn(self, n, el, text):
        style = self.options['heading_style']
@@ -151,8 +171,9 @@ class MarkdownConverter(object):
                break
            el = el.parent
        if nested:
-            text = '\n' + self.indent(text, 1)
+            # remove trailing newline if nested
-        return text
+            return '\n' + self.indent(text, 1).rstrip()
        return '\n' + text + '\n'
    convert_ul = convert_list
    convert_ol = convert_list
@@ -175,7 +196,10 @@ class MarkdownConverter(object):
        return '%s\n\n' % text if text else ''
    def convert_strong(self, el, text):
-        return '**%s**' % text if text else ''
+        prefix, suffix, text = chomp(text)
        if not text:
            return ''
        return '%s**%s**%s' % (prefix, text, suffix)
    def convert_img(self, el, text):
        alt = el.attrs.get('alt', None) or ''
--- a/markdownify/pkgmeta.py
+++ b/markdownify/pkgmeta.py
@@ -1,8 +0,0 @@
 pkgmeta = dict(
    __title__='markdownify',
    __author__='Matthew Tretter',
    __version__='0.4.0',
 )
 globals().update(pkgmeta)
 __all__ = pkgmeta.keys()
--- a/setup.py
+++ b/setup.py
@@ -7,10 +7,11 @@ from setuptools.command.test import test as TestCommand, Command
 read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
-
+pkgmeta = {
-pkgmeta = {}
+    '__title__': 'markdownify',
-execfile(os.path.join(os.path.dirname(__file__), 'markdownify', 'pkgmeta.py'),
+    '__author__': 'Matthew Tretter',
-         pkgmeta)
+    '__version__': '0.5.0',
 }
 class PyTest(TestCommand):
@@ -75,13 +76,13 @@ setup(
        'pytest',
    ],
    install_requires=[
-        'beautifulsoup4',
+        'beautifulsoup4', 'six'
    ],
    classifiers=[
        'Environment :: Web Environment',
        'Framework :: Django',
        'Intended Audience :: Developers',
-        'License :: OSI Approved :: BSD License',
+        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2.5',
        'Programming Language :: Python :: 2.6',
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -2,7 +2,7 @@ from markdownify import markdownify as md, ATX, ATX_CLOSED
 import re
-nested_uls = re.sub('\s+', '', """
+nested_uls = re.sub(r'\s+', '', """
    <ul>
        <li>1
            <ul>
@@ -22,10 +22,28 @@ nested_uls = re.sub('\s+', '', """
    </ul>""")
 def test_chomp():
    assert md(' <b></b> ') == '  '
    assert md(' <b> </b> ') == '  '
    assert md(' <b>  </b> ') == '  '
    assert md(' <b>   </b> ') == '  '
    assert md(' <b>s </b> ') == ' **s**  '
    assert md(' <b> s</b> ') == '  **s** '
    assert md(' <b> s </b> ') == '  **s**  '
    assert md(' <b>  s  </b> ') == '  **s**  '
 def test_a():
    assert md('<a href="http://google.com">Google</a>') == '[Google](http://google.com)'
 def test_a_spaces():
    assert md('foo <a href="http://google.com">Google</a> bar') == 'foo [Google](http://google.com) bar'
    assert md('foo<a href="http://google.com"> Google</a> bar') == 'foo [Google](http://google.com) bar'
    assert md('foo <a href="http://google.com">Google </a>bar') == 'foo [Google](http://google.com) bar'
    assert md('foo <a href="http://google.com"></a> bar') == 'foo  bar'
 def test_a_with_title():
    text = md('<a href="http://google.com" title="The &quot;Goog&quot;">Google</a>')
    assert text == r'[Google](http://google.com "The \"Goog\"")'
@@ -45,6 +63,13 @@ def test_b():
    assert md('<b>Hello</b>') == '**Hello**'
 def test_b_spaces():
    assert md('foo <b>Hello</b> bar') == 'foo **Hello** bar'
    assert md('foo<b> Hello</b> bar') == 'foo **Hello** bar'
    assert md('foo <b>Hello </b>bar') == 'foo **Hello** bar'
    assert md('foo <b></b> bar') == 'foo  bar'
 def test_blockquote():
    assert md('<blockquote>Hello</blockquote>').strip() == '> Hello'
@@ -62,6 +87,13 @@ def test_em():
    assert md('<em>Hello</em>') == '*Hello*'
 def test_em_spaces():
    assert md('foo <em>Hello</em> bar') == 'foo *Hello* bar'
    assert md('foo<em> Hello</em> bar') == 'foo *Hello* bar'
    assert md('foo <em>Hello </em>bar') == 'foo *Hello* bar'
    assert md('foo <em></em> bar') == 'foo  bar'
 def test_h1():
    assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
@@ -90,7 +122,7 @@ def test_i():
 def test_ol():
-    assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol><li>a</li><li>b</li></ol>') == '\n1. a\n2. b\n\n'
 def test_p():
@@ -102,7 +134,11 @@ def test_strong():
 def test_ul():
-    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
+    assert md('<ul><li>a</li><li>b</li></ul>') == '\n* a\n* b\n\n'
 def test_inline_ul():
    assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n\n* a\n* b\n\nbar\n\n'
 def test_nested_uls():
@@ -110,11 +146,11 @@ def test_nested_uls():
    Nested ULs should alternate bullet characters.
    """
-    assert md(nested_uls) == '* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t+ b\n\t+ c\n\t\n* 2\n* 3\n'
+    assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'
 def test_bullets():
-    assert md(nested_uls, bullets='-') == '- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t- b\n\t- c\n\t\n- 2\n- 3\n'
+    assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n\n'
 def test_img():
Author	SHA1	Message	Date
AlexVonB	0c4b856b9c	Bump to 0.5.0	2020-08-09 21:22:15 +02:00
AlexVonB	e9cc01938a	Merge branch 'develop'	2020-08-09 21:20:44 +02:00
AlexVonB	aceced68eb	cleaning up changes with help of linter	2020-08-09 21:17:39 +02:00
AlexVonB	3b049cdb9c	added egg dirs to gitignore	2020-08-09 21:13:33 +02:00
AlexVonB	b747378b52	fixed nested lists and wrote correct tests nested lists did not work: after a nested list was over, a new line was inserted. this leads to a large gap before the rest of the parent list. lists are prefixed and suffixed with a single newline, this is now represented in the tests.	2020-08-09 21:11:16 +02:00
AlexVonB	ee73d89879	Merge pull request #14 from AlexVonB/fix-inline-spaces remove prefixed and suffixed spaces from inline tags	2020-08-09 20:24:23 +02:00
AlexVonB	5563161c86	remove needless checks for emtpy text	2019-07-12 10:23:17 +02:00
AlexVonB	28e447d9ae	remove prefixed and suffixed spaces from inline tags fixes matthewwithanm#13	2019-07-11 23:27:52 +02:00
Matthew Dapena-Tretter	89d14f4487	Merge pull request #11 from AlexVonB/AlexVonB-patch-1 Add newline before and after a markdown list	2019-07-04 08:53:25 -07:00
AlexVonB	5f9243d91d	added tests for matthewwithanm#11	2019-07-04 16:32:21 +02:00
AlexVonB	d0f688d2e4	Add newline before and after a markdown list Fixes matthewwithanm#5 as well as an issue where `<p>foo<p><ul><li>bar</li></ul>` gets converted to `foo * bar` which is not correct	2019-07-04 16:26:09 +02:00
Jonathan Vanasco	5ac08522be	updating classifer to mit license issue #9	2019-06-19 16:17:47 -07:00
Thomas Lange	78afcc173e	Adding MIT license file	2018-10-16 19:11:02 -07:00
Steven Skoczen	b132a6f5b3	Updates to 0.4.1, pkgmeta included directly in setup.	2017-11-28 12:07:31 +13:00
Steven Skoczen	0abe0a29e8	Merge pull request #2 from crhallberg/html-parser Suppress BeautifulSoup warning	2017-11-13 08:48:45 +13:00
Steven Skoczen	4932df631f	Merge pull request #1 from dmpayton/develop Fixes to get tests passing in Python 3.	2017-11-13 08:48:38 +13:00
Chris Hallberg	8696e2bde1	Suppress BeautifulSoup warning by explicitly passing in the default parser as recommended by the error message: ``` /home/challberg/.local/lib/python2.7/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("html.parser"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently. The code that caused this warning is on line 35 of the file unroll.py. To get rid of this warning, change code that looks like this: BeautifulSoup(YOUR_MARKUP}) to this: BeautifulSoup(YOUR_MARKUP, "html.parser") markup_type=markup_type)) ```	2017-06-12 16:03:04 -04:00
dmpayton	ee53d85c41	Fixes to get tests passing in Python 3.	2016-02-23 15:15:29 -08:00
Matthew Tretter	53ba0daa77	Document options	2013-07-31 23:23:44 -04:00