From 8f6d7e500d635ca4d48338b1561efd778f1dc624 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 30 May 2021 18:40:40 +0200 Subject: [PATCH 1/4] add option 'default_title' to links fixes #39 --- README.rst | 6 +++++- markdownify/__init__.py | 8 +++++++- tests/test_conversions.py | 5 ++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 1e245c1..e4004ea 100644 --- a/README.rst +++ b/README.rst @@ -62,7 +62,11 @@ convert autolinks A boolean indicating whether the "automatic link" style should be used when - a ``a`` tag's contents match its href. Defaults to ``True`` + a ``a`` tag's contents match its href. Defaults to ``True``. + +default_title + A boolean to enable setting the title of a link to its href, if no title is + given. Defaults to ``False``. heading_style Defines how headings should be converted. Accepted values are ``ATX``, diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 0b849fb..013b1b4 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -69,6 +69,7 @@ class MarkdownConverter(object): strip = None convert = None autolinks = True + default_title = False heading_style = UNDERLINED bullets = '*+-' # An iterable of bullet types. strong_em_symbol = ASTERISK @@ -198,9 +199,14 @@ class MarkdownConverter(object): href = el.get('href') title = el.get('title') # For the replacement see #29: text nodes underscores are escaped - if self.options['autolinks'] and text.replace(r'\_', '_') == href and not title: + if (self.options['autolinks'] + and text.replace(r'\_', '_') == href + and not title + and not self.options['default_title']): # Shortcut syntax return '<%s>' % href + if self.options['default_title'] and not title: + title = href title_part = ' "%s"' % title.replace('"', r'\"') if title else '' return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 354212b..5c544ed 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -173,7 +173,6 @@ def test_chomp(): def test_a(): assert md('Google') == '[Google](https://google.com)' - assert md('https://google.com', autolinks=False) == '[https://google.com](https://google.com)' assert md('https://google.com') == '' assert md('https://community.kde.org/Get_Involved') == '' assert md('https://community.kde.org/Get_Involved', autolinks=False) == '[https://community.kde.org/Get\\_Involved](https://community.kde.org/Get_Involved)' @@ -189,6 +188,7 @@ def test_a_spaces(): def test_a_with_title(): text = md('Google') assert text == r'[Google](http://google.com "The \"Goog\"")' + assert md('https://google.com', default_title=True) == '[https://google.com](https://google.com "https://google.com")' def test_a_shortcut(): @@ -197,8 +197,7 @@ def test_a_shortcut(): def test_a_no_autolinks(): - text = md('http://google.com', autolinks=False) - assert text == '[http://google.com](http://google.com)' + assert md('https://google.com', autolinks=False) == '[https://google.com](https://google.com)' def test_b(): From 6f3732307d8b2e30e9dbc2f0099a5b3023d3a20e Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 30 May 2021 18:56:46 +0200 Subject: [PATCH 2/4] restructured test files --- tests/test_advanced.py | 11 ++ tests/test_conversions.py | 320 ++++++-------------------------------- tests/test_lists.py | 73 +++++++++ tests/test_tables.py | 130 ++++++++++++++++ 4 files changed, 264 insertions(+), 270 deletions(-) create mode 100644 tests/test_lists.py create mode 100644 tests/test_tables.py diff --git a/tests/test_advanced.py b/tests/test_advanced.py index 846d8aa..14bf3cd 100644 --- a/tests/test_advanced.py +++ b/tests/test_advanced.py @@ -1,6 +1,17 @@ from markdownify import markdownify as md +def test_chomp(): + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' s ') == ' **s** ' + assert md(' s ') == ' **s** ' + assert md(' s ') == ' **s** ' + assert md(' s ') == ' **s** ' + + def test_nested(): text = md('

This is an example link.

') assert text == 'This is an [example link](http://example.com/).\n\n' diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 5c544ed..b90067a 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,174 +1,13 @@ from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE -nested_uls = """ -
    -
  • 1 -
      -
    • a -
        -
      • I
      • -
      • II
      • -
      • III
      • -
      -
    • -
    • b
    • -
    • c
    • -
    -
  • -
  • 2
  • -
  • 3
  • -
""" - -nested_ols = """ -
    -
  1. 1 -
      -
    1. a -
        -
      1. I
      2. -
      3. II
      4. -
      5. III
      6. -
      -
    2. -
    3. b
    4. -
    5. c
    6. -
    -
  2. -
  3. 2
  4. -
  5. 3
  6. - """ - - -table = """ - - - - - - - - - - - - - - - -
    FirstnameLastnameAge
    JillSmith50
    EveJackson94
    """ - - -table_with_html_content = """ - - - - - - - - - - - - - - - -
    FirstnameLastnameAge
    JillSmith50
    EveJackson94
    """ - - -table_with_header_column = """ - - - - - - - - - - - - - - - -
    FirstnameLastnameAge
    JillSmith50
    EveJackson94
    """ - - -table_head_body = """ - - - - - - - - - - - - - - - - - - - -
    FirstnameLastnameAge
    JillSmith50
    EveJackson94
    """ - -table_missing_text = """ - - - - - - - - - - - - - - - - - - - -
    LastnameAge
    Jill50
    EveJackson94
    """ - -table_missing_head = """ - - - - - - - - - - - - - - - -
    FirstnameLastnameAge
    JillSmith50
    EveJackson94
    """ - - -def test_chomp(): - assert md(' ') == ' ' - assert md(' ') == ' ' - assert md(' ') == ' ' - assert md(' ') == ' ' - assert md(' s ') == ' **s** ' - assert md(' s ') == ' **s** ' - assert md(' s ') == ' **s** ' - assert md(' s ') == ' **s** ' +def inline_tests(tag, markup): + # test template for different inline tags + assert md(f'<{tag}>Hello') == f'{markup}Hello{markup}' + assert md(f'foo <{tag}>Hello bar') == f'foo {markup}Hello{markup} bar' + assert md(f'foo<{tag}> Hello bar') == f'foo {markup}Hello{markup} bar' + assert md(f'foo <{tag}>Hello bar') == f'foo {markup}Hello{markup} bar' + assert md(f'foo <{tag}> bar') in ['foo bar', 'foo bar'] # Either is OK def test_a(): @@ -219,58 +58,30 @@ def test_blockquote_with_paragraph(): assert md('
    Hello

    handsome

    ') == '\n> Hello\n\nhandsome\n\n' -def test_nested_blockquote(): +def test_blockquote_nested(): text = md('
    And she was like
    Hello
    ') assert text == '\n> And she was like \n> > Hello\n> \n> \n\n' def test_br(): assert md('a
    b
    c') == 'a \nb \nc' - - -def test_em(): - assert md('Hello') == '*Hello*' - - -def test_em_spaces(): - assert md('foo Hello bar') == 'foo *Hello* bar' - assert md('foo Hello bar') == 'foo *Hello* bar' - assert md('foo Hello bar') == 'foo *Hello* bar' - assert md('foo bar') == 'foo bar' - - -def inline_tests(tag, markup): - # Basically re-use test_em() and test_em_spaces(), - assert md(f'<{tag}>Hello') == f'{markup}Hello{markup}' - assert md(f'foo <{tag}>Hello bar') == f'foo {markup}Hello{markup} bar' - assert md(f'foo<{tag}> Hello bar') == f'foo {markup}Hello{markup} bar' - assert md(f'foo <{tag}>Hello bar') == f'foo {markup}Hello{markup} bar' - assert md(f'foo <{tag}> bar') in ['foo bar', 'foo bar'] # Either is OK + assert md('a
    b
    c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' def test_code(): inline_tests('code', '`') -def test_samp(): - inline_tests('samp', '`') - - -def test_kbd(): - inline_tests('kbd', '`') - - -def test_pre(): - assert md('
    test\n    foo\nbar
    ') == '\n```\ntest\n foo\nbar\n```\n' - assert md('
    test\n    foo\nbar
    ') == '\n```\ntest\n foo\nbar\n```\n' - - def test_del(): inline_tests('del', '~~') -def test_s(): - inline_tests('s', '~~') +def test_div(): + assert md('Hello World') == 'Hello World' + + +def test_em(): + inline_tests('em', '*') def test_h1(): @@ -283,6 +94,8 @@ def test_h2(): def test_hn(): assert md('

    Hello

    ') == '### Hello\n\n' + assert md('

    Hello

    ') == '#### Hello\n\n' + assert md('
    Hello
    ') == '##### Hello\n\n' assert md('
    Hello
    ') == '###### Hello\n\n' @@ -329,87 +142,58 @@ def test_hn_nested_img(): assert md('

    A B

    ') == '### A ' + markdown + ' B\n\n' -def test_hr(): - assert md('Hello
    World') == 'Hello\n\n---\n\nWorld' - assert md('Hello
    World') == 'Hello\n\n---\n\nWorld' - assert md('

    Hello

    \n
    \n

    World

    ') == 'Hello\n\n\n\n\n---\n\n\nWorld\n\n' +def test_hn_atx_headings(): + assert md('

    Hello

    ', heading_style=ATX) == '# Hello\n\n' + assert md('

    Hello

    ', heading_style=ATX) == '## Hello\n\n' + + +def test_hn_atx_closed_headings(): + assert md('

    Hello

    ', heading_style=ATX_CLOSED) == '# Hello #\n\n' + assert md('

    Hello

    ', heading_style=ATX_CLOSED) == '## Hello ##\n\n' def test_head(): assert md('head') == 'head' -def test_atx_headings(): - assert md('

    Hello

    ', heading_style=ATX) == '# Hello\n\n' - assert md('

    Hello

    ', heading_style=ATX) == '## Hello\n\n' - - -def test_atx_closed_headings(): - assert md('

    Hello

    ', heading_style=ATX_CLOSED) == '# Hello #\n\n' - assert md('

    Hello

    ', heading_style=ATX_CLOSED) == '## Hello ##\n\n' +def test_hr(): + assert md('Hello
    World') == 'Hello\n\n---\n\nWorld' + assert md('Hello
    World') == 'Hello\n\n---\n\nWorld' + assert md('

    Hello

    \n
    \n

    World

    ') == 'Hello\n\n\n\n\n---\n\n\nWorld\n\n' def test_i(): assert md('Hello') == '*Hello*' -def test_ol(): - assert md('
    1. a
    2. b
    ') == '1. a\n2. b\n' - assert md('
    1. a
    2. b
    ') == '3. a\n4. b\n' - - -def test_p(): - assert md('

    hello

    ') == 'hello\n\n' - - -def test_strong(): - assert md('Hello') == '**Hello**' - - -def test_ul(): - assert md('
    • a
    • b
    ') == '* a\n* b\n' - - -def test_nested_ols(): - assert md(nested_ols) == '\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n' - - -def test_inline_ul(): - assert md('

    foo

    • a
    • b

    bar

    ') == 'foo\n\n* a\n* b\n\nbar\n\n' - - -def test_nested_uls(): - """ - Nested ULs should alternate bullet characters. - - """ - assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n' - - -def test_bullets(): - assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n' - - -def test_li_text(): - assert md('
    • foo bar
    • foo bar
    • foo bar space.
    ') == '* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n' - - def test_img(): assert md('Alt text') == '![Alt text](/path/to/img.jpg "Optional title")' assert md('Alt text') == '![Alt text](/path/to/img.jpg)' -def test_div(): - assert md('Hello World') == 'Hello World' +def test_kbd(): + inline_tests('kbd', '`') -def test_table(): - assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' +def test_p(): + assert md('

    hello

    ') == 'hello\n\n' + + +def test_pre(): + assert md('
    test\n    foo\nbar
    ') == '\n```\ntest\n foo\nbar\n```\n' + assert md('
    test\n    foo\nbar
    ') == '\n```\ntest\n foo\nbar\n```\n' + + +def test_s(): + inline_tests('s', '~~') + + +def test_samp(): + inline_tests('samp', '`') + + +def test_strong(): + assert md('Hello') == '**Hello**' def test_strong_em_symbol(): @@ -417,7 +201,3 @@ def test_strong_em_symbol(): assert md('Hello', strong_em_symbol=UNDERSCORE) == '__Hello__' assert md('Hello', strong_em_symbol=UNDERSCORE) == '_Hello_' assert md('Hello', strong_em_symbol=UNDERSCORE) == '_Hello_' - - -def test_newline_style(): - assert md('a
    b
    c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' diff --git a/tests/test_lists.py b/tests/test_lists.py new file mode 100644 index 0000000..08c82c7 --- /dev/null +++ b/tests/test_lists.py @@ -0,0 +1,73 @@ +from markdownify import markdownify as md + + +nested_uls = """ +
      +
    • 1 +
        +
      • a +
          +
        • I
        • +
        • II
        • +
        • III
        • +
        +
      • +
      • b
      • +
      • c
      • +
      +
    • +
    • 2
    • +
    • 3
    • +
    """ + +nested_ols = """ +
      +
    1. 1 +
        +
      1. a +
          +
        1. I
        2. +
        3. II
        4. +
        5. III
        6. +
        +
      2. +
      3. b
      4. +
      5. c
      6. +
      +
    2. +
    3. 2
    4. +
    5. 3
    6. + """ + + +def test_ol(): + assert md('
      1. a
      2. b
      ') == '1. a\n2. b\n' + assert md('
      1. a
      2. b
      ') == '3. a\n4. b\n' + + +def test_nested_ols(): + assert md(nested_ols) == '\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n' + + +def test_ul(): + assert md('
      • a
      • b
      ') == '* a\n* b\n' + + +def test_inline_ul(): + assert md('

      foo

      • a
      • b

      bar

      ') == 'foo\n\n* a\n* b\n\nbar\n\n' + + +def test_nested_uls(): + """ + Nested ULs should alternate bullet characters. + + """ + assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n' + + +def test_bullets(): + assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n' + + +def test_li_text(): + assert md('
      • foo bar
      • foo bar
      • foo bar space.
      ') == '* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n' diff --git a/tests/test_tables.py b/tests/test_tables.py new file mode 100644 index 0000000..e481e92 --- /dev/null +++ b/tests/test_tables.py @@ -0,0 +1,130 @@ +from markdownify import markdownify as md + + +table = """ + + + + + + + + + + + + + + + +
      FirstnameLastnameAge
      JillSmith50
      EveJackson94
      """ + + +table_with_html_content = """ + + + + + + + + + + + + + + + +
      FirstnameLastnameAge
      JillSmith50
      EveJackson94
      """ + + +table_with_header_column = """ + + + + + + + + + + + + + + + +
      FirstnameLastnameAge
      JillSmith50
      EveJackson94
      """ + + +table_head_body = """ + + + + + + + + + + + + + + + + + + + +
      FirstnameLastnameAge
      JillSmith50
      EveJackson94
      """ + +table_missing_text = """ + + + + + + + + + + + + + + + + + + + +
      LastnameAge
      Jill50
      EveJackson94
      """ + +table_missing_head = """ + + + + + + + + + + + + + + + +
      FirstnameLastnameAge
      JillSmith50
      EveJackson94
      """ + + +def test_table(): + assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n' + assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' From a6a31624ad4e0fb15f17d29853d33c34bb7e20c2 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 30 May 2021 19:07:43 +0200 Subject: [PATCH 3/4] add options for sub and sup tags fixes #44 --- README.rst | 5 +++++ markdownify/__init__.py | 14 ++++++++++---- tests/test_conversions.py | 10 ++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index e4004ea..6e914b7 100644 --- a/README.rst +++ b/README.rst @@ -84,6 +84,11 @@ strong_em_symbol *emphasized* texts. Either of these symbols can be chosen by the options ``ASTERISK`` (default) or ``UNDERSCORE`` respectively. +sub_symbol, sup_symbol + Define the chars that surround ```` and ```` text. Defaults to an + empty string, because this is non-standard behavior. Could be something like + ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. + newline_style Defines the style of marking linebreaks (``
      ``) in markdown. The default value ``SPACES`` of this option will adopt the usual two spaces and a newline, diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 013b1b4..ffde006 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -66,14 +66,16 @@ def _todict(obj): class MarkdownConverter(object): class DefaultOptions: - strip = None - convert = None autolinks = True + bullets = '*+-' # An iterable of bullet types. + convert = None default_title = False heading_style = UNDERLINED - bullets = '*+-' # An iterable of bullet types. - strong_em_symbol = ASTERISK newline_style = SPACES + strip = None + strong_em_symbol = ASTERISK + sub_symbol = '' + sup_symbol = '' class Options(DefaultOptions): pass @@ -325,6 +327,10 @@ class MarkdownConverter(object): convert_samp = convert_code + convert_sub = abstract_inline_conversion(lambda self: self.options['sub_symbol']) + + convert_sup = abstract_inline_conversion(lambda self: self.options['sup_symbol']) + def convert_table(self, el, text, convert_as_inline): return '\n\n' + text + '\n' diff --git a/tests/test_conversions.py b/tests/test_conversions.py index b90067a..b88d887 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -201,3 +201,13 @@ def test_strong_em_symbol(): assert md('Hello', strong_em_symbol=UNDERSCORE) == '__Hello__' assert md('Hello', strong_em_symbol=UNDERSCORE) == '_Hello_' assert md('Hello', strong_em_symbol=UNDERSCORE) == '_Hello_' + + +def test_sub(): + assert md('foo') == 'foo' + assert md('foo', sub_symbol='~') == '~foo~' + + +def test_sup(): + assert md('foo') == 'foo' + assert md('foo', sup_symbol='^') == '^foo^' From cec570fc49d30e2725902269f913c48964f4bbf3 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Sun, 30 May 2021 19:10:31 +0200 Subject: [PATCH 4/4] bump to v0.9.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 370bc7f..126526e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() pkgmeta = { '__title__': 'markdownify', '__author__': 'Matthew Tretter', - '__version__': '0.8.1', + '__version__': '0.9.0', }