diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index efb2d15..a37f870 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -7,7 +7,8 @@ import six
convert_heading_re = re.compile(r'convert_h(\d+)')
line_beginning_re = re.compile(r'^', re.MULTILINE)
whitespace_re = re.compile(r'[\t ]+')
-all_whitespace_re = re.compile(r'[\s]+')
+all_whitespace_re = re.compile(r'[\t \r\n]+')
+newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
html_heading_re = re.compile(r'h[1-6]')
@@ -168,7 +169,11 @@ class MarkdownConverter(object):
# normalize whitespace if we're not inside a preformatted element
if not el.find_parent('pre'):
- text = whitespace_re.sub(' ', text)
+ if self.options['wrap']:
+ text = all_whitespace_re.sub(' ', text)
+ else:
+ text = newline_whitespace_re.sub('\n', text)
+ text = whitespace_re.sub(' ', text)
# escape special characters if we're not inside a preformatted or code element
if not el.find_parent(['pre', 'code', 'kbd', 'samp']):
@@ -286,6 +291,7 @@ class MarkdownConverter(object):
if style == UNDERLINED and n <= 2:
line = '=' if n == 1 else '-'
return self.underline(text, line)
+ text = all_whitespace_re.sub(' ', text)
hashes = '#' * n
if style == ATX_CLOSED:
return '\n%s %s %s\n\n' % (hashes, text, hashes)
@@ -351,10 +357,21 @@ class MarkdownConverter(object):
if convert_as_inline:
return text
if self.options['wrap']:
- text = fill(text,
- width=self.options['wrap_width'],
- break_long_words=False,
- break_on_hyphens=False)
+ # Preserve newlines (and preceding whitespace) resulting
+ # from <br> tags. Newlines in the input have already been
+ # replaced by spaces.
+ lines = text.split('\n')
+ new_lines = []
+ for line in lines:
+ line = line.lstrip()
+ line_no_trailing = line.rstrip()
+ trailing = line[len(line_no_trailing):]
+ line = fill(line,
+ width=self.options['wrap_width'],
+ break_long_words=False,
+ break_on_hyphens=False)
+ new_lines.append(line + trailing)
+ text = '\n'.join(new_lines)
return '\n\n%s\n\n' % text if text else ''
def convert_pre(self, el, text, convert_as_inline):
diff --git a/tests/test_basic.py b/tests/test_basic.py
index bf25ee0..66f8b6c 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -11,3 +11,4 @@ def test_soup():
def test_whitespace():
assert md(' a b \t\t c ') == ' a b c '
+ assert md(' a b \n\n c ') == ' a b\nc '
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index baa294b..9c1edc3 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,4 +1,4 @@
-from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE
+from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
def inline_tests(tag, markup):
@@ -113,6 +113,7 @@ def test_em():
def test_header_with_space():
assert md('
B
B
Bhello
') == '\n\nhello\n\n' assert md('123456789 123456789
') == '\n\n123456789 123456789\n\n' + assert md('123456789\n\n\n123456789
') == '\n\n123456789\n123456789\n\n' + assert md('123456789\n\n\n123456789
', wrap=True, wrap_width=80) == '\n\n123456789 123456789\n\n' assert md('123456789 123456789
', wrap=True, wrap_width=10) == '\n\n123456789\n123456789\n\n' assert md('', wrap=True, wrap_width=10) == '\n\n[Some long\nlink](https://example.com)\n\n' assert md('12345
67890
12345
67890
12345
67890
12345
67890
12345678901
12345
12345678901
12345
12345678901
12345
12345678901
12345
1234 5678 9012
67890
1234 5678 9012
67890
Second
Third
Fourth') == 'First\n\nSecond\n\nThird\n\nFourth' diff --git a/tests/test_tables.py b/tests/test_tables.py index 594e5bf..fc6eee6 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -242,7 +242,7 @@ def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' - assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' + assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n' assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'