diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5575ca4..355b9fa 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -4,6 +4,7 @@ import re convert_heading_re = re.compile(r'convert_h(\d+)') +whitespace_re = re.compile(r'[\r\n\s\t ]+') def escape(text): @@ -26,14 +27,14 @@ class MarkdownConverter(object): return soup.text def process_tag(self, node): - text = escape(node.text) + text = self.process_text(node.text) # Convert the children first for el in node.findall('*'): self.process_tag(el) convert_fn = getattr(self, 'convert_%s' % el.tag, None) - tail = escape(el.tail) + tail = self.process_text(el.tail) el.tail = '' if convert_fn: @@ -48,6 +49,9 @@ class MarkdownConverter(object): node.text = text + def process_text(self, text): + return escape(whitespace_re.sub(' ', text or '')) + def __getattr__(self, attr): # Handle heading levels > 2 m = convert_heading_re.match(attr) diff --git a/tests.py b/tests.py index cde2d4f..3c9a9c0 100644 --- a/tests.py +++ b/tests.py @@ -10,6 +10,9 @@ class BasicTests(unittest.TestCase): def test_soup(self): self.assertEqual(md('