From 7780f82c302483a5537175f435d271d66cfc4d84 Mon Sep 17 00:00:00 2001
From: Igor Dvorkin
Date: Fri, 11 Dec 2020 16:54:14 -0800
Subject: [PATCH] Using a regexp to determine if a tag is a heading.

---
 markdownify/__init__.py   | 3 ++-
 tests/test_conversions.py | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 0a376a7..cb12d43 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -6,6 +6,7 @@ import six
 convert_heading_re = re.compile(r'convert_h(\d+)')
 line_beginning_re = re.compile(r'^', re.MULTILINE)
 whitespace_re = re.compile(r'[\r\n\s\t ]+')
+html_heading_re = re.compile(r'h[1-6]')
 
 
 # Heading styles
@@ -66,7 +67,7 @@ class MarkdownConverter(object):
     def process_tag(self, node, convert_as_inline, children_only=False):
         text = ''
         # markdown headings can't include block elements (elements w/newlines)
-        isHeading = node.name.startswith('h')
+        isHeading = html_heading_re.match(node.name) is not None
         convert_children_as_inline = convert_as_inline
 
         if not children_only and isHeading:
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 65dbfd2..ab1ce05 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -115,6 +115,14 @@ def test_hn_nested_tag():
     assert md('<h3>A <blockquote>BQ</blockquote> C </h3>') == '### A BQ C\n\n'
 
 
+def test_hr():
+    assert md('<hr>hr</hr>') == 'hr'
+
+
+def test_head():
+    assert md('<head>head</head>') == 'head'
+
+
 def test_atx_headings():
     assert md('<h1>Hello</h1>', heading_style=ATX) == '# Hello\n\n'
     assert md('<h2>Hello</h2>', heading_style=ATX) == '## Hello\n\n'