diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 11c6270..6e842d8 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -5,6 +5,8 @@ import re
convert_heading_re = re.compile(r'convert_h(\d+)')
line_beginning_re = re.compile(r'^', re.MULTILINE)
whitespace_re = re.compile(r'[\r\n\s\t ]+')
+FRAGMENT_ID = '__MARKDOWNIFY_WRAPPER__'
+wrapped = '<div id="%s">%%s</div>' % FRAGMENT_ID
def escape(text):
@@ -22,10 +24,14 @@ class MarkdownConverter(object):
self.tags_to_convert = tags_to_convert
def convert(self, html):
+ # We want to take advantage of the html5 parsing, but we don't actually
+ # want a full document. Therefore, we'll mark our fragment with an id,
+ # create the document, and extract the element with the id.
+ html = wrapped % html
soup = BeautifulSoup(html)
- return self.process_tag(soup)
+ return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
- def process_tag(self, node):
+ def process_tag(self, node, children_only=False):
text = ''
# Convert the children first
@@ -35,9 +41,10 @@ class MarkdownConverter(object):
else:
text += self.process_tag(el)
- convert_fn = getattr(self, 'convert_%s' % node.name, None)
- if convert_fn and self.should_convert_tag(node.name):
- text = convert_fn(node, text)
+ if not children_only:
+ convert_fn = getattr(self, 'convert_%s' % node.name, None)
+ if convert_fn and self.should_convert_tag(node.name):
+ text = convert_fn(node, text)
return text