Identify and single out HTML fragment
This commit is contained in:
@@ -5,6 +5,8 @@ import re
|
||||
convert_heading_re = re.compile(r'convert_h(\d+)')
|
||||
line_beginning_re = re.compile(r'^', re.MULTILINE)
|
||||
whitespace_re = re.compile(r'[\r\n\s\t ]+')
|
||||
FRAGMENT_ID = '__MARKDOWNIFY_WRAPPER__'
|
||||
wrapped = '<div id="%s">%%s</div>' % FRAGMENT_ID
|
||||
|
||||
|
||||
def escape(text):
|
||||
@@ -22,10 +24,14 @@ class MarkdownConverter(object):
|
||||
self.tags_to_convert = tags_to_convert
|
||||
|
||||
def convert(self, html):
|
||||
# We want to take advantage of the html5 parsing, but we don't actually
|
||||
# want a full document. Therefore, we'll mark our fragment with an id,
|
||||
# create the document, and extract the element with the id.
|
||||
html = wrapped % html
|
||||
soup = BeautifulSoup(html)
|
||||
return self.process_tag(soup)
|
||||
return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
|
||||
|
||||
def process_tag(self, node):
|
||||
def process_tag(self, node, children_only=False):
|
||||
text = ''
|
||||
|
||||
# Convert the children first
|
||||
@@ -35,9 +41,10 @@ class MarkdownConverter(object):
|
||||
else:
|
||||
text += self.process_tag(el)
|
||||
|
||||
convert_fn = getattr(self, 'convert_%s' % node.name, None)
|
||||
if convert_fn and self.should_convert_tag(node.name):
|
||||
text = convert_fn(node, text)
|
||||
if not children_only:
|
||||
convert_fn = getattr(self, 'convert_%s' % node.name, None)
|
||||
if convert_fn and self.should_convert_tag(node.name):
|
||||
text = convert_fn(node, text)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
Reference in New Issue
Block a user