Use bs4 (BeautifulSoup 4) instead of lxml's soupparser for HTML parsing
This causes a lot more tests to fail. But it'll be worth it in the end.
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from lxml.html.soupparser import fromstring
|
from bs4 import BeautifulSoup, NavigableString
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
@@ -22,22 +22,23 @@ class MarkdownConverter(object):
|
|||||||
self.tags_to_convert = tags_to_convert
|
self.tags_to_convert = tags_to_convert
|
||||||
|
|
||||||
def convert(self, html):
|
def convert(self, html):
|
||||||
soup = fromstring(html)
|
soup = BeautifulSoup(html)
|
||||||
return self.process_tag(soup)
|
return self.process_tag(soup)
|
||||||
|
|
||||||
def process_tag(self, node):
|
def process_tag(self, node):
|
||||||
text = self.process_text(node.text)
|
text = ''
|
||||||
|
|
||||||
# Convert the children first
|
# Convert the children first
|
||||||
for el in node.findall('*'):
|
for el in node.children:
|
||||||
text += self.process_tag(el)
|
if isinstance(el, NavigableString):
|
||||||
|
text += self.process_text(unicode(el))
|
||||||
|
else:
|
||||||
|
text += self.process_tag(el)
|
||||||
|
|
||||||
convert_fn = getattr(self, 'convert_%s' % node.tag, None)
|
convert_fn = getattr(self, 'convert_%s' % node.name, None)
|
||||||
if convert_fn and self.should_convert_tag(node.tag):
|
if convert_fn and self.should_convert_tag(node.name):
|
||||||
text = convert_fn(node, text)
|
text = convert_fn(node, text)
|
||||||
|
|
||||||
text += self.process_text(node.tail)
|
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def process_text(self, text):
|
def process_text(self, text):
|
||||||
@@ -102,8 +103,8 @@ class MarkdownConverter(object):
|
|||||||
return self.convert_em(el, text)
|
return self.convert_em(el, text)
|
||||||
|
|
||||||
def convert_li(self, el, text):
|
def convert_li(self, el, text):
|
||||||
parent = el.getparent()
|
parent = el.parent
|
||||||
if parent is not None and parent.tag == 'ol':
|
if parent is not None and parent.name == 'ol':
|
||||||
bullet = '%s.' % (parent.index(el) + 1)
|
bullet = '%s.' % (parent.index(el) + 1)
|
||||||
else:
|
else:
|
||||||
bullet = '*'
|
bullet = '*'
|
||||||
|
|||||||
Reference in New Issue
Block a user