This causes a lot more tests to fail, but it'll be worth it in the end.
This commit is contained in:
Matthew Tretter
2013-07-31 18:01:46 -04:00
parent 87c8f3bd5e
commit 7bdeb15b18
2 changed files with 13 additions and 13 deletions

View File

@@ -1,4 +1,4 @@
from lxml.html.soupparser import fromstring from bs4 import BeautifulSoup, NavigableString
import re import re
@@ -22,22 +22,23 @@ class MarkdownConverter(object):
self.tags_to_convert = tags_to_convert self.tags_to_convert = tags_to_convert
def convert(self, html): def convert(self, html):
soup = fromstring(html) soup = BeautifulSoup(html)
return self.process_tag(soup) return self.process_tag(soup)
def process_tag(self, node): def process_tag(self, node):
text = self.process_text(node.text) text = ''
# Convert the children first # Convert the children first
for el in node.findall('*'): for el in node.children:
text += self.process_tag(el) if isinstance(el, NavigableString):
text += self.process_text(unicode(el))
else:
text += self.process_tag(el)
convert_fn = getattr(self, 'convert_%s' % node.tag, None) convert_fn = getattr(self, 'convert_%s' % node.name, None)
if convert_fn and self.should_convert_tag(node.tag): if convert_fn and self.should_convert_tag(node.name):
text = convert_fn(node, text) text = convert_fn(node, text)
text += self.process_text(node.tail)
return text return text
def process_text(self, text): def process_text(self, text):
@@ -102,8 +103,8 @@ class MarkdownConverter(object):
return self.convert_em(el, text) return self.convert_em(el, text)
def convert_li(self, el, text): def convert_li(self, el, text):
parent = el.getparent() parent = el.parent
if parent is not None and parent.tag == 'ol': if parent is not None and parent.name == 'ol':
bullet = '%s.' % (parent.index(el) + 1) bullet = '%s.' % (parent.index(el) + 1)
else: else:
bullet = '*' bullet = '*'

View File

@@ -75,8 +75,7 @@ setup(
'pytest', 'pytest',
], ],
install_requires=[ install_requires=[
'lxml', 'beautifulsoup4',
'BeautifulSoup',
], ],
classifiers=[ classifiers=[
'Environment :: Web Environment', 'Environment :: Web Environment',