From 7bdeb15b187708dac21d57b657b183910cc94be6 Mon Sep 17 00:00:00 2001 From: Matthew Tretter Date: Wed, 31 Jul 2013 18:01:46 -0400 Subject: [PATCH] Use bs4 This causes a lot more tests to fail. But it'll be worth it in the end. --- markdownify/__init__.py | 23 ++++++++++++----------- setup.py | 3 +-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 496b12e..11c6270 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -1,4 +1,4 @@ -from lxml.html.soupparser import fromstring +from bs4 import BeautifulSoup, NavigableString import re @@ -22,22 +22,23 @@ class MarkdownConverter(object): self.tags_to_convert = tags_to_convert def convert(self, html): - soup = fromstring(html) + soup = BeautifulSoup(html) return self.process_tag(soup) def process_tag(self, node): - text = self.process_text(node.text) + text = '' # Convert the children first - for el in node.findall('*'): - text += self.process_tag(el) + for el in node.children: + if isinstance(el, NavigableString): + text += self.process_text(unicode(el)) + else: + text += self.process_tag(el) - convert_fn = getattr(self, 'convert_%s' % node.tag, None) - if convert_fn and self.should_convert_tag(node.tag): + convert_fn = getattr(self, 'convert_%s' % node.name, None) + if convert_fn and self.should_convert_tag(node.name): text = convert_fn(node, text) - text += self.process_text(node.tail) - return text def process_text(self, text): @@ -102,8 +103,8 @@ class MarkdownConverter(object): return self.convert_em(el, text) def convert_li(self, el, text): - parent = el.getparent() - if parent is not None and parent.tag == 'ol': + parent = el.parent + if parent is not None and parent.name == 'ol': bullet = '%s.' % (parent.index(el) + 1) else: bullet = '*' diff --git a/setup.py b/setup.py index 2c52d01..7a3414c 100644 --- a/setup.py +++ b/setup.py @@ -75,8 +75,7 @@ setup( 'pytest', ], install_requires=[ - 'lxml', - 'BeautifulSoup', + 'beautifulsoup4', ], classifiers=[ 'Environment :: Web Environment',