Use bs4 (BeautifulSoup 4) instead of lxml's soupparser

This causes a lot more tests to fail, but it'll be worth it in the end.
2013-07-31 18:01:46 -04:00
parent 87c8f3bd5e
commit 7bdeb15b18
2 changed files with 13 additions and 13 deletions
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -1,4 +1,4 @@
-from lxml.html.soupparser import fromstring
+from bs4 import BeautifulSoup, NavigableString
 import re


@@ -22,22 +22,23 @@ class MarkdownConverter(object):
        self.tags_to_convert = tags_to_convert

    def convert(self, html):
-        soup = fromstring(html)
+        soup = BeautifulSoup(html)
        return self.process_tag(soup)

    def process_tag(self, node):
-        text = self.process_text(node.text)
+        text = ''

        # Convert the children first
-        for el in node.findall('*'):
-            text += self.process_tag(el)
+        for el in node.children:
+            if isinstance(el, NavigableString):
+                text += self.process_text(unicode(el))
+            else:
+                text += self.process_tag(el)

-        convert_fn = getattr(self, 'convert_%s' % node.tag, None)
-        if convert_fn and self.should_convert_tag(node.tag):
+        convert_fn = getattr(self, 'convert_%s' % node.name, None)
+        if convert_fn and self.should_convert_tag(node.name):
            text = convert_fn(node, text)

-        text += self.process_text(node.tail)
-
        return text

    def process_text(self, text):
@@ -102,8 +103,8 @@ class MarkdownConverter(object):
        return self.convert_em(el, text)

    def convert_li(self, el, text):
-        parent = el.getparent()
-        if parent is not None and parent.tag == 'ol':
+        parent = el.parent
+        if parent is not None and parent.name == 'ol':
            bullet = '%s.' % (parent.index(el) + 1)
        else:
            bullet = '*'
--- a/setup.py
+++ b/setup.py
@@ -75,8 +75,7 @@ setup(
        'pytest',
    ],
    install_requires=[
-        'lxml',
-        'BeautifulSoup',
+        'beautifulsoup4',
    ],
    classifiers=[
        'Environment :: Web Environment',