implement a strip_pre configuration option (#218) (#222)

Signed-off-by: chrispy <chrispy@synopsys.com>
2025-06-14 16:37:47 -04:00
parent 75ab3064dd
commit 9b1412aa5b
4 changed files with 45 additions and 3 deletions
--- a/README.rst
+++ b/README.rst
@@ -157,6 +157,12 @@ strip_document
  within the document are unaffected.
  Defaults to ``STRIP``.
 strip_pre
  Controls whether leading/trailing blank lines are removed from the contents
  of ``<pre>`` tags. Supported values are ``STRIP`` (all leading/trailing
  blank lines), ``STRIP_ONE`` (one leading/trailing blank line), and ``None``
  (no stripping). Defaults to ``STRIP``.
 bs4_options
  Specify additional configuration options for the ``BeautifulSoup`` object
  used to interpret the HTML markup. String and list values (such as ``lxml``
--- a/markdownify/init.py
+++ b/markdownify/init.py
@@ -11,6 +11,10 @@ re_whitespace = re.compile(r'[\t ]+')
 re_all_whitespace = re.compile(r'[\t \r\n]+')
 re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
 re_html_heading = re.compile(r'h(\d+)')
 # Patterns for trimming <pre> text. They are compiled without re.MULTILINE,
 # so ^ anchors only at the very start of the string.
 # Leading: optional spaces followed by a single newline.
 re_pre_lstrip1 = re.compile(r'^ *\n')
 # Trailing: a single newline followed by optional spaces.
 # NOTE: `$` also matches just before a final newline, not only at the end.
 re_pre_rstrip1 = re.compile(r'\n *$')
 # Leading: any run of spaces/newlines ending in a newline; the indentation
 # of the first non-blank line is left in place.
 re_pre_lstrip = re.compile(r'^[ \n]*\n')
 # Trailing: any run of spaces/newlines at the end of the string.
 re_pre_rstrip = re.compile(r'[ \n]*$')
 # Pattern for creating convert_<tag> function names from tag names
 re_make_convert_fn_name = re.compile(r'[\[\]:-]')
@@ -51,10 +55,25 @@ BACKSLASH = 'backslash'
 ASTERISK = '*'
 UNDERSCORE = '_'
-# Document strip styles
+# Document/pre strip styles
 LSTRIP = 'lstrip'
 RSTRIP = 'rstrip'
 STRIP = 'strip'
 # Strip a single leading/trailing blank line; handled by the strip_pre option
 STRIP_ONE = 'strip_one'
 def strip1_pre(text):
    """Strip at most one leading and one trailing newline from a <pre> string.

    At the start, optional spaces followed by a newline are removed; at the
    end, a newline followed by optional spaces is removed. Text without such
    a blank line at either end is returned unchanged on that side.

    :param text: text content of a <pre> element
    :returns: the text with at most one blank line removed from each end
    """
    text = re_pre_lstrip1.sub('', text)
    # `$` matches both at the end of the string and just before a final
    # newline, so an unbounded sub() would remove two trailing newlines from
    # text ending in "\n\n". Limit the substitution to the first match.
    text = re_pre_rstrip1.sub('', text, count=1)
    return text
 def strip_pre(text):
    """Remove every leading and trailing blank line from a <pre> string.

    Trailing spaces after the last non-blank content are removed as well.
    """
    without_leading = re_pre_lstrip.sub('', text)
    return re_pre_rstrip.sub('', without_leading)
 def chomp(text):
@@ -168,6 +187,7 @@ class MarkdownConverter(object):
        newline_style = SPACES
        strip = None
        strip_document = STRIP
        strip_pre = STRIP
        strong_em_symbol = ASTERISK
        sub_symbol = ''
        sup_symbol = ''
@@ -656,6 +676,15 @@ class MarkdownConverter(object):
        if self.options['code_language_callback']:
            code_language = self.options['code_language_callback'](el) or code_language
        if self.options['strip_pre'] == STRIP:
            text = strip_pre(text)  # remove all leading/trailing newlines
        elif self.options['strip_pre'] == STRIP_ONE:
            text = strip1_pre(text)  # remove one leading/trailing newline
        elif self.options['strip_pre'] is None:
            pass  # leave leading and trailing newlines as-is
        else:
            raise ValueError('Invalid value for strip_pre: %s' % self.options['strip_pre'])
        return '\n\n```%s\n%s\n```\n\n' % (code_language, text)
    def convert_q(self, el, text, parent_tags):
--- a/tests/test_args.py
+++ b/tests/test_args.py
@@ -2,7 +2,7 @@
 Test whitelisting/blacklisting of specific tags.
 """
-from markdownify import markdownify, LSTRIP, RSTRIP, STRIP
+from markdownify import markdownify, LSTRIP, RSTRIP, STRIP, STRIP_ONE
 from .utils import md
@@ -34,6 +34,13 @@ def test_strip_document():
    assert markdownify("<p>Hello</p>", strip_document=None) == "\n\nHello\n\n"
 def test_strip_pre():
    """Check <pre> blank-line stripping for the default, STRIP, STRIP_ONE and None."""
    html = "<pre>  \n  \n  Hello  \n  \n  </pre>"
    # (keyword arguments, expected Markdown); the empty dict exercises the default
    cases = [
        ({}, "```\n  Hello\n```"),
        ({'strip_pre': STRIP}, "```\n  Hello\n```"),
        ({'strip_pre': STRIP_ONE}, "```\n  \n  Hello  \n  \n```"),
        ({'strip_pre': None}, "```\n  \n  \n  Hello  \n  \n  \n```"),
    ]
    for kwargs, expected in cases:
        assert markdownify(html, **kwargs) == expected
 def bs4_options():
    assert markdownify("<p>Hello</p>", bs4_options="html.parser") == "Hello"
    assert markdownify("<p>Hello</p>", bs4_options=["html.parser"]) == "Hello"
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -370,4 +370,4 @@ def test_spaces():
    assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
    assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
    assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
-    assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo \n```\n\nbar'
+    assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo\n```\n\nbar'