Merge pull request #2 from HaoLiHaiO/feat/combine-scripts
Make image dl optional with a flag
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
.venv
|
||||
md_output/
|
||||
13
README.md
13
README.md
@@ -20,6 +20,19 @@ pip3 install -r requirements.txt
|
||||
python3 wiki-to-md.py <topic_name>
|
||||
```
|
||||
|
||||
The `--dl-image` flag is optional and controls whether images are downloaded. It is set to `yes`
|
||||
by default; you can set it to `no`.
|
||||
|
||||
```bash
|
||||
python3 wiki-to-md.py --dl-image=no <topic_name>
|
||||
```
|
||||
|
||||
For help:
|
||||
|
||||
```bash
|
||||
python3 wiki-to-md.py --help
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
The output is a Markdown file named after the topic, written to the newly created `md_output` directory. When image download is enabled (the default), the images are placed inside `md_output/images/`.
|
||||
|
||||
@@ -6,7 +6,7 @@ import requests
|
||||
import urllib.parse
|
||||
|
||||
|
||||
def generate_markdown(topic):
|
||||
def generate_markdown(topic, download_images):
|
||||
try:
|
||||
page = wikipedia.page(topic)
|
||||
except wikipedia.exceptions.DisambiguationError as e:
|
||||
@@ -32,21 +32,22 @@ def generate_markdown(topic):
|
||||
output_directory = "md_output"
|
||||
os.makedirs(output_directory, exist_ok=True)
|
||||
|
||||
# Create a directory for image files
|
||||
image_directory = os.path.join(output_directory, "images")
|
||||
os.makedirs(image_directory, exist_ok=True)
|
||||
if download_images:
|
||||
# Create a directory for image files
|
||||
image_directory = os.path.join(output_directory, "images")
|
||||
os.makedirs(image_directory, exist_ok=True)
|
||||
|
||||
for image_url in page.images:
|
||||
image_filename = urllib.parse.unquote(os.path.basename(image_url))
|
||||
image_path = os.path.join(image_directory, image_filename)
|
||||
image_data = requests.get(image_url).content
|
||||
with open(image_path, "wb") as image_file:
|
||||
image_file.write(image_data)
|
||||
markdown_text += f"\n"
|
||||
for image_url in page.images:
|
||||
image_filename = urllib.parse.unquote(os.path.basename(image_url))
|
||||
image_path = os.path.join(image_directory, image_filename)
|
||||
image_data = requests.get(image_url).content
|
||||
with open(image_path, "wb") as image_file:
|
||||
image_file.write(image_data)
|
||||
markdown_text += f"\n"
|
||||
|
||||
filename = os.path.join(output_directory, f'{topic.replace(" ", "_")}.md')
|
||||
|
||||
with open(filename, "w") as md_file:
|
||||
with open(filename, "w", encoding="utf-8") as md_file:
|
||||
md_file.write(markdown_text)
|
||||
|
||||
print(f"Markdown file created: {filename}")
|
||||
@@ -61,9 +62,16 @@ parser.add_argument(
|
||||
type=str,
|
||||
help="The topic to generate a markdown file for.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dl-image",
|
||||
choices=['yes', 'no'],
|
||||
default='yes',
|
||||
help="Specify whether to download images (yes or no).",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
topic = f"{args.topic}"
|
||||
download_images = args.dl_image == 'yes'
|
||||
|
||||
generate_markdown(topic)
|
||||
generate_markdown(topic, download_images)
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
import os
|
||||
import wikipedia
|
||||
import argparse
|
||||
import re
|
||||
|
||||
|
||||
def generate_markdown(topic):
    """Fetch the Wikipedia page for *topic* and save it as a Markdown file.

    Headings written as ``=== X ===`` and ``== X ==`` in the page content
    are rewritten to ``### X`` and ``## X``. The result is written to
    ``md_output/<topic with spaces replaced by underscores>.md``.

    Args:
        topic: Title passed to ``wikipedia.page``.

    Returns:
        The path of the written Markdown file, or ``None`` when the lookup
        raised ``DisambiguationError`` (the candidate options are printed)
        or ``PageError`` (a "not found" message is printed).
    """
    try:
        page = wikipedia.page(topic)
    except wikipedia.exceptions.DisambiguationError as e:
        print(e.options)
        return None
    except wikipedia.exceptions.PageError:
        print(f"Page not found for the topic: {topic}")
        return None

    # Convert level-3 headings first so the level-2 pattern cannot match
    # the inner part of a "=== X ===" heading.
    page_content = re.sub(r"=== ([^=]+) ===", r"### \1", page.content)
    page_content = re.sub(r"== ([^=]+) ==", r"## \1", page_content)

    # Splitting on a pattern with one capture group always yields an
    # odd-length list: [text, heading, text, heading, ..., text].
    sections = re.split(r"\n(## .*)\n", page_content)
    *paired, tail = sections

    parts = [f"# {topic}\n\n"]
    for text, heading in zip(paired[0::2], paired[1::2]):
        parts.append(f"{text}\n{heading}\n\n")
    # The text after the last heading (or the whole page when it has no
    # level-2 headings) was previously dropped; keep it when non-blank.
    if tail.strip():
        parts.append(f"{tail}\n")
    markdown_text = "".join(parts)

    # Create a directory for markdown files
    directory = "md_output"
    os.makedirs(directory, exist_ok=True)

    filename = os.path.join(directory, f"{topic.replace(' ', '_')}.md")

    # Explicit UTF-8: article text routinely contains non-ASCII characters
    # that the platform default encoding may not be able to represent.
    with open(filename, "w", encoding="utf-8") as md_file:
        md_file.write(markdown_text)

    print(f"Markdown file created: {filename}")
    return filename
|
||||
|
||||
|
||||
# Command-line entry point: take the topic from the command line and
# generate its Markdown file.
parser = argparse.ArgumentParser(description="Generate a markdown file for a provided topic.")
parser.add_argument("topic", type=str, help="The topic to generate a markdown file for.")

args = parser.parse_args()

# args.topic is already a str (type=str above); str() keeps the original
# explicit conversion.
topic = str(args.topic)

generate_markdown(topic)
|
||||
Reference in New Issue
Block a user