1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import argparse
import logging
import os
import re

import requests
from markdownify import markdownify as md
def fetch_posts(blog_url: str, api_key: str, timeout: float = 30) -> list:
    """Return every post of the blog at *blog_url* via the Blogger v3 API.

    The blog URL is first resolved to a blog id with the ``blogs/byurl``
    endpoint, then the ``posts`` endpoint is read page by page until the
    response no longer carries a ``nextPageToken``.

    Args:
        blog_url: Public URL of the blog to export.
        api_key: Blogger API key sent as the ``key`` query parameter.
        timeout: Seconds passed to each ``requests.get`` call so a stalled
            connection cannot hang the export forever.

    Returns:
        A list with the ``items`` entries of all pages, in the order the
        API returned them; empty when the blog has no posts.

    Raises:
        requests.HTTPError: If any API call answers with an error status.
        KeyError: If the blog lookup response has no ``id`` field.
    """
    api_root = 'https://www.googleapis.com/blogger/v3/blogs'

    # Pass query values through ``params`` so requests URL-encodes them; a
    # blog URL containing '&', '?' or '#' would otherwise corrupt the query.
    blog_response = requests.get(
        f'{api_root}/byurl',
        params={'key': api_key, 'url': blog_url},
        timeout=timeout,
    )
    blog_response.raise_for_status()
    blog_id = blog_response.json()['id']

    posts = []
    query = {'key': api_key, 'maxResults': 500}
    while True:
        response = requests.get(
            f'{api_root}/{blog_id}/posts',
            params=query,
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()
        posts.extend(payload.get('items', []))

        # A missing/empty token means this was the last page.
        next_token = payload.get('nextPageToken')
        if not next_token:
            return posts
        query['pageToken'] = next_token
def save_markdown(post, directory):
    """Write one post to *directory* as a Markdown file.

    The file is named ``<YYYY-MM-DD>_<sanitized title>.md`` and contains a
    level-1 heading with the title, a quoted line with the original URL,
    and the post body converted from HTML to Markdown.

    Args:
        post: Mapping with the keys ``title``, ``content`` (HTML),
            ``published`` (its first 10 characters are used as the
            YYYY-MM-DD date) and ``url``.
        directory: Existing directory the file is written into.

    Raises:
        KeyError: If *post* lacks one of the keys listed above.
        OSError: If the file cannot be created or written.
    """
    title = post['title']
    content_md = md(post['content'])
    publish_date = post['published'][:10]  # Extract YYYY-MM-DD from the published date
    original_url = post['url']  # Get the original URL of the post

    # Spaces and '/' become '_' as before; additionally neutralize the other
    # characters that are invalid or path-significant in file names
    # (\ : * ? " < > |) and control characters such as tabs and newlines.
    safe_title = re.sub(r'[ \\/:*?"<>|\x00-\x1f]', '_', title)
    filepath = os.path.join(directory, f"{publish_date}_{safe_title}.md")

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(f"# {title}\n\n")
        f.write(f"> Original URL: {original_url}\n\n")  # Add the original URL
        f.write(content_md)
def main():
    """Command-line entry point: export a Blogspot blog to Markdown files.

    Reads ``--blog-url`` and ``--api-key`` (both required) and
    ``--output-dir`` (default ``markdown_posts``), sets up INFO-level
    logging, makes sure the output directory exists, fetches the posts and
    saves each one, logging progress along the way.
    """
    cli = argparse.ArgumentParser(description="Export Blogspot posts to Markdown files.")
    # Flag name paired with the keyword arguments for add_argument.
    option_table = (
        ('--blog-url', {'required': True, 'help': 'URL of the Blogspot blog to export.'}),
        ('--api-key', {'required': True, 'help': 'Blogger API key.'}),
        ('--output-dir', {'default': 'markdown_posts', 'help': 'Directory to save Markdown files.'}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    args = cli.parse_args()

    # Configure logging
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=logging.INFO,
        handlers=[console_handler],
    )

    logging.info("Starting the export process.")
    os.makedirs(args.output_dir, exist_ok=True)

    posts = fetch_posts(args.blog_url, args.api_key)
    logging.info(f"Fetched {len(posts)} posts.")

    # Number posts from 1 so the progress log reads "1/N" .. "N/N".
    for idx, post in enumerate(posts, 1):
        logging.info(f"Exporting post {idx}/{len(posts)}: {post['title']}")
        save_markdown(post, args.output_dir)
        logging.info(f"Successfully exported: {post['title']}")

    logging.info(f"Exported {len(posts)} posts to the '{args.output_dir}' directory.")
# Run the exporter only when this file is executed as a script, so that
# importing the module does not trigger an export.
if __name__ == "__main__":
    main()