๐Ÿ“ฆ p0n1 / blogspot_to_markdown

๐Ÿ“„ main.py ยท 63 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import argparse
import logging
import os
import re

import requests
from markdownify import markdownify as md

def fetch_posts(blog_url, api_key):
    """Fetch all posts of a Blogspot blog via the Blogger v3 API.

    Args:
        blog_url: Public URL of the blog (e.g. 'https://example.blogspot.com').
        api_key: Blogger API key.

    Returns:
        List of post resources (dicts) from the API; empty list if the blog
        has no posts.

    Raises:
        requests.HTTPError: If any API request returns an error status.
    """
    # Resolve the blog URL to its numeric ID. Pass query arguments via
    # `params` so requests URL-encodes them (a raw f-string breaks on
    # URLs containing '&', '#', etc.).
    blog_id_response = requests.get(
        'https://www.googleapis.com/blogger/v3/blogs/byurl',
        params={'key': api_key, 'url': blog_url},
        timeout=30,
    )
    blog_id_response.raise_for_status()
    blog_id = blog_id_response.json()['id']

    # The posts endpoint is paginated; follow nextPageToken until exhausted
    # so blogs with more posts than one page allows are fully exported.
    posts = []
    page_token = None
    posts_url = f'https://www.googleapis.com/blogger/v3/blogs/{blog_id}/posts'
    while True:
        params = {'key': api_key, 'maxResults': 500}
        if page_token:
            params['pageToken'] = page_token
        response = requests.get(posts_url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
        posts.extend(data.get('items', []))
        page_token = data.get('nextPageToken')
        if not page_token:
            return posts

def save_markdown(post, directory):
    """Write a single Blogger post resource to `directory` as Markdown.

    The file is named '<YYYY-MM-DD>_<sanitized title>.md' and contains the
    title as an H1 heading, a blockquote with the original post URL, and
    the post body converted from HTML to Markdown.

    Args:
        post: Blogger API post resource; must contain the keys 'title',
            'content', 'published' (ISO timestamp) and 'url'.
        directory: Existing directory to write the file into.

    Raises:
        OSError: If the file cannot be written.
        KeyError: If a required post field is missing.
    """
    title = post['title']
    content_md = md(post['content'])  # HTML body -> Markdown
    publish_date = post['published'][:10]  # YYYY-MM-DD prefix of ISO timestamp
    original_url = post['url']

    # Collapse whitespace and characters that are illegal in filenames on
    # common filesystems (Windows: \ / : * ? " < > |) to underscores; the
    # original space-and-slash replacement missed the rest and could
    # produce unwritable paths.
    safe_title = re.sub(r'[\\/:*?"<>|\s]+', '_', title).strip('_') or 'untitled'
    filename = f"{publish_date}_{safe_title}.md"
    filepath = os.path.join(directory, filename)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(f"# {title}\n\n")
        f.write(f"> Original URL: {original_url}\n\n")  # provenance link
        f.write(content_md)

def main():
    """CLI entry point: fetch every post of a blog and export it to Markdown.

    Parses --blog-url, --api-key and --output-dir, creates the output
    directory, then writes one Markdown file per post. A failure on one
    post is logged and skipped rather than aborting the whole export.
    """
    parser = argparse.ArgumentParser(description="Export Blogspot posts to Markdown files.")
    parser.add_argument('--blog-url', required=True, help='URL of the Blogspot blog to export.')
    parser.add_argument('--api-key', required=True, help='Blogger API key.')
    parser.add_argument('--output-dir', default='markdown_posts', help='Directory to save Markdown files.')
    args = parser.parse_args()

    # Configure logging to stderr with timestamps.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()],
    )

    logging.info("Starting the export process.")
    os.makedirs(args.output_dir, exist_ok=True)
    posts = fetch_posts(args.blog_url, args.api_key)
    # Use lazy %-style logging args so messages are only formatted when emitted.
    logging.info("Fetched %d posts.", len(posts))

    failures = 0
    for idx, post in enumerate(posts, start=1):
        title = post.get('title', '(untitled)')
        logging.info("Exporting post %d/%d: %s", idx, len(posts), title)
        try:
            save_markdown(post, args.output_dir)
        except (OSError, KeyError):
            # One malformed post (missing field, unwritable filename) should
            # not abort the whole export; record it and continue.
            failures += 1
            logging.exception("Failed to export: %s", title)
        else:
            logging.info("Successfully exported: %s", title)

    logging.info("Exported %d of %d posts to the '%s' directory.",
                 len(posts) - failures, len(posts), args.output_dir)

# Run the exporter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()