博客图床迁移

发表于 2024-07-20 分类于经验 Waline：

本博客之前的图床是 mdnice，但是最近 mdnice 开始收费了，不开会员图床容量只有 100 MB，而且毕竟是第三方服务，担心图床哪天突然失效。之前没有使用博客本地图床，是因为博客部署在国外服务器上，虽然用了 CDN，但在国内网络环境下，如果没有梯子并且文章中有图，网页可能会加载缓慢。

如今没有稳定的图床，只能把图片放在博客本地。好在可以对图片进行压缩，让图片加载速度更快：200 KB 的图片可以压缩为 60 KB，而且肉眼看不出图片差别。

难点是博客中使用 mdnice 图床的图片比较多，格式为 https://files...png ，手动下载，压缩，替换路径，费时费力。

具体的实现方案是使用 Python 正则匹配文章中 url 的值，使用 requests 下载图片至本地，使用 pngquant 将图片进行压缩并覆盖原图片，最后替换掉文章中的 url 为压缩之后图片的路径。

用 Python 写了一个程序，一键替换

import os
import re
import sys

import requests

# Compression tool: pngquant is expected in a "pngquant" folder located in
# sys.path[0] (the script's own directory when the file is run directly).
# os.path.join is used so that the directory and the relative part are
# separated correctly instead of being glued together as "<dir>./pngquant/...".
pngquant_exe = os.path.join(sys.path[0], 'pngquant', 'pngquant.exe')
# Root folder for the downloaded images (relative to the working directory).
images_path_out = 'images'


def compression(file_path: str):
    """Compress a PNG file in place with pngquant.

    Runs the equivalent of
    ``pngquant.exe --force <file> --quality 80 -o <file>`` so that the
    compressed result overwrites the original file. Option reference:
    https://github.com/kornelski/pngquant?tab=readme-ov-file

    The tool is started with an argument list instead of a shell command
    string, so paths containing spaces or shell metacharacters are passed
    through unchanged.

    Args:
        file_path: Path of the PNG file to compress (used as input and output).

    A failure to launch pngquant is reported on stderr but not raised, and a
    non-zero exit status is ignored; in both cases the file is left as it is.
    """
    import subprocess  # local import so the block does not depend on file-level changes

    cmd = [pngquant_exe, '--force', file_path, '--quality', '80', '-o', file_path]
    try:
        subprocess.run(cmd, check=False)
    except OSError as exc:
        # Typically: the pngquant executable is missing at the configured path.
        print(f'pngquant could not be started: {exc}', file=sys.stderr)


def get_files_with_extension(folder_path, extension):
    """Collect the absolute paths of files whose names end with *extension*.

    *folder_path* is walked recursively, so files in sub-folders are included.

    Args:
        folder_path: Folder to search.
        extension: Suffix the file name must end with, e.g. ``'.md'``.

    Returns:
        A list of absolute file paths, in the order ``os.walk`` yields them.
    """
    return [
        os.path.abspath(os.path.join(current_dir, name))
        for current_dir, _subdirs, names in os.walk(folder_path)
        for name in names
        if name.endswith(extension)
    ]


def replace_urls_in_file(file_path, replacement_func):
    """Replace every mdnice PNG URL in a UTF-8 text file.

    Each URL of the form ``https://files.mdnice.com....png`` is replaced by
    the return value of ``replacement_func(url, file_path, index)``, where
    ``index`` numbers the matches within this file starting at 1.

    Args:
        file_path: Path of the file to rewrite in place.
        replacement_func: Callable ``(matched_url, file_path, index) -> str``.

    The file is only written back when at least one URL was found. If
    ``replacement_func`` raises, the exception propagates and the file is
    left untouched, because writing happens after all replacements.
    """
    # newline='' turns off newline translation so that lines which are not
    # modified keep their original line endings when the file is rewritten.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        content = file.read()

    match_count = 0

    def replacement(match):
        nonlocal match_count
        match_count += 1
        return replacement_func(match.group(), file_path, match_count)

    # Non-greedy "\S*?" stops at the first ".png"; a greedy "\S*" would merge
    # two URLs written back to back (e.g. "![](a.png)![](b.png)") into one match.
    str_pattern = re.compile(r'https://files\.mdnice\.com\S*?\.png')
    new_content = str_pattern.sub(replacement, content)

    if match_count == 0:
        # Nothing to replace: do not rewrite the file.
        return

    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        file.write(new_content)


def download_png(url: str, out_path: str, timeout: float = 3):
    """Download an image to *out_path* and compress the saved file.

    Args:
        url: Address of the image to fetch.
        out_path: Destination file; it is overwritten if it already exists.
        timeout: Timeout in seconds passed to ``requests.get`` (default 3,
            the value that was previously hard-coded).

    Raises:
        requests.RequestException: If the request fails or the response
            carries an error status (via ``raise_for_status``). The exception
            propagates unchanged to the caller.
    """
    # Both proxy entries are set to None for this request
    # (presumably to bypass a locally configured proxy; confirm).
    response = requests.get(
        url,
        proxies={
            "http": None,
            "https": None,
        },
        timeout=timeout,
    )
    # Stop here on 4xx/5xx responses so no error page is saved as an image.
    response.raise_for_status()
    with open(out_path, 'wb') as file:
        file.write(response.content)
    # Compress the freshly written image in place.
    compression(out_path)


def dynamic_replacement(matched_string, file_path, num):
    """Download one matched image URL and return its new local path.

    The image is stored as
    ``<images_path_out>/<parent folder of file_path>/<file name without extension>/<num>.png``
    and the same path, with forward slashes, is returned as the replacement
    text for the URL.

    Args:
        matched_string: The image URL that was matched in the file.
        file_path: Path of the file in which the URL was found.
        num: Index of the match within that file; used as the image file name.

    Returns:
        The path of the downloaded image, using ``/`` as separator.
    """
    dir_name, base_name = os.path.split(file_path)
    last_component = os.path.basename(dir_name)
    file_name = os.path.splitext(base_name)[0]
    out_path = os.path.join(images_path_out, last_component, file_name)
    # exist_ok avoids the separate "exists" check before creating the folder.
    os.makedirs(out_path, exist_ok=True)
    # Use forward slashes so the path is usable as a link on any platform.
    out_file_path = os.path.join(out_path, f'{num}.png').replace(os.path.sep, '/')
    download_png(matched_string, out_file_path)
    return out_file_path


if __name__ == '__main__':
    # Folder containing the posts: taken from the first command-line argument
    # when one is given, otherwise the original default location is used.
    posts_dir = sys.argv[1] if len(sys.argv) > 1 else 'D:/WebstormProjects/hexo-blog/source/_posts'
    # Rewrite the image URLs of every Markdown file found under that folder.
    for item in get_files_with_extension(posts_dir, '.md'):
        replace_urls_in_file(item, dynamic_replacement)