使用方法
安装本用户脚本后，访问人民日报电子版网页，点击页面右上角的下载按钮，耐心等待所有版面下载并合并完成。

代码
// ==UserScript==
// @name 人民日报 PDF 批量下载 + 合并
// @namespace http://blog.liyunhe.wang/
// @version 1.0
// @description 自动抓取人民日报当前日期所有版面 PDF 并可合并下载
// @author liyunhe && ChatGPT
// @match https://paper.people.com.cn/rmrb/html/*
// @match https://paper.people.com.cn/rmrb/pc/*
// @match http://paper.people.com.cn/rmrb/html/*
// @match http://paper.people.com.cn/rmrb/pc/*
// @grant none
// @require https://cdn.jsdelivr.net/npm/pdf-lib/dist/pdf-lib.min.js
// ==/UserScript==
(function() {
'use strict';
// Add a fixed-position button (top-right corner) that downloads every page PDF
// of the current issue and saves them as one merged file.
const btn = document.createElement('button');
btn.textContent = '📥 下载全部 PDF(含合并)';
btn.style.cssText = 'position:fixed;top:20px;right:20px;z-index:9999;padding:10px;background:#007bff;color:white;border:none;border-radius:5px;cursor:pointer;';
document.body.appendChild(btn);

// Click handler: collect the PDF links, download them one by one, merge them,
// and trigger a single download. Progress is shown in the button label.
btn.onclick = async () => {
  btn.disabled = true;
  btn.textContent = '⏳ 正在抓取数据...';
  try {
    const { date, detailedLinks } = await getPDFLinks(location.href);
    // Pages whose PDF link could not be determined carry a null pdfUrl.
    const validLinks = detailedLinks.filter((link) => link.pdfUrl);
    if (validLinks.length === 0) {
      alert('❌ 未找到任何 PDF 链接');
      return;
    }

    // Download sequentially so the label can report per-page progress.
    const blobs = [];
    for (const [i, { title, pdfUrl }] of validLinks.entries()) {
      btn.textContent = `⬇️ 下载中:${title} (${i + 1}/${validLinks.length})`;
      const res = await fetch(pdfUrl);
      // Without this check an HTTP error page would be handed to the PDF
      // merger and fail there with a misleading parse error.
      if (!res.ok) {
        throw new Error(`${title} 下载失败 (HTTP ${res.status})`);
      }
      blobs.push(await res.blob());
    }

    btn.textContent = '📚 合并 PDF 中...';
    const mergedBlob = await mergePDFs(blobs);
    const mergedFilename = `人民日报-${date}.pdf`;
    triggerDownload(mergedBlob, mergedFilename);
    btn.textContent = `✅ 下载完成:${mergedFilename}`;
  } catch (err) {
    console.error(err);
    alert(`❌ 出错:${err.message}`);
  } finally {
    // Leave the final status visible for 5 seconds, then re-arm the button.
    setTimeout(() => {
      btn.disabled = false;
      btn.textContent = '📥 下载全部 PDF(含合并)';
    }, 5000);
  }
};
/**
 * Collect the PDF link of every newspaper page listed on `mainUrl`.
 *
 * Fetches `mainUrl`, reads the issue date from the `.date` element and the
 * page list from the `#pageLink` anchors, then fetches every listed page
 * concurrently and takes the `href` of its `.paper-bot a` element as the
 * PDF link.
 *
 * @param {string} mainUrl - URL of the page to scan; also the base URL for
 *   the page links found on it.
 * @returns {Promise<{date: string, detailedLinks: Array<{title: string, pdfUrl: (string|null)}>}>}
 *   `date` is formatted `YYYY-MM-DD`, or `'unknown'` when no date is found.
 *   `pdfUrl` is `null` for pages whose PDF link could not be determined.
 * @throws {Error} If the main page responds with a non-2xx status.
 */
async function getPDFLinks(mainUrl) {
  const res = await fetch(mainUrl);
  if (!res.ok) {
    throw new Error(`无法获取版面列表 (HTTP ${res.status})`);
  }
  const html = await res.text();
  const doc = new DOMParser().parseFromString(html, 'text/html');

  const dateMatch = doc.querySelector('.date')?.textContent.match(/(\d{4})年(\d{2})月(\d{2})日/);
  const date = dateMatch ? `${dateMatch[1]}-${dateMatch[2]}-${dateMatch[3]}` : 'unknown';

  const pageLinks = [...doc.querySelectorAll('#pageLink')].map((el) => ({
    title: el.textContent.trim(),
    url: new URL(el.getAttribute('href'), mainUrl).href,
  }));

  const detailedLinks = await Promise.all(pageLinks.map(async ({ title, url }) => {
    try {
      const pageRes = await fetch(url);
      if (!pageRes.ok) {
        throw new Error(`HTTP ${pageRes.status}`);
      }
      const subDoc = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
      const href = subDoc.querySelector('.paper-bot a')?.getAttribute('href');
      // A relative href is relative to the page it was found on, so resolve
      // it against that page's URL rather than the main page's.
      const pdfUrl = href ? new URL(href, url).href : null;
      return { title, pdfUrl };
    } catch (err) {
      // Best effort: one broken page must not abort the whole issue, but
      // leave a trace so the gap can be diagnosed.
      console.warn(`Could not resolve the PDF link for "${title}" (${url})`, err);
      return { title, pdfUrl: null };
    }
  }));

  return { date, detailedLinks };
}
/**
 * Concatenate several PDF blobs into a single PDF, keeping the input order.
 *
 * Each blob is loaded with pdf-lib (the `PDFLib` global) and all of its pages
 * are copied, in order, onto the end of a newly created document.
 *
 * @param {Iterable<Blob>} blobs - PDF files to append, first to last.
 * @returns {Promise<Blob>} The merged document as an `application/pdf` blob.
 */
async function mergePDFs(blobs) {
  const { PDFDocument } = PDFLib;
  const output = await PDFDocument.create();

  for (const sourceBlob of blobs) {
    const source = await PDFDocument.load(await sourceBlob.arrayBuffer());
    const copiedPages = await output.copyPages(source, source.getPageIndices());
    for (const page of copiedPages) {
      output.addPage(page);
    }
  }

  const bytes = await output.save();
  return new Blob([bytes], { type: 'application/pdf' });
}
/**
 * Ask the browser to save `blob` under `filename`.
 *
 * Creates a temporary object URL for the blob and clicks a detached
 * `<a download>` element that points at it.
 *
 * @param {Blob} blob - Data to save.
 * @param {string} filename - Suggested file name for the download.
 * @returns {void}
 */
function triggerDownload(blob, filename) {
  // Delay before the object URL is released; the download needs the URL to
  // stay valid until the browser has actually started reading it.
  const REVOKE_DELAY_MS = 30000;

  const objectUrl = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = objectUrl;
  a.download = filename;
  a.click();

  // Revoking synchronously right after click() can invalidate the URL before
  // the download begins, so release it later instead.
  setTimeout(() => URL.revokeObjectURL(objectUrl), REVOKE_DELAY_MS);
}
})();

已有 0 条评论