使用gulp和puppeteer将epub转为图片

9/2/2020


Image

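有一批epub格式的电子书需要转化为图片,网上找了一些工具,效果不太理想,于是用gulp和puppeteer自己实现。epub本质上是一个zip压缩包,解压后即可得到其中的XHTML页面文件,再用Puppeteer打开每个页面并截图即可。下面两段代码位于同一个gulpfile.js中,共用其中引入的path和glob模块。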

epub转化为HTML文件

 

const { src, dest } = require("gulp");
const glob = require("glob");
const unzip = require("gulp-unzip");

/**
 * Gulp task: unpack every `source/*.epub` archive into
 * `unpacked/<archive name without .epub>/`.
 *
 * Each archive is processed by its own `src -> unzip -> dest` stream; all
 * streams run concurrently.
 *
 * @returns {Promise<void>} Resolves after every output stream has emitted
 *   `finish`; rejects with the first error emitted by any stage.
 */
async function unzipEpub() {
  const epubFiles = glob.sync("source/*.epub");

  // `forEach` returns undefined, so awaiting it would resolve immediately and
  // gulp would treat the task as done while the streams are still writing.
  // Build one promise per stream and wait for all of them instead.
  await Promise.all(
    epubFiles.map(
      (epub) =>
        new Promise((resolve, reject) => {
          const targetDir = path.join("unpacked", path.basename(epub, ".epub"));

          // `pipe()` does not forward errors downstream, so every stage gets
          // its own handler; otherwise a failure would be an unhandled
          // 'error' event instead of a rejected task.
          src(epub)
            .once("error", reject)
            .pipe(unzip({ keepEmpty: true }))
            .once("error", reject)
            .pipe(dest(targetDir))
            .once("error", reject)
            .once("finish", resolve);
        })
    )
  );
}

exports.unzipEpub = unzipEpub;

HTML转化为图片

 

const path = require("path");
const { readdirSync, mkdirSync } = require("fs");
const puppeteer = require("puppeteer");

/**
 * Gulp task: render every unpacked EPUB page to a PNG screenshot.
 *
 * For each sub-directory `<book>` of `unpacked/`, every file matching
 * `unpacked/<book>/OEBPS/Text/*.xhtml` is opened in headless Chromium and
 * saved as `images/<book>/<page name>.png`.
 *
 * @returns {Promise<void>} Resolves once all screenshots have been written;
 *   rejects on the first navigation/screenshot/filesystem error.
 */
async function epub2png() {
  // Required locally so the block does not depend on a new file-level import.
  const { pathToFileURL } = require("url");

  /**
   * @param {string} currentDir - Directory to scan.
   * @returns {string[]} Names of the immediate sub-directories of `currentDir`.
   */
  function listSubDirectories(currentDir) {
    return readdirSync(currentDir, { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory())
      .map((dirent) => dirent.name);
  }

  const epubXmlDirs = listSubDirectories("unpacked/");

  // One shared browser instead of one launch per page: launching Chromium for
  // every page at once is slow and can exhaust memory on large books.
  const browser = await puppeteer.launch();

  try {
    // `for...of` (not `forEach(async ...)`) so each step is actually awaited
    // and the task only completes after the last screenshot is on disk.
    for (const epubXmlDir of epubXmlDirs) {
      // `recursive` creates a missing `images/` parent and makes re-runs safe
      // when the directory already exists.
      mkdirSync(`images/${epubXmlDir}`, { recursive: true });
      const epubXmls = glob.sync(`unpacked/${epubXmlDir}/OEBPS/Text/*.xhtml`);

      for (const epubXml of epubXmls) {
        const page = await browser.newPage();

        try {
          await page.setViewport({
            width: 1601,
            height: 1601,
            deviceScaleFactor: 1,
          });

          // pathToFileURL resolves the path absolutely and percent-encodes
          // characters such as '#', '%' and spaces, and handles Windows
          // drive letters, which a hand-built `file://` string does not.
          await page.goto(pathToFileURL(epubXml).href);

          await page.screenshot({
            path: `images/${epubXmlDir}/${path.basename(epubXml, ".xhtml")}.png`,
          });
        } finally {
          await page.close();
        }
      }
    }
  } finally {
    // Always shut Chromium down, even when a page fails, so no headless
    // process is left behind.
    await browser.close();
  }
}

exports.epub2png = epub2png;

结论

Puppeteer通过Headless浏览器截屏是生成图片的一种方式。