[LyX/master] ePub: copy images into the ePub archive.
Thibaut Cuvelier
tcuvelier at lyx.org
Sun Feb 7 19:00:04 UTC 2021
commit 7f4782d51d45b1a4770273930ff130b1d0d63091
Author: Thibaut Cuvelier <tcuvelier at lyx.org>
Date: Sun Feb 7 20:00:31 2021 +0100
ePub: copy images into the ePub archive.
---
lib/scripts/docbook2epub.py | 57 ++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 56 insertions(+), 1 deletions(-)
diff --git a/lib/scripts/docbook2epub.py b/lib/scripts/docbook2epub.py
index 28b7e4b..5799ede 100644
--- a/lib/scripts/docbook2epub.py
+++ b/lib/scripts/docbook2epub.py
@@ -13,7 +13,7 @@
from __future__ import print_function
-# import glob # Not powerful enough before Python 3.5.
+import glob
import os
import shutil
import sys
@@ -29,6 +29,7 @@ def parse_arguments():
print('Generating ePub with the following parameters:')
print(own_path)
+ print(java_path)
print(input)
print(output)
@@ -68,6 +69,59 @@ def start_xslt_transformation(input, output_dir, script_folder, java_path):
print('Generated ePub contents.')
+def get_images_from_package_opf(package_opf):
+    """Return the href of every image declared in an OPF manifest.
+
+    The file is scanned textually (it is not parsed as XML): every ``<item``
+    tag whose ``media-type`` attribute starts with ``image`` contributes the
+    value of its ``href`` attribute, in order of appearance.
+
+    :param package_opf: path to the ``package.opf`` file to read.
+    :return: list of href strings, possibly empty. An empty list is also
+        returned (after printing a warning) when the file does not exist.
+    """
+    images = []
+
+    # Example in the OPF file:
+    #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
+    # The XHTML files are also <item> tags, but they must not be returned:
+    #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
+    try:
+        # Read as UTF-8, the encoding change_image_paths uses to rewrite this very file.
+        with open(package_opf, 'r', encoding='utf8') as f:
+            for line in f:
+                # A single line may hold several tags: examine each one on its own.
+                for chunk in line.split('<item')[1:]:
+                    tag = chunk.split('>', 1)[0]
+                    # Skip non-image items and image items without a href instead of crashing.
+                    if 'media-type="image' not in tag or 'href="' not in tag:
+                        continue
+                    images.append(tag.split('href="', 1)[1].split('"', 1)[0])
+    except FileNotFoundError:
+        print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
+
+    return images
+
+
+def change_image_paths(file, renamed):
+    """Rewrite ``file`` in place, replacing each key of ``renamed`` by its value.
+
+    All replacements happen in one pass over the text, so the output of one
+    replacement is never rewritten by another one. When several keys match at
+    the same position, the longest one wins.
+
+    :param file: path to a UTF-8 text file (OPF or XHTML) to modify.
+    :param renamed: dictionary mapping old image paths to new ones. Empty keys
+        are ignored; if nothing remains to replace, the file is not opened.
+    """
+    # Local import, so that this function does not depend on the imports at the top of the script.
+    import re
+
+    # An empty key would match between every pair of characters.
+    old_paths = [old for old in renamed if old]
+    if not old_paths:
+        return
+
+    # Single-pass multiple replacement, following the idea of
+    # https://www.oreilly.com/library/view/python-cookbook/0596001673/ch03s15.html
+    # Longest paths come first in the alternation so that a path that is a prefix of another cannot shadow it.
+    old_paths.sort(key=len, reverse=True)
+    pattern = re.compile('|'.join(re.escape(old) for old in old_paths))
+
+    with open(file, 'r', encoding='utf8') as f:
+        contents = f.read()
+
+    contents = pattern.sub(lambda match: renamed[match.group(0)], contents)
+
+    with open(file, 'w', encoding='utf8') as f:
+        f.write(contents)
+
+
+def copy_images(output_dir):
+    """Copy the images referenced by the ePub into ``OEBPS/images/`` and fix their paths.
+
+    The image paths are read from ``OEBPS/package.opf``; they are then replaced, both
+    in that file and in every ``OEBPS/*.xhtml`` file, by a path relative to ``OEBPS/``
+    of the form ``images/<file name>``. Finally, the image files themselves are copied.
+
+    :param output_dir: folder containing the generated ``OEBPS`` folder.
+    :raises OSError: if an image or one of the files to rewrite cannot be read or written.
+    """
+    oebps_dir = os.path.join(output_dir, 'OEBPS')
+    package_opf = os.path.join(oebps_dir, 'package.opf')
+
+    # First, get the mapping old file => file in the ePub archive. Typically, the current paths are absolute.
+    original_images = get_images_from_package_opf(package_opf)
+    if not original_images:
+        # No image, or no package.opf at all (a warning has already been printed): there is nothing to rewrite
+        # or to copy, and trying to open a missing package.opf would only crash.
+        return
+
+    renamed = {}
+    used_names = set()
+    for img in original_images:
+        if img in renamed:
+            continue
+
+        # Two images from different folders may share a file name: number the later ones so that they do
+        # not overwrite each other. Names are compared in lower case for case-insensitive file systems.
+        root, ext = os.path.splitext(os.path.basename(img))
+        name = root + ext
+        counter = 1
+        while name.lower() in used_names:
+            name = '{}_{}{}'.format(root, counter, ext)
+            counter += 1
+        used_names.add(name.lower())
+
+        # Always a forward slash: this string is written as a href into the OPF and XHTML files.
+        renamed[img] = 'images/' + name
+
+    # Then, transform all paths (both OPF and XHTML files).
+    change_image_paths(package_opf, renamed)
+    for file in glob.glob(os.path.join(oebps_dir, '*.xhtml')):
+        change_image_paths(file, renamed)
+
+    # Ensure that the destination path exists.
+    os.makedirs(os.path.join(oebps_dir, 'images'), exist_ok=True)
+
+    # Finally, actually copy the image files.
+    for (old, new) in renamed.items():
+        shutil.copyfile(old, os.path.join(oebps_dir, new))
+
+
def create_zip_archive(output, output_dir):
with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zip:
# Python 3.5 brings the `recursive` argument. For older versions, this trick is required...
@@ -83,4 +137,5 @@ if __name__ == '__main__':
java_path, input, output, script_folder = parse_arguments()
output_dir = create_temporary_folder()
start_xslt_transformation(input, output_dir, script_folder, java_path)
+ copy_images(output_dir)
create_zip_archive(output, output_dir)
More information about the lyx-cvs
mailing list