[LyX/master] ePub: copy images into the ePub archive.

Thibaut Cuvelier tcuvelier at lyx.org
Sun Feb 7 19:00:04 UTC 2021


commit 7f4782d51d45b1a4770273930ff130b1d0d63091
Author: Thibaut Cuvelier <tcuvelier at lyx.org>
Date:   Sun Feb 7 20:00:31 2021 +0100

    ePub: copy images into the ePub archive.
---
 lib/scripts/docbook2epub.py |   57 ++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 56 insertions(+), 1 deletions(-)

diff --git a/lib/scripts/docbook2epub.py b/lib/scripts/docbook2epub.py
index 28b7e4b..5799ede 100644
--- a/lib/scripts/docbook2epub.py
+++ b/lib/scripts/docbook2epub.py
@@ -13,7 +13,7 @@
 
 from __future__ import print_function
 
-# import glob  # Not powerful enough before Python 3.5.
+import glob
 import os
 import shutil
 import sys
@@ -29,6 +29,7 @@ def parse_arguments():
 
     print('Generating ePub with the following parameters:')
     print(own_path)
+    print(java_path)
     print(input)
     print(output)
 
@@ -68,6 +69,59 @@ def start_xslt_transformation(input, output_dir, script_folder, java_path):
     print('Generated ePub contents.')
 
 
+def get_images_from_package_opf(package_opf):
+    """Return the href of every image <item> found in an OPF package file.
+
+    The file is scanned line by line (no XML parsing): a line is considered an image entry when it contains both
+    '<item' and 'media-type="image'. The returned strings are the raw attribute values, exactly as written in the file.
+
+    :param package_opf: path to the package.opf file.
+    :return: list of href values, in file order; empty if the file does not exist (a message is printed).
+    """
+    images = []
+
+    # Example in the OPF file:
+    #     <item id="d436e1" href="D:/LyX/lib/images/buffer-view.svgz" media-type="image/SVGZ"/>
+    # The XHTML files are also <item> tags:
+    #     <item id="id-d0e2" href="index.xhtml" media-type="application/xhtml+xml"/>
+    try:
+        # Read as UTF-8, like change_image_paths does when it rewrites this same file, instead of relying on the
+        # platform's default encoding.
+        with open(package_opf, 'r', encoding='utf8') as f:
+            for line in f:
+                if '<item' not in line or 'media-type="image' not in line:
+                    continue
+
+                # An image item whose href is not on the same line cannot be handled by this line-based scan:
+                # skip it rather than crash with an IndexError.
+                _, marker, rest = line.partition('href="')
+                if marker:
+                    images.append(rest.split('"')[0])
+    except FileNotFoundError:
+        print('The package.opf file was not found, probably due to a DocBook error. The ePub file will be corrupt.')
+
+    return images
+
+
+def change_image_paths(file, renamed):
+    """Rewrite a UTF-8 text file in place, replacing every key of `renamed` by its value.
+
+    All replacements are done in one pass over the original text, so that text produced by one replacement is never
+    rewritten by another one, and a path that contains another path as a substring (e.g. 'bigfig.png' and 'fig.png')
+    is replaced as a whole.
+
+    :param file: path to the file to rewrite (an OPF or XHTML file).
+    :param renamed: dictionary mapping the old paths to the new ones. Empty keys are ignored; if no usable key is left,
+        the file is not touched.
+    """
+    import re  # Local import: only this function needs it.
+
+    # Longest paths first: when two paths start identically, the most specific one must win.
+    old_paths = sorted((old for old in renamed if old), key=len, reverse=True)
+    if not old_paths:
+        return
+    pattern = re.compile('|'.join(re.escape(old) for old in old_paths))
+
+    with open(file, 'r', encoding='utf8') as f:
+        contents = f.read()
+
+    with open(file, 'w', encoding='utf8') as f:
+        f.write(pattern.sub(lambda match: renamed[match.group(0)], contents))
+
+
+def copy_images(output_dir):
+    """Copy the images used by the generated ePub into OEBPS/images/ and rewrite their paths accordingly.
+
+    The image paths are read from OEBPS/package.opf; they are then replaced, in package.opf and in all the XHTML
+    files directly within OEBPS/, by paths of the form 'images/<file name>'. Finally, the image files themselves are
+    copied. If package.opf lists no image (or does not exist), nothing is done.
+
+    :param output_dir: folder containing the generated ePub contents (the one that has an OEBPS/ subfolder).
+    """
+    # Copy the assets to the OEBPS/images/. All paths are available in OEBPS/package.opf, but they must also be changed
+    # in the XHTML files. Typically, the current paths are absolute.
+    oebps_dir = output_dir + '/OEBPS'
+    package_opf = oebps_dir + '/package.opf'
+
+    # First, get the mapping old file => file in the ePub archive.
+    original_images = get_images_from_package_opf(package_opf)
+    if not original_images:
+        # Either there is no image, or package.opf is missing (which has already been reported): there is nothing to
+        # rewrite or copy, and trying to rewrite a missing package.opf would only raise an exception.
+        return
+
+    renamed = {}
+    used_names = set()
+    for img in original_images:
+        if img in renamed:
+            continue
+
+        # Two images from different folders may share the same file name: give each one a distinct name in the
+        # archive, so that they do not overwrite each other.
+        base_name = os.path.basename(img)
+        stem, extension = os.path.splitext(base_name)
+        name = base_name
+        suffix = 1
+        while name in used_names:
+            name = stem + '-' + str(suffix) + extension
+            suffix += 1
+        used_names.add(name)
+        renamed[img] = 'images/' + name
+
+    # Then, transform all paths (both OPF and XHTML files).
+    change_image_paths(package_opf, renamed)
+    for file in glob.glob(oebps_dir + '/*.xhtml'):
+        change_image_paths(file, renamed)
+
+    # Ensure that the destination path exists.
+    os.makedirs(oebps_dir + '/images/', exist_ok=True)
+
+    # Finally, actually copy the image files.
+    for (old, new) in renamed.items():
+        shutil.copyfile(old, oebps_dir + '/' + new)
+
+
 def create_zip_archive(output, output_dir):
     with zipfile.ZipFile(output, 'w', zipfile.ZIP_DEFLATED) as zip:
         # Python 3.5 brings the `recursive` argument. For older versions, this trick is required...
@@ -83,4 +137,5 @@ if __name__ == '__main__':
     java_path, input, output, script_folder = parse_arguments()
     output_dir = create_temporary_folder()
     start_xslt_transformation(input, output_dir, script_folder, java_path)
+    copy_images(output_dir)
     create_zip_archive(output, output_dir)


More information about the lyx-cvs mailing list