• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zestedesavoir / zds-site / 11059094899

26 Sep 2024 07:38PM UTC coverage: 89.18% (+0.002%) from 89.178%
11059094899

push

github

Situphen
Met à jour les versions Python supportées

- 3.11 est la version sur le serveur de production
- Django 4.2 est compatible avec les versions 3.8-3.12

5844 of 7011 branches covered (83.35%)

16707 of 18734 relevant lines covered (89.18%)

1.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.93
/zds/tutorialv2/epub_utils.py
1
import contextlib
3✔
2
import logging
3✔
3
import os
3✔
4
import re
3✔
5
import shutil
3✔
6
from collections import namedtuple
3✔
7
from urllib import parse
3✔
8
from os import path
3✔
9
from bs4 import BeautifulSoup
3✔
10
from pathlib import Path
3✔
11
from shutil import copy
3✔
12
from django.template.loader import render_to_string
3✔
13
from django.conf import settings
3✔
14

15
from zds.tutorialv2.publish_container import publish_container
3✔
16
from zds.utils import old_slugify
3✔
17

18

19
def __build_mime_type_conf():
    """
    Describe the ``mimetype`` file of the ebook.

    Kept as a function so that the "mime" is easy to mock. For now it is compatible with the
    EPUB 3 standard (https://fr.flossmanuals.net/creer-un-epub/epub-3/ (fr)).

    :return: a dict holding the ``filename`` to create and the ``content`` to write into it
    """
    mime_type_conf = dict(filename="mimetype", content="application/epub+zip")
    return mime_type_conf
3✔
23

24

25
def __traverse_and_identify_images(root_image_dir, current_dir=None):
    """
    Walk the image folder recursively and describe every file found in it.

    :param root_image_dir: Root folder of the images
    :type root_image_dir: pathlib.Path
    :param current_dir: Folder currently explored, defaults to ``root_image_dir``
    :type current_dir: pathlib.Path
    :return: a generator of ``(path_inside_ebook, identifier, media_type)`` tuples, where
             ``path_inside_ebook`` is the file path relative to the root, prefixed with ``images``
    """
    media_type_map = {
        ".png": "image/png",
        ".jpeg": "image/jpeg",
        ".jpg": "image/jpeg",
        ".gif": "image/gif",
        # the registered media type for SVG is "image/svg+xml", not "image/svg"
        ".svg": "image/svg+xml",
    }
    # characters that are replaced by a dash in the generated identifier
    identifier_cleanup = str.maketrans({".": "-", "@": "-", "/": "-"})

    if current_dir is None:
        current_dir = root_image_dir

    for image_file_path in current_dir.iterdir():
        if image_file_path.is_dir():
            yield from __traverse_and_identify_images(root_image_dir, image_file_path)
            continue
        relative_image_path = image_file_path.relative_to(root_image_dir)
        ebook_image_path = Path("images", relative_image_path)
        # as_posix() keeps "/" as separator on every platform, so nested folders are always
        # flattened into the identifier the same way
        identifier = "image_" + relative_image_path.as_posix().lower().translate(identifier_cleanup)
        # unknown extensions are declared as PNG
        media_type = media_type_map.get(image_file_path.suffix.lower(), "image/png")
        yield ebook_image_path, identifier, media_type
3✔
52

53

54
def build_html_chapter_file(published_object, versioned_object, working_dir, root_dir, image_handler):
    """
    Publish the content as xhtml chapter files and yield a description of every produced file.

    The files are produced by ``publish_container`` with the ebook templates; images are looked up
    in the ``images`` folder which sits next to ``working_dir``.

    :param published_object: the published content as saved in database
    :type published_object: zds.tutorialv2.models.models_database.PublishedContent
    :param versioned_object: the object representing the public version in git file system
    :type versioned_object: zds.tutorialv2.models.models_versioned.VersionedContent
    :param working_dir: the directory handed to ``publish_container`` as output folder
    :type working_dir: pathlib.Path
    :param root_dir: the root directory into which dump the ebook
    :type root_dir: pathlib.Path
    :param image_handler: its ``handle_images`` method is used as image callback
    :type image_handler: ImageHandling
    :return: a generator of tuples composed as ``[splitted_html_file_relative_path, chapter-identifier, chapter-title]``
    """
    DirTuple = namedtuple("DirTuple", ["absolute", "relative"])
    images_folder = working_dir.parent / "images"
    titles_by_path = publish_container(
        published_object,
        str(working_dir),
        versioned_object,
        template="tutorialv2/export/ebook/chapter.html",
        file_ext="xhtml",
        image_callback=image_handler.handle_images,
        image_directory=DirTuple(
            absolute=str(images_folder.absolute()),
            relative=str(images_folder.relative_to(root_dir)),
        ),
        relative=".",
        intro_ccl_template="tutorialv2/export/ebook/introduction.html",
    )
    for container_path in titles_by_path:
        title = titles_by_path[container_path]
        # TODO: check if a function exists in the std lib to get rid of `root_dir + '/'`
        root_prefix = f"{root_dir.absolute()}/"
        path_inside_ebook = container_path.replace(root_prefix, "")
        chapter_identifier = "chapter-" + old_slugify(title)
        yield path_inside_ebook, chapter_identifier, title
3✔
86

87

88
def build_toc_ncx(chapters, tutorial, working_dir):
    """
    Render the ``toc.ncx.html`` template and write the result to ``toc.ncx`` in ``working_dir``.

    :param chapters: value exposed to the template as ``chapters``
    :param tutorial: object whose ``title`` and ``description`` are read; it is also exposed
                     to the template as ``content``
    :param working_dir: folder in which ``toc.ncx`` is written
    """
    ncx_file_path = Path(working_dir) / "toc.ncx"
    with ncx_file_path.open("w", encoding="utf-8") as ncx_file:
        template_context = {
            "chapters": chapters,
            "title": tutorial.title,
            "description": tutorial.description,
            "content": tutorial,
        }
        rendered_toc = render_to_string("tutorialv2/export/ebook/toc.ncx.html", context=template_context)
        ncx_file.write(rendered_toc)
101

102

103
def build_content_opf(content, chapters, images, working_dir):
    """
    Render the ``content.opf.xml`` template and write the result to ``content.opf`` in ``working_dir``.

    :param content: value exposed to the template as ``content``
    :param chapters: value exposed to the template as ``chapters``
    :param images: value exposed to the template as ``images``
    :param working_dir: folder in which ``content.opf`` is written
    """
    opf_file_path = Path(working_dir) / "content.opf"
    with opf_file_path.open("w", encoding="utf-8") as opf_file:
        template_context = {"content": content, "chapters": chapters, "images": images}
        rendered_opf = render_to_string("tutorialv2/export/ebook/content.opf.xml", context=template_context)
        opf_file.write(rendered_opf)
111

112

113
def build_container_xml(working_dir):
    """
    Render the ``container.xml`` template and write the result to ``container.xml`` in ``working_dir``.

    :param working_dir: folder in which ``container.xml`` is written
    """
    container_file_path = Path(working_dir) / "container.xml"
    with container_file_path.open("w", encoding="utf-8") as container_file:
        rendered_container = render_to_string("tutorialv2/export/ebook/container.xml")
        container_file.write(rendered_container)
3✔
116

117

118
def build_nav_xhtml(working_dir, content, chapters):
    """
    Render the ``nav.html`` template and write the result to ``nav.xhtml`` in ``working_dir``.

    :param working_dir: folder in which ``nav.xhtml`` is written
    :param content: value exposed to the template as ``content``
    :param chapters: value exposed to the template as ``chapters``
    """
    nav_file_path = Path(working_dir) / "nav.xhtml"
    with nav_file_path.open("w", encoding="utf-8") as nav_file:
        template_context = {"content": content, "chapters": chapters}
        rendered_nav = render_to_string("tutorialv2/export/ebook/nav.html", template_context)
        nav_file.write(rendered_nav)
3✔
121

122

123
def build_ebook(published_content_entity, working_dir, final_file_path):
    """
    Build the whole ebook tree under ``working_dir/ebook`` and zip it into ``final_file_path``.

    Steps, in order: create the folder layout, copy the gallery images, write the ``mimetype``
    file, generate the chapter files, write the table of contents, copy the stylesheets and the
    static images, drop the images that no chapter uses, write the package/navigation files and
    finally create the archive.

    :param published_content_entity: published content; its ``content`` and ``sha_public``
                                     attributes are read and it is handed to the templates
    :param working_dir: folder in which the ``ebook`` tree is built
    :param final_file_path: path of the resulting file (the archive is first created as
                            ``final_file_path + ".zip"``, then moved to ``final_file_path``)
    """
    ops_dir = Path(working_dir, "ebook", "OPS")
    text_dir_path = Path(ops_dir, "Text")
    style_dir_path = Path(ops_dir, "styles")
    font_dir_path = Path(ops_dir, "Fonts")
    meta_inf_dir_path = Path(working_dir, "ebook", "META-INF")
    target_image_dir = Path(ops_dir, "images")

    # exist_ok makes the call a no-op when the folder is already there
    for directory in (text_dir_path, style_dir_path, font_dir_path, meta_inf_dir_path, target_image_dir):
        directory.mkdir(parents=True, exist_ok=True)

    mimetype_conf = __build_mime_type_conf()
    mime_path = Path(working_dir, "ebook", mimetype_conf["filename"])
    # best effort: a missing gallery folder must not prevent the ebook from being built
    with contextlib.suppress(FileExistsError, FileNotFoundError):
        for img in published_content_entity.content.gallery.get_gallery_path().iterdir():
            shutil.copy(str(img), str(target_image_dir))

    with mime_path.open(mode="w", encoding="utf-8") as mimefile:
        mimefile.write(mimetype_conf["content"])
    image_handler = ImageHandling()
    chapters = list(
        build_html_chapter_file(
            published_content_entity.content,
            published_content_entity.content.load_version(sha=published_content_entity.sha_public),
            working_dir=text_dir_path,
            root_dir=Path(working_dir, "ebook"),
            image_handler=image_handler,
        )
    )
    build_toc_ncx(chapters, published_content_entity, ops_dir)
    copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["toc"], style_dir_path, "toc.css")
    copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["full"], style_dir_path, "zmd.css")
    copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["katex"], style_dir_path, "katex.css")
    style_images_path = settings.BASE_DIR / "dist" / "images"
    smiley_images_path = settings.BASE_DIR / "dist" / "smileys" / "svg"
    if style_images_path.exists():
        import_asset(style_images_path, target_image_dir)
    if smiley_images_path.exists():
        import_asset(smiley_images_path, target_image_dir)
    images = list(__traverse_and_identify_images(target_image_dir))
    # registered by hand so that remove_unused_image() keeps this file
    image_handler.names.add("sprite.png")
    images = image_handler.remove_unused_image(target_image_dir, images)
    build_content_opf(published_content_entity, chapters, images, ops_dir)
    build_container_xml(meta_inf_dir_path)
    build_nav_xhtml(ops_dir, published_content_entity, chapters)

    zip_logger = logging.getLogger(__name__ + ".zip")
    zip_logger.setLevel(logging.WARNING)
    # NOTE(review): the EPUB OCF specification asks for "mimetype" to be the first, uncompressed
    # entry of the archive; shutil.make_archive gives no control over entry order or compression.
    # Confirm whether the targeted readers/validators need this.
    shutil.make_archive(str(final_file_path), format="zip", root_dir=str(Path(working_dir, "ebook")), logger=zip_logger)
    # make_archive always appends ".zip": move the archive to the requested name
    shutil.move(str(final_file_path) + ".zip", str(final_file_path))
3✔
181

182

183
def import_asset(style_images_path, target_image_dir):
    """
    Copy every file found under ``style_images_path`` directly into ``target_image_dir``.

    Sub-folders are explored recursively and their files land in ``target_image_dir`` itself,
    the folder hierarchy is not reproduced.

    :param style_images_path: folder to explore
    :type style_images_path: pathlib.Path
    :param target_image_dir: folder receiving the copies
    """
    destination = str(target_image_dir)
    for entry in style_images_path.iterdir():
        if not entry.is_file():
            import_asset(entry, target_image_dir)
            continue
        shutil.copy2(str(entry), destination)
3✔
189

190

191
def copy_or_create_empty(src_path, dst_path, default_name):
    """
    Copy ``src_path`` into ``dst_path`` or, when the source does not exist, create an empty file.

    :param src_path: file to copy
    :type src_path: pathlib.Path
    :param dst_path: destination handed to ``shutil.copy``; also the folder of the empty file
    :param default_name: name of the empty file created when ``src_path`` does not exist
    """
    if not src_path.exists():
        Path(dst_path, default_name).write_text("", encoding="utf-8")
        return
    copy(str(src_path), str(dst_path))
3✔
197

198

199
class ImageHandling:
    """
    Rewrite the image sources of the chapters and remember which images are used.

    ``names`` collects the path computed for every ``<img>`` processed by the callback built in
    :meth:`handle_images`; :meth:`remove_unused_image` deletes every file that is not listed in it.
    """

    def __init__(self):
        self.names = set()
        self.url_scheme_matcher = re.compile(r"^https?://")

    @staticmethod
    def _path_below_images_dir(image_url):
        """
        Return the part of an absolute path located below its deepest ``images`` folder.

        :param image_url: path to analyse
        :type image_url: str
        :return: the relative path as a string, or ``None`` when ``image_url`` is not absolute or
                 has no component named exactly ``images``
        """
        image_path = Path(image_url)
        # Looking at the path components (and not at the raw string) guarantees that the walk
        # below stops: a path such as "/srv/my_images/x.png" contains the text "images" without
        # having any folder with that name, and climbing it would never end.
        if not image_path.is_absolute() or "images" not in image_path.parts:
            return None
        root = image_path
        while root.name != "images":
            root = root.parent
        return str(image_path.relative_to(root))

    def _compute_final_path(self, image_url):
        """
        Compute the path, below the ``images`` folder of the ebook, for one image source.

        :param image_url: value of the ``src`` attribute
        :type image_url: str
        :return: the path to use below ``images``
        :rtype: str
        """
        if self.url_scheme_matcher.search(image_url):
            return parse.urlsplit(image_url).path
        # do not treat image_url as a portion of web URL if it is the path on the system
        # (image_url.startswith(settings.MEDIA_URL) can be True if
        # zds-site is in a directory under /media (the default
        # value of settings.MEDIA_URL))
        is_path_on_system = Path(settings.MEDIA_URL).is_dir() and Path(image_url).exists()
        if not is_path_on_system and image_url.startswith(settings.MEDIA_URL):
            return Path(image_url).name
        below_images = self._path_below_images_dir(image_url)
        if below_images is not None:
            return below_images
        return Path(image_url).name

    def handle_images(self, relative_path):
        """
        Build the callback which adapts a piece of html code to the ebook.

        :param relative_path: prefix put in front of ``/images/`` in every rewritten source
        :type relative_path: str
        :return: a function taking html code and returning it prettified, with the image sources
                 rewritten and the duplicated ``id`` attributes made unique
        """

        def handle_image_path_with_good_img_dir_path(html_code):
            soup_parser = BeautifulSoup(html_code, "lxml")
            for image in soup_parser.find_all("img"):
                if not image.get("src", ""):
                    continue
                final_path = self._compute_final_path(image["src"])
                image_path_in_ebook = relative_path + "/images/" + str(final_path).replace("%20", "_")
                image["src"] = str(image_path_in_ebook)
                self.names.add(final_path)
            # a suffix is appended to an id until it is different from all the ids already seen
            seen_ids = set()
            for element in soup_parser.find_all(name=None, attrs={"id": (lambda s: True)}):
                while element.get("id", None) and element["id"] in seen_ids:
                    element["id"] += "-1"
                if element.get("id", None):
                    seen_ids.add(element["id"])
            return soup_parser.prettify("utf-8").decode("utf-8")

        return handle_image_path_with_good_img_dir_path

    def remove_unused_image(self, image_path: Path, imglist):
        """
        Delete the files of ``image_path`` which are not listed in ``names``.

        :param image_path: folder holding the images of the ebook
        :param imglist: tuples whose first item is a ``pathlib.Path``
        :return: ``imglist`` without the entries whose file name matches a deleted file
        """
        removed_file_names = set()
        # the listing is materialized first because files are deleted while going through it
        for image in list(image_path.rglob("*")):
            if image.is_dir() or str(image.relative_to(image_path)) in self.names:
                continue
            os.remove(str(image))
            removed_file_names.add(image.name)
        if removed_file_names:
            imglist = [i for i in imglist if i[0].name.replace("%20", "_") not in removed_file_names]
        # Remove empty folders (first level only):
        for item in list(image_path.iterdir()):
            if item.is_dir() and not any(item.iterdir()):
                os.rmdir(str(item))
        return imglist
3✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc