20770571728

Committed 09 Dec 2025 06:44PM UTC coverage: 89.329% (-0.01%) from 89.34%

Build # 20770571728

Build Type

push

github

Committed by

web-flow

Commit Message

Corrige l'orthographe de la page des liens de partage (#6774)

Coverage Stats

3090 of 4140 branches covered (74.64%)

17086 of 19127 relevant lines covered (89.33%)

1.91 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.04

/zds/tutorialv2/epub_utils.py

import contextlib
import logging
import os
import re
import shutil
from collections import namedtuple
from os import path
from pathlib import Path
from shutil import copy
from urllib import parse

from bs4 import BeautifulSoup
from django.conf import settings
from django.template.loader import render_to_string

from zds.tutorialv2.publish_container import publish_container
from zds.utils import old_slugify


def __build_mime_type_conf():
    """
    Describe the ``mimetype`` file of the ebook.

    Kept as a function so that the value is easy to mock. The content matches the
    EPUB 3 standard (https://fr.flossmanuals.net/creer-un-epub/epub-3/ (fr)).

    :return: a dict with the ``filename`` to create and the ``content`` to write into it
    """
    mime_conf = dict(filename="mimetype", content="application/epub+zip")
    return mime_conf


def __traverse_and_identify_images(root_image_dir, current_dir=None):
    """
    Recursively walk an image folder and describe every file found in it.

    Entries of each folder are visited in sorted order so that the result is
    deterministic from one run to the next.

    :param root_image_dir: Root folder of the images
    :type root_image_dir: pathlib.Path
    :param current_dir: Folder currently explored (defaults to ``root_image_dir``)
    :type current_dir: pathlib.Path
    :return: a generator of ``(path_in_ebook, identifier, media_type)`` tuples, where
        ``path_in_ebook`` is ``images/<path relative to root_image_dir>``, ``identifier`` is
        ``image_`` followed by the lowercased relative path with ``.``, ``@`` and ``/``
        replaced by ``-``, and ``media_type`` falls back to ``image/png`` for unknown extensions
    """
    media_type_map = {
        ".png": "image/png",
        ".jpeg": "image/jpeg",
        ".jpg": "image/jpeg",
        ".gif": "image/gif",
        # the registered media type for SVG is "image/svg+xml"; a bare "image/svg" is not valid
        ".svg": "image/svg+xml",
    }
    # characters of the relative path that are replaced by "-" in the identifier
    identifier_table = str.maketrans("./@", "---")

    if current_dir is None:
        current_dir = root_image_dir

    for image_file_path in sorted(current_dir.iterdir()):
        if image_file_path.is_dir():
            yield from __traverse_and_identify_images(root_image_dir, image_file_path)
            continue
        relative_path = image_file_path.relative_to(root_image_dir)
        # as_posix() keeps "/" as the separator whatever the platform, so nested
        # folders always end up as "-" in the identifier
        identifier = "image_" + relative_path.as_posix().lower().translate(identifier_table)
        media_type = media_type_map.get(image_file_path.suffix.lower(), "image/png")
        yield Path("images", relative_path), identifier, media_type


def build_html_chapter_file(published_object, versioned_object, working_dir, root_dir, image_handler):
    """
    Publishes the content as ``xhtml`` files through ``publish_container`` and yields a
    description of every produced file.

    :param published_object: the published content as saved in database
    :type published_object: zds.tutorialv2.models.models_database.PublishedContent
    :param versioned_object: the object representing the public version in git file system
    :type versioned_object: zds.tutorialv2.models.models_versioned.VersionedContent
    :param working_dir: the directory handed to ``publish_container``; images are expected
        in the ``images`` folder next to it
    :type working_dir: pathlib.Path
    :param root_dir: the root directory into which dump the ebook
    :type root_dir: pathlib.Path
    :param image_handler: provides the ``handle_images`` callback given to ``publish_container``
    :type image_handler: ImageHandling
    :return: a generator of tuples composed as ``[splitted_html_file_relative_path, chapter-identifier, chapter-title]``
    """
    images_dir = working_dir.parent / "images"
    DirTuple = namedtuple("DirTuple", ["absolute", "relative"])
    image_directory = DirTuple(str(images_dir.absolute()), str(images_dir.relative_to(root_dir)))

    titles_by_path = publish_container(
        published_object,
        str(working_dir),
        versioned_object,
        template="tutorialv2/export/ebook/chapter.html",
        file_ext="xhtml",
        image_callback=image_handler.handle_images,
        image_directory=image_directory,
        relative=".",
        intro_ccl_template="tutorialv2/export/ebook/introduction.html",
    )

    for container_path, title in titles_by_path.items():
        # TODO: check if a function exists in the std lib to get rid of `root_dir + '/'`
        root_prefix = str(root_dir.absolute()) + "/"
        path_in_ebook = container_path.replace(root_prefix, "")
        chapter_identifier = "chapter-" + old_slugify(title)
        yield path_in_ebook, chapter_identifier, title


def build_toc_ncx(chapters, tutorial, working_dir):
    """
    Render the ``tutorialv2/export/ebook/toc.ncx.html`` template into ``toc.ncx``.

    :param chapters: the chapters, passed as is to the template
    :param tutorial: the content; its ``title`` and ``description`` are also given to the template
    :param working_dir: the directory in which ``toc.ncx`` is written (UTF-8)
    """
    ncx_location = Path(working_dir) / "toc.ncx"
    with ncx_location.open("w", encoding="utf-8") as ncx_file:
        template_context = {
            "chapters": chapters,
            "title": tutorial.title,
            "description": tutorial.description,
            "content": tutorial,
        }
        rendered = render_to_string("tutorialv2/export/ebook/toc.ncx.html", context=template_context)
        ncx_file.write(rendered)


def build_content_opf(content, chapters, images, working_dir):
    """
    Render the ``tutorialv2/export/ebook/content.opf.xml`` template into ``content.opf``.

    :param content: the content, passed as is to the template
    :param chapters: the chapters, passed as is to the template
    :param images: the images, passed as is to the template
    :param working_dir: the directory in which ``content.opf`` is written (UTF-8)
    """
    opf_location = Path(working_dir) / "content.opf"
    with opf_location.open("w", encoding="utf-8") as opf_file:
        template_context = {"content": content, "chapters": chapters, "images": images}
        rendered = render_to_string("tutorialv2/export/ebook/content.opf.xml", context=template_context)
        opf_file.write(rendered)


def build_container_xml(working_dir):
    """
    Render the ``tutorialv2/export/ebook/container.xml`` template into ``container.xml``.

    :param working_dir: the directory in which ``container.xml`` is written (UTF-8)
    """
    container_location = Path(working_dir) / "container.xml"
    with container_location.open("w", encoding="utf-8") as container_file:
        rendered = render_to_string("tutorialv2/export/ebook/container.xml")
        container_file.write(rendered)


def build_nav_xhtml(working_dir, content, chapters):
    """
    Render the ``tutorialv2/export/ebook/nav.html`` template into ``nav.xhtml``.

    :param working_dir: the directory in which ``nav.xhtml`` is written (UTF-8)
    :param content: the content, passed as is to the template
    :param chapters: the chapters, passed as is to the template
    """
    nav_location = Path(working_dir) / "nav.xhtml"
    with nav_location.open("w", encoding="utf-8") as nav_file:
        template_context = {"content": content, "chapters": chapters}
        rendered = render_to_string("tutorialv2/export/ebook/nav.html", template_context)
        nav_file.write(rendered)


def build_ebook(published_content_entity, working_dir, final_file_path):
    """
    Assemble the ebook tree under ``working_dir/ebook`` and zip it into ``final_file_path``.

    The steps, in order: create the folder layout, copy the gallery images, write the
    ``mimetype`` file, publish the chapters, write ``toc.ncx``, install the stylesheets,
    import the static images and smileys, drop the images no chapter references, write
    ``content.opf``, ``container.xml`` and ``nav.xhtml``, then archive the whole tree.

    :param published_content_entity: the published content; ``content``, ``sha_public`` and
        the gallery of the content are read from it
    :param working_dir: the directory in which the ``ebook`` folder is built
    :param final_file_path: the path of the resulting archive
    """
    # Folder layout of the ebook: everything but META-INF and mimetype lives under OPS
    ops_dir = Path(working_dir, "ebook", "OPS")
    text_dir_path = Path(ops_dir, "Text")
    style_dir_path = Path(ops_dir, "styles")
    font_dir_path = Path(ops_dir, "Fonts")
    meta_inf_dir_path = Path(working_dir, "ebook", "META-INF")
    target_image_dir = Path(ops_dir, "images")

    text_dir_path.mkdir(parents=True, exist_ok=True)
    style_dir_path.mkdir(parents=True, exist_ok=True)
    font_dir_path.mkdir(parents=True, exist_ok=True)
    meta_inf_dir_path.mkdir(parents=True, exist_ok=True)
    target_image_dir.mkdir(parents=True, exist_ok=True)

    mimetype_conf = __build_mime_type_conf()
    mime_path = Path(working_dir, "ebook", mimetype_conf["filename"])
    if published_content_entity.content.gallery.get_gallery_path().exists():
        # The gallery dir is created only when uploading the first image, so if
        # the content doesn't have any image from its gallery, the folder may
        # not exist.
        for img in published_content_entity.content.gallery.get_gallery_path().iterdir():
            # Do not interrupt the whole loop if one item triggers an exception
            # IsADirectoryError: ignore directories (which can be there only if created manually)
            with contextlib.suppress(FileExistsError, FileNotFoundError, IsADirectoryError):
                shutil.copy(str(img), str(target_image_dir))

    with mime_path.open(mode="w", encoding="utf-8") as mimefile:
        mimefile.write(mimetype_conf["content"])
    # The handler records in `names` every image referenced while the chapters are rendered
    image_handler = ImageHandling()
    # build_html_chapter_file is a generator: list() forces the chapters to be written now
    chapters = list(
        build_html_chapter_file(
            published_content_entity.content,
            published_content_entity.content.load_version(sha=published_content_entity.sha_public),
            working_dir=text_dir_path,
            root_dir=Path(working_dir, "ebook"),
            image_handler=image_handler,
        )
    )
    build_toc_ncx(chapters, published_content_entity, ops_dir)
    # Each stylesheet is copied when it exists, otherwise an empty placeholder file is created
    copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["toc"], style_dir_path, "toc.css")
    copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["full"], style_dir_path, "zmd.css")
    copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["katex"], style_dir_path, "katex.css")
    # Static images and smileys are flattened into the image folder of the ebook
    style_images_path = settings.BASE_DIR / "dist" / "images"
    smiley_images_path = settings.BASE_DIR / "dist" / "smileys" / "svg"
    if style_images_path.exists():
        import_asset(style_images_path, target_image_dir)
    if smiley_images_path.exists():
        import_asset(smiley_images_path, target_image_dir)
    images = list(__traverse_and_identify_images(target_image_dir))
    # sprite.png is registered by hand so that remove_unused_image never deletes it
    image_handler.names.add("sprite.png")
    # Deletes the files nobody referenced and returns the matching, filtered image list
    images = image_handler.remove_unused_image(target_image_dir, images)
    build_content_opf(published_content_entity, chapters, images, ops_dir)
    build_container_xml(meta_inf_dir_path)
    build_nav_xhtml(ops_dir, published_content_entity, chapters)

    # Dedicated logger limited to warnings, so that make_archive does not log every file it adds
    zip_logger = logging.getLogger(__name__ + ".zip")
    zip_logger.setLevel(logging.WARN)
    # NOTE(review): the EPUB container format expects `mimetype` to be the first entry of the
    # archive and to be stored uncompressed; make_archive gives no such guarantee -- confirm
    # that the produced files are accepted by the targeted readers / epubcheck.
    shutil.make_archive(str(final_file_path), format="zip", root_dir=str(Path(working_dir, "ebook")), logger=zip_logger)
    # make_archive appends ".zip" to the base name: rename the archive to the requested path
    shutil.move(str(final_file_path) + ".zip", str(final_file_path))


def import_asset(style_images_path, target_image_dir):
    """
    Copy every file found under ``style_images_path`` directly into ``target_image_dir``.

    Sub-folders are explored recursively, but their files are copied flat: the folder
    structure is not reproduced in ``target_image_dir``.

    :param style_images_path: the folder to explore
    :type style_images_path: pathlib.Path
    :param target_image_dir: the folder receiving the copies
    """
    destination = str(target_image_dir)
    for entry in style_images_path.iterdir():
        if not entry.is_file():
            # anything that is not a regular file is explored as a folder
            import_asset(entry, target_image_dir)
            continue
        shutil.copy2(str(entry), destination)


def copy_or_create_empty(src_path, dst_path, default_name):
    """
    Copy ``src_path`` into ``dst_path``, or create an empty file when the source is missing.

    :param src_path: the file to copy
    :type src_path: pathlib.Path
    :param dst_path: the destination given to ``shutil.copy``
    :param default_name: name of the empty file created in ``dst_path`` when ``src_path`` does not exist
    """
    if not src_path.exists():
        # no source: leave an empty UTF-8 placeholder under the default name
        Path(dst_path, default_name).write_text("", encoding="utf-8")
        return
    copy(str(src_path), str(dst_path))


class ImageHandling:
    """
    Rewrites the ``<img>`` sources of the chapters and remembers which images they use.

    Every callback built by :meth:`handle_images` records the images it meets in ``names``;
    :meth:`remove_unused_image` then deletes the files that are not listed there.
    """

    def __init__(self):
        # every ``final_path`` computed by the callbacks, i.e. the images referenced so far
        self.names = set()
        self.url_scheme_matcher = re.compile(r"^https?://")

    @staticmethod
    def _relative_to_images_dir(image_url):
        """
        Express an absolute path relatively to its closest ``images`` ancestor.

        :param image_url: the path to analyse
        :type image_url: str
        :return: the path below the closest component named exactly ``images`` (``"."`` when
            the path itself is that component), or ``None`` when the path is not absolute or
            has no such component
        """
        candidate = Path(image_url)
        if not candidate.is_absolute():
            return None
        # Walk up from the path itself. Looking at whole components (and not at a substring
        # of the url) guarantees termination: "/home/my_images/pic.png" simply yields None.
        for ancestor in (candidate, *candidate.parents):
            if ancestor.name == "images":
                return str(candidate.relative_to(ancestor))
        return None

    def handle_images(self, relative_path):
        """
        Build the callback that makes the images of an html document point into the ebook.

        :param relative_path: prefix put before ``/images/`` in the rewritten sources
        :type relative_path: str
        :return: a function taking html code and returning the prettified, rewritten html code
        """

        def handle_image_path_with_good_img_dir_path(html_code):
            soup_parser = BeautifulSoup(html_code, "lxml")
            for image in soup_parser.find_all("img"):
                image_url = image.get("src", "")
                if not image_url:
                    continue
                if self.url_scheme_matcher.search(image_url):
                    # http(s) url: only its path part is kept
                    final_path = parse.urlsplit(image_url).path
                elif (not (Path(settings.MEDIA_URL).is_dir() and Path(image_url).exists())) and image_url.startswith(
                    settings.MEDIA_URL
                ):
                    # do not go there if image_url is the path on the system
                    # and not a portion of web URL
                    # (image_url.startswith(settings.MEDIA_URL) can be True if
                    # zds-site is in a directory under /media (the default
                    # value of settings.MEDIA_URL))
                    final_path = Path(image_url).name
                else:
                    # path on the system: keep what is below the ``images`` folder if there
                    # is one, the bare file name otherwise
                    final_path = self._relative_to_images_dir(image_url)
                    if final_path is None:
                        final_path = Path(image_url).name
                image_path_in_ebook = relative_path + "/images/" + str(final_path).replace("%20", "_")
                image["src"] = str(image_path_in_ebook)
                self.names.add(final_path)

            # Make the ids unique inside the document by suffixing the duplicates with "-1"
            seen_ids = set()
            for element in soup_parser.find_all(name=None, attrs={"id": (lambda s: True)}):
                # the ``get`` guards skip the elements that turn out to have no id
                while element.get("id", None) and element["id"] in seen_ids:
                    element["id"] += "-1"
                if element.get("id", None):
                    seen_ids.add(element["id"])
            return soup_parser.prettify("utf-8").decode("utf-8")

        return handle_image_path_with_good_img_dir_path

    def remove_unused_image(self, image_path: Path, imglist):
        """
        Delete the images nobody referenced and filter the image list accordingly.

        :param image_path: the image folder of the ebook
        :param imglist: tuples whose first item is a ``pathlib.Path``; the entries whose file
            name (with ``%20`` replaced by ``_``) equals the name of a deleted file are dropped
        :return: the filtered list (``imglist`` itself when nothing was deleted)
        """
        # Remove unused images:
        removed_names = set()
        # the listing is materialised first so that files are not deleted while the
        # directory tree is still being walked
        for image in list(image_path.rglob("*")):
            if image.is_dir() or str(image.relative_to(image_path)) in self.names:
                continue
            os.remove(str(image))
            removed_names.add(image.name)
        if removed_names:
            # a single pass over the list instead of one pass per deleted file
            imglist = [i for i in imglist if i[0].name.replace("%20", "_") not in removed_names]
        # Remove empty folders (first level only):
        for item in image_path.iterdir():
            if item.is_dir() and not any(item.iterdir()):
                os.rmdir(str(item))
        return imglist

1	import contextlib	3✔
2	import logging	3✔
3	import os	3✔
4	import re	3✔
5	import shutil	3✔
6	from collections import namedtuple	3✔
7	from os import path	3✔
8	from pathlib import Path	3✔
9	from shutil import copy	3✔
10	from urllib import parse	3✔
11
12	from bs4 import BeautifulSoup	3✔
13	from django.conf import settings	3✔
14	from django.template.loader import render_to_string	3✔
15
16	from zds.tutorialv2.publish_container import publish_container	3✔
17	from zds.utils import old_slugify	3✔
18
19
20	def __build_mime_type_conf():	3✔
21	# this is just a way to make the "mime" more mockable. For now it's compatible with
22	# EPUB 3 standard (https://fr.flossmanuals.net/creer-un-epub/epub-3/ (fr))
23	return {"filename": "mimetype", "content": "application/epub+zip"}	3✔
24
25
26	def __traverse_and_identify_images(root_image_dir, current_dir=None):	3✔
27	"""
28	:param root_image_dir: Root folder of the images
29	:type root_image_dir: pathlib.Path
30	:param current_dir: Folder currently explored
31	:type current_dir: pathlib.Path
32	:return:
33	"""
34	media_type_map = {	3✔
35	".png": "image/png",
36	".jpeg": "image/jpeg",
37	".jpg": "image/jpeg",
38	".gif": "image/gif",
39	".svg": "image/svg",
40	}
41
42	if current_dir is None:	3!
43	current_dir = root_image_dir	3✔
44
45	for image_file_path in current_dir.iterdir():	3✔
46	if image_file_path.is_dir():	3!
47	yield from __traverse_and_identify_images(root_image_dir, image_file_path)	×
48	continue	×
49	ext = path.splitext(image_file_path.name)[1]	3✔
50	ebook_image_path = Path("images", image_file_path.relative_to(root_image_dir))	3✔
51	identifier = "image_" + str(ebook_image_path)[7:].lower().replace(".", "-").replace("@", "-").replace("/", "-")	3✔
52	yield ebook_image_path, identifier, media_type_map.get(ext.lower(), "image/png")	3✔
53
54
55	def build_html_chapter_file(published_object, versioned_object, working_dir, root_dir, image_handler):	3✔
56	"""
57	Parses the full html file, extracts the ``<hX>`` tags and splits their content into new files.
58	Yields all the produced files.
59
60	:param root_dir: the root directory into which dump the ebook
61	:type root_dir: pathlib.Path
62	:param working_dir:
63	:type working_dir: pathlib.Path
64	:param versioned_object: the object representing the public version in git file system
65	:type versioned_object: zds.tutorialv2.models.models_versioned.VersionedContent
66	:param published_object: the published content as saved in database
67	:type published_object: zds.tutorialv2.models.models_database.PublishedContent
68	:type image_handler: ImageHandling
69	:return: a generator of tuples composed as ``[splitted_html_file_relative_path, chapter-identifier, chapter-title]``
70	"""
71	DirTuple = namedtuple("DirTuple", ["absolute", "relative"])	3✔
72	img_dir = working_dir.parent / "images"	3✔
73	path_to_title_dict = publish_container(	3✔
74	published_object,
75	str(working_dir),
76	versioned_object,
77	template="tutorialv2/export/ebook/chapter.html",
78	file_ext="xhtml",
79	image_callback=image_handler.handle_images,
80	image_directory=DirTuple(str(img_dir.absolute()), str(img_dir.relative_to(root_dir))),
81	relative=".",
82	intro_ccl_template="tutorialv2/export/ebook/introduction.html",
83	)
84	for container_path, title in path_to_title_dict.items():	3✔
85	# TODO: check if a function exists in the std lib to get rid of `root_dir + '/'`
86	yield container_path.replace(str(root_dir.absolute()) + "/", ""), "chapter-" + old_slugify(title), title	3✔
87
88
89	def build_toc_ncx(chapters, tutorial, working_dir):	3✔
90	with Path(working_dir, "toc.ncx").open("w", encoding="utf-8") as toc_ncx_path:	3✔
91	toc_ncx_path.write(	3✔
92	render_to_string(
93	"tutorialv2/export/ebook/toc.ncx.html",
94	context={
95	"chapters": chapters,
96	"title": tutorial.title,
97	"description": tutorial.description,
98	"content": tutorial,
99	},
100	)
101	)
102
103
104	def build_content_opf(content, chapters, images, working_dir):	3✔
105	with Path(working_dir, "content.opf").open("w", encoding="utf-8") as content_opf_path:	3✔
106	content_opf_path.write(	3✔
107	render_to_string(
108	"tutorialv2/export/ebook/content.opf.xml",
109	context={"content": content, "chapters": chapters, "images": images},
110	)
111	)
112
113
114	def build_container_xml(working_dir):	3✔
115	with Path(working_dir, "container.xml").open("w", encoding="utf-8") as f:	3✔
116	f.write(render_to_string("tutorialv2/export/ebook/container.xml"))	3✔
117
118
119	def build_nav_xhtml(working_dir, content, chapters):	3✔
120	with Path(working_dir, "nav.xhtml").open("w", encoding="utf-8") as f:	3✔
121	f.write(render_to_string("tutorialv2/export/ebook/nav.html", {"content": content, "chapters": chapters}))	3✔
122
123
124	def build_ebook(published_content_entity, working_dir, final_file_path):	3✔
125	ops_dir = Path(working_dir, "ebook", "OPS")	3✔
126	text_dir_path = Path(ops_dir, "Text")	3✔
127	style_dir_path = Path(ops_dir, "styles")	3✔
128	font_dir_path = Path(ops_dir, "Fonts")	3✔
129	meta_inf_dir_path = Path(working_dir, "ebook", "META-INF")	3✔
130	target_image_dir = Path(ops_dir, "images")	3✔
131
132	text_dir_path.mkdir(parents=True, exist_ok=True)	3✔
133	style_dir_path.mkdir(parents=True, exist_ok=True)	3✔
134	font_dir_path.mkdir(parents=True, exist_ok=True)	3✔
135	meta_inf_dir_path.mkdir(parents=True, exist_ok=True)	3✔
136	target_image_dir.mkdir(parents=True, exist_ok=True)	3✔
137
138	mimetype_conf = __build_mime_type_conf()	3✔
139	mime_path = Path(working_dir, "ebook", mimetype_conf["filename"])	3✔
140	if published_content_entity.content.gallery.get_gallery_path().exists():	3!
141	# The gallery dir is created only when uploading the first image, so if
142	# the content doesn't have any image from its gallery, the folder may
143	# not exist.
144	for img in published_content_entity.content.gallery.get_gallery_path().iterdir():	×
145	# Do not interrupt the whole loop if one item triggers an exception
146	# IsADirectoryError: ignore directories (which can be there only if created manually)
147	with contextlib.suppress(FileExistsError, FileNotFoundError, IsADirectoryError):	×
148	shutil.copy(str(img), str(target_image_dir))	×
149
150	with mime_path.open(mode="w", encoding="utf-8") as mimefile:	3✔
151	mimefile.write(mimetype_conf["content"])	3✔
152	image_handler = ImageHandling()	3✔
153	chapters = list(	3✔
154	build_html_chapter_file(
155	published_content_entity.content,
156	published_content_entity.content.load_version(sha=published_content_entity.sha_public),
157	working_dir=text_dir_path,
158	root_dir=Path(working_dir, "ebook"),
159	image_handler=image_handler,
160	)
161	)
162	build_toc_ncx(chapters, published_content_entity, ops_dir)	3✔
163	copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["toc"], style_dir_path, "toc.css")	3✔
164	copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["full"], style_dir_path, "zmd.css")	3✔
165	copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["katex"], style_dir_path, "katex.css")	3✔
166	style_images_path = settings.BASE_DIR / "dist" / "images"	3✔
167	smiley_images_path = settings.BASE_DIR / "dist" / "smileys" / "svg"	3✔
168	if style_images_path.exists():	3!
169	import_asset(style_images_path, target_image_dir)	3✔
170	if smiley_images_path.exists():	3!
171	import_asset(smiley_images_path, target_image_dir)	3✔
172	images = list(__traverse_and_identify_images(target_image_dir))	3✔
173	image_handler.names.add("sprite.png")	3✔
174	images = image_handler.remove_unused_image(target_image_dir, images)	3✔
175	build_content_opf(published_content_entity, chapters, images, ops_dir)	3✔
176	build_container_xml(meta_inf_dir_path)	3✔
177	build_nav_xhtml(ops_dir, published_content_entity, chapters)	3✔
178
179	zip_logger = logging.getLogger(__name__ + ".zip")	3✔
180	zip_logger.setLevel(logging.WARN)	3✔
181	shutil.make_archive(str(final_file_path), format="zip", root_dir=str(Path(working_dir, "ebook")), logger=zip_logger)	3✔
182	shutil.move(str(final_file_path) + ".zip", str(final_file_path))	3✔
183
184
185	def import_asset(style_images_path, target_image_dir):	3✔
186	for img_path in style_images_path.iterdir():	3✔
187	if img_path.is_file():	3✔
188	shutil.copy2(str(img_path), str(target_image_dir))	3✔
189	else:
190	import_asset(img_path, target_image_dir)	3✔
191
192
193	def copy_or_create_empty(src_path, dst_path, default_name):	3✔
194	if src_path.exists():	3✔
195	copy(str(src_path), str(dst_path))	3✔
196	else:
197	with Path(dst_path, default_name).open("w", encoding="utf-8") as f:	3✔
198	f.write("")	3✔
199
200
201	class ImageHandling:	3✔
202	def __init__(self):	3✔
203	self.names = set()	3✔
204	self.url_scheme_matcher = re.compile(r"^https?://")	3✔
205
206	def handle_images(self, relative_path):	3✔
207	def handle_image_path_with_good_img_dir_path(html_code):	3✔
208	soup_parser = BeautifulSoup(html_code, "lxml")	3✔
209	for image in soup_parser.find_all("img"):	3✔
210	if not image.get("src", ""):	2!
211	continue	×
212	image_url = image["src"]	2✔
213	if self.url_scheme_matcher.search(image_url):	2✔
214	splitted = parse.urlsplit(image_url)	2✔
215	final_path = splitted.path	2✔
216	elif (not (Path(settings.MEDIA_URL).is_dir() and Path(image_url).exists())) and image_url.startswith(	2✔
217	settings.MEDIA_URL
218	):
219	# do not go there if image_url is the path on the system
220	# and not a portion of web URL
221	# (image_url.startswith(settings.MEDIA_URL) can be True if
222	# zds-site is in a directory under /media (the default
223	# value of settings.MEDIA_URL))
224	final_path = Path(image_url).name	1✔
225	elif Path(image_url).is_absolute() and "images" in image_url:	2✔
226	root = Path(image_url)	2✔
227	while root.name != "images":	2✔
228	root = root.parent	2✔
229	final_path = str(Path(image_url).relative_to(root))	2✔
230	else:
231	final_path = Path(image_url).name	2✔
232	image_path_in_ebook = relative_path + "/images/" + str(final_path).replace("%20", "_")	2✔
233	image["src"] = str(image_path_in_ebook)	2✔
234	self.names.add(final_path)	2✔
235	ids = {}	3✔
236	for element in soup_parser.find_all(name=None, attrs={"id": (lambda s: True)}):	3✔
237	while element.get("id", None) and element["id"] in ids:	3✔
238	element["id"] += "-1"	2✔
239	if element.get("id", None):	3✔
240	ids[element["id"]] = True	2✔
241	return soup_parser.prettify("utf-8").decode("utf-8")	3✔
242
243	return handle_image_path_with_good_img_dir_path	3✔
244
245	def remove_unused_image(self, image_path: Path, imglist):	3✔
246	# Remove unused images:
247	for image in image_path.rglob("*"):	3✔
248	if str(Path(image).relative_to(image_path)) not in self.names and not image.is_dir():	3✔
249	os.remove(str(image))	3✔
250	imglist = [i for i in imglist if i[0].name.replace("%20", "_") != image.name]	3✔
251	# Remove empty folders:
252	for item in image_path.iterdir():	3✔
253	if item.is_dir() and len(list(item.iterdir())) == 0:	3!
254	os.rmdir(str(item))	×
255	return imglist	3✔

zestedesavoir / zds-site / 20770571728

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous