11059094899

Committed 26 Sep 2024 07:38PM UTC coverage: 89.18% (+0.002%) from 89.178%

Build # 11059094899

Build Type

push

github

Committed by

Situphen

Commit Message

Met à jour les versions Python supportées

- 3.11 est la version sur le serveur de production
- Django 4.2 est compatible avec les versions 3.8-3.12

Run Details

5844 of 7011 branches covered (83.35%)

16707 of 18734 relevant lines covered (89.18%)

1.9 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.93

/zds/tutorialv2/epub_utils.py

import contextlib
import logging
import os
import re
import shutil
from collections import namedtuple
from urllib import parse
from os import path
from bs4 import BeautifulSoup
from pathlib import Path
from shutil import copy
from django.template.loader import render_to_string
from django.conf import settings

from zds.tutorialv2.publish_container import publish_container
from zds.utils import old_slugify


def __build_mime_type_conf():
    """
    Describe the ``mimetype`` file of the ebook: its file name and its exact content.

    Isolated in its own function so that it is easy to mock. The values follow the
    EPUB 3 standard (https://fr.flossmanuals.net/creer-un-epub/epub-3/ (fr)).

    :return: a dict with the keys ``filename`` and ``content``
    :rtype: dict
    """
    mime_conf = {}
    mime_conf["filename"] = "mimetype"
    mime_conf["content"] = "application/epub+zip"
    return mime_conf


def __traverse_and_identify_images(root_image_dir, current_dir=None):
    """
    Recursively walk the image folder and describe every file found in it.

    For each file, yields a tuple ``(ebook_image_path, identifier, media_type)`` where:

    - ``ebook_image_path`` is the path of the file relative to ``root_image_dir``, prefixed by ``images``;
    - ``identifier`` is ``image_`` followed by the lowercased relative path in which ``.``, ``@`` and ``/``
      are replaced by ``-``;
    - ``media_type`` is deduced from the file extension (case-insensitive), ``image/png`` being the fallback
      for unknown extensions.

    :param root_image_dir: Root folder of the images
    :type root_image_dir: pathlib.Path
    :param current_dir:  Folder currently explored (defaults to ``root_image_dir``)
    :type current_dir: pathlib.Path
    :return: a generator of ``(ebook_image_path, identifier, media_type)`` tuples
    """
    media_type_map = {
        ".png": "image/png",
        ".jpeg": "image/jpeg",
        ".jpg": "image/jpeg",
        ".gif": "image/gif",
        # the registered media type for SVG (and the EPUB core media type) is "image/svg+xml"
        ".svg": "image/svg+xml",
    }

    if current_dir is None:
        current_dir = root_image_dir

    for image_file_path in current_dir.iterdir():
        if image_file_path.is_dir():
            yield from __traverse_and_identify_images(root_image_dir, image_file_path)
            continue
        ext = path.splitext(image_file_path.name)[1]
        relative_image_path = image_file_path.relative_to(root_image_dir)
        ebook_image_path = Path("images", relative_image_path)
        # as_posix() guarantees "/" separators, so the identifier does not depend on the OS
        identifier = "image_" + relative_image_path.as_posix().lower().replace(".", "-").replace("@", "-").replace(
            "/", "-"
        )
        yield ebook_image_path, identifier, media_type_map.get(ext.lower(), "image/png")


def build_html_chapter_file(published_object, versioned_object, working_dir, root_dir, image_handler):
    """
    Publishes the content as xhtml chapter files through ``publish_container`` and yields one
    description per produced file.

    Images are expected in the ``images`` folder located next to ``working_dir``; every produced
    html goes through ``image_handler.handle_images``.

    :param published_object: the published content as saved in database
    :type published_object: zds.tutorialv2.models.models_database.PublishedContent
    :param versioned_object: the object representing the public version in git file system
    :type versioned_object: zds.tutorialv2.models.models_versioned.VersionedContent
    :param working_dir: the directory handed to ``publish_container`` for the chapter files
    :type working_dir: pathlib.Path
    :param root_dir: the root directory into which dump the ebook
    :type root_dir: pathlib.Path
    :type image_handler: ImageHandling
    :return: a generator of tuples composed as ``[splitted_html_file_relative_path, chapter-identifier, chapter-title]``
    """
    images_dir = working_dir.parent / "images"
    DirTuple = namedtuple("DirTuple", ["absolute", "relative"])
    image_directory = DirTuple(str(images_dir.absolute()), str(images_dir.relative_to(root_dir)))

    titles_by_path = publish_container(
        published_object,
        str(working_dir),
        versioned_object,
        template="tutorialv2/export/ebook/chapter.html",
        file_ext="xhtml",
        image_callback=image_handler.handle_images,
        image_directory=image_directory,
        relative=".",
        intro_ccl_template="tutorialv2/export/ebook/introduction.html",
    )

    for container_path, title in titles_by_path.items():
        # TODO: check if a function exists in the std lib to get rid of `root_dir + '/'`
        root_prefix = str(root_dir.absolute()) + "/"
        relative_chapter_path = container_path.replace(root_prefix, "")
        chapter_identifier = "chapter-" + old_slugify(title)
        yield relative_chapter_path, chapter_identifier, title


def build_toc_ncx(chapters, tutorial, working_dir):
    """
    Renders the ``tutorialv2/export/ebook/toc.ncx.html`` template and writes the result,
    utf-8 encoded, into ``toc.ncx`` inside ``working_dir``.

    :param chapters: the chapters, passed as is to the template
    :param tutorial: the content; its ``title`` and ``description`` are also passed to the template
    :param working_dir: the directory in which ``toc.ncx`` is written
    """
    toc_file_path = Path(working_dir, "toc.ncx")
    with toc_file_path.open("w", encoding="utf-8") as toc_file:
        template_context = {
            "chapters": chapters,
            "title": tutorial.title,
            "description": tutorial.description,
            "content": tutorial,
        }
        rendered_toc = render_to_string("tutorialv2/export/ebook/toc.ncx.html", context=template_context)
        toc_file.write(rendered_toc)


def build_content_opf(content, chapters, images, working_dir):
    """
    Renders the ``tutorialv2/export/ebook/content.opf.xml`` template and writes the result,
    utf-8 encoded, into ``content.opf`` inside ``working_dir``.

    :param content: the content, passed as is to the template
    :param chapters: the chapters, passed as is to the template
    :param images: the images, passed as is to the template
    :param working_dir: the directory in which ``content.opf`` is written
    """
    opf_file_path = Path(working_dir, "content.opf")
    with opf_file_path.open("w", encoding="utf-8") as opf_file:
        template_context = {"content": content, "chapters": chapters, "images": images}
        rendered_opf = render_to_string("tutorialv2/export/ebook/content.opf.xml", context=template_context)
        opf_file.write(rendered_opf)


def build_container_xml(working_dir):
    """
    Renders the ``tutorialv2/export/ebook/container.xml`` template and writes the result,
    utf-8 encoded, into ``container.xml`` inside ``working_dir``.

    :param working_dir: the directory in which ``container.xml`` is written
    """
    container_file_path = Path(working_dir, "container.xml")
    with container_file_path.open("w", encoding="utf-8") as container_file:
        rendered_container = render_to_string("tutorialv2/export/ebook/container.xml")
        container_file.write(rendered_container)


def build_nav_xhtml(working_dir, content, chapters):
    """
    Renders the ``tutorialv2/export/ebook/nav.html`` template and writes the result,
    utf-8 encoded, into ``nav.xhtml`` inside ``working_dir``.

    :param working_dir: the directory in which ``nav.xhtml`` is written
    :param content: the content, passed as is to the template
    :param chapters: the chapters, passed as is to the template
    """
    nav_file_path = Path(working_dir, "nav.xhtml")
    with nav_file_path.open("w", encoding="utf-8") as nav_file:
        template_context = {"content": content, "chapters": chapters}
        rendered_nav = render_to_string("tutorialv2/export/ebook/nav.html", template_context)
        nav_file.write(rendered_nav)


def build_ebook(published_content_entity, working_dir, final_file_path):
    """
    Builds the whole ebook tree under ``working_dir/ebook`` and zips it into ``final_file_path``.

    Steps, in order:

    1. create the ``OPS/Text``, ``OPS/styles``, ``OPS/Fonts``, ``OPS/images`` and ``META-INF`` folders;
    2. copy the files of the content gallery into ``OPS/images`` (best effort);
    3. write the ``mimetype`` file;
    4. generate the chapter files, then ``toc.ncx``;
    5. copy the stylesheets (or create empty ones) and import the site images and smileys;
    6. list the images, delete the ones no chapter references, then write ``content.opf``,
       ``container.xml`` and ``nav.xhtml``;
    7. zip ``working_dir/ebook`` and move the archive to ``final_file_path``.

    :param published_content_entity: the published content to export; its ``content`` and
        ``sha_public`` attributes are used
    :param working_dir: the directory in which the ``ebook`` tree is built
    :param final_file_path: the path of the final archive
    """
    ebook_root_dir = Path(working_dir, "ebook")
    ops_dir = Path(ebook_root_dir, "OPS")
    text_dir_path = Path(ops_dir, "Text")
    style_dir_path = Path(ops_dir, "styles")
    font_dir_path = Path(ops_dir, "Fonts")
    meta_inf_dir_path = Path(ebook_root_dir, "META-INF")
    target_image_dir = Path(ops_dir, "images")

    # folders may remain from a previous build: exist_ok makes their creation idempotent
    for directory in (text_dir_path, style_dir_path, font_dir_path, meta_inf_dir_path, target_image_dir):
        directory.mkdir(parents=True, exist_ok=True)

    mimetype_conf = __build_mime_type_conf()
    mime_path = Path(ebook_root_dir, mimetype_conf["filename"])
    # best effort: a gallery folder that does not exist must not abort the export
    with contextlib.suppress(FileExistsError, FileNotFoundError):
        for img in published_content_entity.content.gallery.get_gallery_path().iterdir():
            shutil.copy(str(img), str(target_image_dir))

    with mime_path.open(mode="w", encoding="utf-8") as mimefile:
        mimefile.write(mimetype_conf["content"])
    image_handler = ImageHandling()
    chapters = list(
        build_html_chapter_file(
            published_content_entity.content,
            published_content_entity.content.load_version(sha=published_content_entity.sha_public),
            working_dir=text_dir_path,
            root_dir=ebook_root_dir,
            image_handler=image_handler,
        )
    )
    build_toc_ncx(chapters, published_content_entity, ops_dir)

    stylesheets = settings.ZDS_APP["content"]["epub_stylesheets"]
    copy_or_create_empty(stylesheets["toc"], style_dir_path, "toc.css")
    copy_or_create_empty(stylesheets["full"], style_dir_path, "zmd.css")
    copy_or_create_empty(stylesheets["katex"], style_dir_path, "katex.css")

    style_images_path = settings.BASE_DIR / "dist" / "images"
    smiley_images_path = settings.BASE_DIR / "dist" / "smileys" / "svg"
    if style_images_path.exists():
        import_asset(style_images_path, target_image_dir)
    if smiley_images_path.exists():
        import_asset(smiley_images_path, target_image_dir)

    images = list(__traverse_and_identify_images(target_image_dir))
    # "sprite.png" is registered as used so that the clean-up below never deletes it
    # (presumably because the stylesheets need it and no chapter references it, to be confirmed)
    image_handler.names.add("sprite.png")
    images = image_handler.remove_unused_image(target_image_dir, images)
    build_content_opf(published_content_entity, chapters, images, ops_dir)
    build_container_xml(meta_inf_dir_path)
    build_nav_xhtml(ops_dir, published_content_entity, chapters)

    zip_logger = logging.getLogger(__name__ + ".zip")
    zip_logger.setLevel(logging.WARN)
    # make_archive appends ".zip" to the base name it is given: the archive is then renamed
    shutil.make_archive(str(final_file_path), format="zip", root_dir=str(ebook_root_dir), logger=zip_logger)
    shutil.move(str(final_file_path) + ".zip", str(final_file_path))


def import_asset(style_images_path, target_image_dir):
    """
    Copies every file found under ``style_images_path``, at any depth, directly into
    ``target_image_dir``: the folder structure of the source is not reproduced.

    :param style_images_path: the folder to explore
    :type style_images_path: pathlib.Path
    :param target_image_dir: the folder receiving all the files
    """
    destination = str(target_image_dir)
    for entry in style_images_path.iterdir():
        if not entry.is_file():
            # anything that is not a regular file is explored as a folder
            import_asset(entry, target_image_dir)
            continue
        shutil.copy2(str(entry), destination)


def copy_or_create_empty(src_path, dst_path, default_name):
    """
    Copies ``src_path`` into the ``dst_path`` folder when the source exists, otherwise creates
    an empty file named ``default_name`` in ``dst_path``.

    Note that a copied file keeps the name of its source: ``default_name`` is only used for
    the empty placeholder.

    :param src_path: the file to copy
    :type src_path: pathlib.Path
    :param dst_path: the destination folder
    :param default_name: the name of the empty file created when ``src_path`` does not exist
    """
    if not src_path.exists():
        placeholder = Path(dst_path, default_name)
        placeholder.write_text("", encoding="utf-8")
        return
    copy(str(src_path), str(dst_path))


class ImageHandling:
    """
    Rewrites the ``src`` of the images of the exported html so that they point to the ``images``
    folder of the ebook, and remembers which images are used so that the others can be deleted.
    """

    def __init__(self):
        # paths (relative to the images folder) of the images referenced by the processed html
        self.names = set()
        self.url_scheme_matcher = re.compile(r"^https?://")

    @staticmethod
    def _relative_to_images_dir(image_url):
        """
        Gives the path of ``image_url`` relative to its closest ``images`` folder.

        The search is bounded by the ancestors of the path, so it always terminates, even when
        ``images`` only appears as a part of a name (e.g. ``/srv/myimages/a.png``).

        :param image_url: a path on the file system
        :type image_url: str
        :return: the relative path, or ``None`` if the path is not absolute or if none of its
            components is named ``images``
        """
        candidate = Path(image_url)
        if not candidate.is_absolute():
            return None
        for ancestor in (candidate, *candidate.parents):
            if ancestor.name == "images":
                return str(candidate.relative_to(ancestor))
        return None

    def handle_images(self, relative_path):
        """
        Builds the callback which adapts a html code to the ebook.

        :param relative_path: the prefix put before ``/images/`` in every rewritten ``src``
        :type relative_path: str
        :return: a function taking a html code and returning the adapted html code
        """

        def handle_image_path_with_good_img_dir_path(html_code):
            soup_parser = BeautifulSoup(html_code, "lxml")
            for image in soup_parser.find_all("img"):
                if not image.get("src", ""):
                    continue
                image_url = image["src"]
                if self.url_scheme_matcher.search(image_url):
                    # http(s) URL: only the path part of the URL is kept
                    final_path = parse.urlsplit(image_url).path
                elif (not (Path(settings.MEDIA_URL).is_dir() and Path(image_url).exists())) and image_url.startswith(
                    settings.MEDIA_URL
                ):
                    # do not go there if image_url is the path on the system
                    # and not a portion of web URL
                    # (image_url.startswith(settings.MEDIA_URL) can be True if
                    # zds-site is in a directory under /media (the default
                    # value of settings.MEDIA_URL))
                    final_path = Path(image_url).name
                else:
                    # path on the system: keep what is below the "images" folder if there is one,
                    # the file name alone otherwise
                    final_path = self._relative_to_images_dir(image_url)
                    if final_path is None:
                        final_path = Path(image_url).name
                image_path_in_ebook = relative_path + "/images/" + str(final_path).replace("%20", "_")
                image["src"] = str(image_path_in_ebook)
                self.names.add(final_path)
            # make the ids unique inside the document by suffixing the duplicates with "-1"
            ids = {}
            for element in soup_parser.find_all(name=None, attrs={"id": (lambda s: True)}):
                while element.get("id", None) and element["id"] in ids:
                    element["id"] += "-1"
                if element.get("id", None):
                    ids[element["id"]] = True
            return soup_parser.prettify("utf-8").decode("utf-8")

        return handle_image_path_with_good_img_dir_path

    def remove_unused_image(self, image_path: Path, imglist):
        """
        Deletes from ``image_path`` the files which are not registered in ``self.names``, then
        deletes the direct subfolders of ``image_path`` which are empty.

        :param image_path: the images folder of the ebook
        :param imglist: the image descriptions, as tuples whose first item is a ``pathlib.Path``
        :return: ``imglist`` without the entries having the file name of a deleted file
        """
        # Remove unused images:
        removed_names = set()
        for image in image_path.rglob("*"):
            if str(image.relative_to(image_path)) not in self.names and not image.is_dir():
                os.remove(str(image))
                removed_names.add(image.name)
        # entries are matched on the file name only, whatever the folder they are in
        if removed_names:
            imglist = [i for i in imglist if i[0].name.replace("%20", "_") not in removed_names]
        # Remove empty folders:
        for item in image_path.iterdir():
            if item.is_dir() and not any(item.iterdir()):
                os.rmdir(str(item))
        return imglist

1	import contextlib	3✔
2	import logging	3✔
3	import os	3✔
4	import re	3✔
5	import shutil	3✔
6	from collections import namedtuple	3✔
7	from urllib import parse	3✔
8	from os import path	3✔
9	from bs4 import BeautifulSoup	3✔
10	from pathlib import Path	3✔
11	from shutil import copy	3✔
12	from django.template.loader import render_to_string	3✔
13	from django.conf import settings	3✔
14
15	from zds.tutorialv2.publish_container import publish_container	3✔
16	from zds.utils import old_slugify	3✔
17
18
19	def __build_mime_type_conf():	3✔
20	# this is just a way to make the "mime" more mockable. For now it's compatible with
21	# EPUB 3 standard (https://fr.flossmanuals.net/creer-un-epub/epub-3/ (fr))
22	return {"filename": "mimetype", "content": "application/epub+zip"}	3✔
23
24
25	def __traverse_and_identify_images(root_image_dir, current_dir=None):	3✔
26	"""
27	:param root_image_dir: Root folder of the images
28	:type root_image_dir: pathlib.Path
29	:param current_dir: Folder currently explored
30	:type current_dir: pathlib.Path
31	:return:
32	"""
33	media_type_map = {	3✔
34	".png": "image/png",
35	".jpeg": "image/jpeg",
36	".jpg": "image/jpeg",
37	".gif": "image/gif",
38	".svg": "image/svg",
39	}
40
41	if current_dir is None:	3✔
42	current_dir = root_image_dir	3✔
43
44	for image_file_path in current_dir.iterdir():	3✔
45	if image_file_path.is_dir():	3✔
46	yield from __traverse_and_identify_images(root_image_dir, image_file_path)	2✔
47	continue	2✔
48	ext = path.splitext(image_file_path.name)[1]	3✔
49	ebook_image_path = Path("images", image_file_path.relative_to(root_image_dir))	3✔
50	identifier = "image_" + str(ebook_image_path)[7:].lower().replace(".", "-").replace("@", "-").replace("/", "-")	3✔
51	yield ebook_image_path, identifier, media_type_map.get(ext.lower(), "image/png")	3✔
52
53
54	def build_html_chapter_file(published_object, versioned_object, working_dir, root_dir, image_handler):	3✔
55	"""
56	Parses the full html file, extracts the ``<hX>`` tags and splits their content into new files.
57	Yields all the produced files.
58
59	:param root_dir: the root directory into which dump the ebook
60	:type root_dir: pathlib.Path
61	:param working_dir:
62	:type working_dir: pathlib.Path
63	:param versioned_object: the object representing the public version in git file system
64	:type versioned_object: zds.tutorialv2.models.models_versioned.VersionedContent
65	:param published_object: the published content as saved in database
66	:type published_object: zds.tutorialv2.models.models_database.PublishedContent
67	:type image_handler: ImageHandling
68	:return: a generator of tuples composed as ``[splitted_html_file_relative_path, chapter-identifier, chapter-title]``
69	"""
70	DirTuple = namedtuple("DirTuple", ["absolute", "relative"])	3✔
71	img_dir = working_dir.parent / "images"	3✔
72	path_to_title_dict = publish_container(	3✔
73	published_object,
74	str(working_dir),
75	versioned_object,
76	template="tutorialv2/export/ebook/chapter.html",
77	file_ext="xhtml",
78	image_callback=image_handler.handle_images,
79	image_directory=DirTuple(str(img_dir.absolute()), str(img_dir.relative_to(root_dir))),
80	relative=".",
81	intro_ccl_template="tutorialv2/export/ebook/introduction.html",
82	)
83	for container_path, title in path_to_title_dict.items():	3✔
84	# TODO: check if a function exists in the std lib to get rid of `root_dir + '/'`
85	yield container_path.replace(str(root_dir.absolute()) + "/", ""), "chapter-" + old_slugify(title), title	3✔
86
87
88	def build_toc_ncx(chapters, tutorial, working_dir):	3✔
89	with Path(working_dir, "toc.ncx").open("w", encoding="utf-8") as toc_ncx_path:	3✔
90	toc_ncx_path.write(	3✔
91	render_to_string(
92	"tutorialv2/export/ebook/toc.ncx.html",
93	context={
94	"chapters": chapters,
95	"title": tutorial.title,
96	"description": tutorial.description,
97	"content": tutorial,
98	},
99	)
100	)
101
102
103	def build_content_opf(content, chapters, images, working_dir):	3✔
104	with Path(working_dir, "content.opf").open("w", encoding="utf-8") as content_opf_path:	3✔
105	content_opf_path.write(	3✔
106	render_to_string(
107	"tutorialv2/export/ebook/content.opf.xml",
108	context={"content": content, "chapters": chapters, "images": images},
109	)
110	)
111
112
113	def build_container_xml(working_dir):	3✔
114	with Path(working_dir, "container.xml").open("w", encoding="utf-8") as f:	3✔
115	f.write(render_to_string("tutorialv2/export/ebook/container.xml"))	3✔
116
117
118	def build_nav_xhtml(working_dir, content, chapters):	3✔
119	with Path(working_dir, "nav.xhtml").open("w", encoding="utf-8") as f:	3✔
120	f.write(render_to_string("tutorialv2/export/ebook/nav.html", {"content": content, "chapters": chapters}))	3✔
121
122
123	def build_ebook(published_content_entity, working_dir, final_file_path):	3✔
124	ops_dir = Path(working_dir, "ebook", "OPS")	3✔
125	text_dir_path = Path(ops_dir, "Text")	3✔
126	style_dir_path = Path(ops_dir, "styles")	3✔
127	font_dir_path = Path(ops_dir, "Fonts")	3✔
128	meta_inf_dir_path = Path(working_dir, "ebook", "META-INF")	3✔
129	target_image_dir = Path(ops_dir, "images")	3✔
130
131	with contextlib.suppress(FileExistsError): # Forced to use this until python 3.5 is used and ok_exist appears	3✔
132	text_dir_path.mkdir(parents=True)	3✔
133	with contextlib.suppress(FileExistsError):	3✔
134	style_dir_path.mkdir(parents=True)	3✔
135	with contextlib.suppress(FileExistsError):	3✔
136	font_dir_path.mkdir(parents=True)	3✔
137	with contextlib.suppress(FileExistsError):	3✔
138	meta_inf_dir_path.mkdir(parents=True)	3✔
139	with contextlib.suppress(FileExistsError):	3✔
140	target_image_dir.mkdir(parents=True)	3✔
141
142	mimetype_conf = __build_mime_type_conf()	3✔
143	mime_path = Path(working_dir, "ebook", mimetype_conf["filename"])	3✔
144	with contextlib.suppress(FileExistsError, FileNotFoundError):	3✔
145	for img in published_content_entity.content.gallery.get_gallery_path().iterdir():	3!
146	shutil.copy(str(img), str(target_image_dir))	×
147
148	with mime_path.open(mode="w", encoding="utf-8") as mimefile:	3✔
149	mimefile.write(mimetype_conf["content"])	3✔
150	image_handler = ImageHandling()	3✔
151	chapters = list(	3✔
152	build_html_chapter_file(
153	published_content_entity.content,
154	published_content_entity.content.load_version(sha=published_content_entity.sha_public),
155	working_dir=text_dir_path,
156	root_dir=Path(working_dir, "ebook"),
157	image_handler=image_handler,
158	)
159	)
160	build_toc_ncx(chapters, published_content_entity, ops_dir)	3✔
161	copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["toc"], style_dir_path, "toc.css")	3✔
162	copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["full"], style_dir_path, "zmd.css")	3✔
163	copy_or_create_empty(settings.ZDS_APP["content"]["epub_stylesheets"]["katex"], style_dir_path, "katex.css")	3✔
164	style_images_path = settings.BASE_DIR / "dist" / "images"	3✔
165	smiley_images_path = settings.BASE_DIR / "dist" / "smileys" / "svg"	3✔
166	if style_images_path.exists():	3!
167	import_asset(style_images_path, target_image_dir)	3✔
168	if smiley_images_path.exists():	3!
169	import_asset(smiley_images_path, target_image_dir)	3✔
170	images = list(__traverse_and_identify_images(target_image_dir))	3✔
171	image_handler.names.add("sprite.png")	3✔
172	images = image_handler.remove_unused_image(target_image_dir, images)	3✔
173	build_content_opf(published_content_entity, chapters, images, ops_dir)	3✔
174	build_container_xml(meta_inf_dir_path)	3✔
175	build_nav_xhtml(ops_dir, published_content_entity, chapters)	3✔
176
177	zip_logger = logging.getLogger(__name__ + ".zip")	3✔
178	zip_logger.setLevel(logging.WARN)	3✔
179	shutil.make_archive(str(final_file_path), format="zip", root_dir=str(Path(working_dir, "ebook")), logger=zip_logger)	3✔
180	shutil.move(str(final_file_path) + ".zip", str(final_file_path))	3✔
181
182
183	def import_asset(style_images_path, target_image_dir):	3✔
184	for img_path in style_images_path.iterdir():	3✔
185	if img_path.is_file():	3✔
186	shutil.copy2(str(img_path), str(target_image_dir))	3✔
187	else:
188	import_asset(img_path, target_image_dir)	3✔
189
190
191	def copy_or_create_empty(src_path, dst_path, default_name):	3✔
192	if src_path.exists():	3✔
193	copy(str(src_path), str(dst_path))	3✔
194	else:
195	with Path(dst_path, default_name).open("w", encoding="utf-8") as f:	3✔
196	f.write("")	3✔
197
198
199	class ImageHandling:	3✔
200	def __init__(self):	3✔
201	self.names = set()	3✔
202	self.url_scheme_matcher = re.compile(r"^https?://")	3✔
203
204	def handle_images(self, relative_path):	3✔
205	def handle_image_path_with_good_img_dir_path(html_code):	3✔
206	soup_parser = BeautifulSoup(html_code, "lxml")	3✔
207	for image in soup_parser.find_all("img"):	3✔
208	if not image.get("src", ""):	2!
209	continue	×
210	image_url = image["src"]	2✔
211	if self.url_scheme_matcher.search(image_url):	2✔
212	splitted = parse.urlsplit(image_url)	2✔
213	final_path = splitted.path	2✔
214	elif (not (Path(settings.MEDIA_URL).is_dir() and Path(image_url).exists())) and image_url.startswith(	2✔
215	settings.MEDIA_URL
216	):
217	# do not go there if image_url is the path on the system
218	# and not a portion of web URL
219	# (image_url.startswith(settings.MEDIA_URL) can be True if
220	# zds-site is in a directory under /media (the default
221	# value of settings.MEDIA_URL))
222	final_path = Path(image_url).name	1✔
223	elif Path(image_url).is_absolute() and "images" in image_url:	2✔
224	root = Path(image_url)	2✔
225	while root.name != "images":	2✔
226	root = root.parent	2✔
227	final_path = str(Path(image_url).relative_to(root))	2✔
228	else:
229	final_path = Path(image_url).name	2✔
230	image_path_in_ebook = relative_path + "/images/" + str(final_path).replace("%20", "_")	2✔
231	image["src"] = str(image_path_in_ebook)	2✔
232	self.names.add(final_path)	2✔
233	ids = {}	3✔
234	for element in soup_parser.find_all(name=None, attrs={"id": (lambda s: True)}):	3✔
235	while element.get("id", None) and element["id"] in ids:	3✔
236	element["id"] += "-1"	2✔
237	if element.get("id", None):	3✔
238	ids[element["id"]] = True	2✔
239	return soup_parser.prettify("utf-8").decode("utf-8")	3✔
240
241	return handle_image_path_with_good_img_dir_path	3✔
242
243	def remove_unused_image(self, image_path: Path, imglist):	3✔
244	# Remove unused images:
245	for image in image_path.rglob("*"):	3✔
246	if str(Path(image).relative_to(image_path)) not in self.names and not image.is_dir():	3✔
247	os.remove(str(image))	3✔
248	imglist = [i for i in imglist if i[0].name.replace("%20", "_") != image.name]	3✔
249	# Remove empty folders:
250	for item in image_path.iterdir():	3✔
251	if item.is_dir() and len(list(item.iterdir())) == 0:	3!
252	os.rmdir(str(item))	×
253	return imglist	3✔

zestedesavoir / zds-site / 11059094899

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous