• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

EsupPortail / Esup-Pod / 18407208465

10 Oct 2025 12:57PM UTC coverage: 70.389%. First build
18407208465

Pull #1359

github

web-flow
[DONE] Force indicate username in 'User name' field in csv file (#1339)

* Force indicate username in 'User name' field in csv file (Fix #1315)
---------

Co-authored-by: Céline Didier <ceine.didier@univ-lorraine.fr>
Pull Request #1359: [RELEASE] 4.0.2

94 of 118 new or added lines in 8 files covered. (79.66%)

12240 of 17389 relevant lines covered (70.39%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

38.56
/pod/video_encode_transcript/transcript.py
1
"""Esup-Pod transcript video functions."""
2

3
from django.conf import settings
1✔
4
from django.core.files import File
1✔
5
from pod.completion.models import Track
1✔
6
from pod.main.tasks import task_start_transcript
1✔
7
from webvtt import Caption, WebVTT
1✔
8

9
from .utils import (
1✔
10
    send_email,
11
    send_email_transcript,
12
    change_encoding_step,
13
    add_encoding_log,
14
)
15
from ..video.models import Video
1✔
16
import importlib.util
1✔
17

18
# Use the real transcription entry point only when at least one supported
# engine (vosk or whisper) can be found; otherwise expose a stub that fails
# explicitly when it is called.
if any(
    importlib.util.find_spec(engine) is not None for engine in ("vosk", "whisper")
):
    from .transcript_model import start_transcripting
else:

    def start_transcripting(*args, **kwargs):
        """Stand-in used when neither vosk nor whisper is installed."""
        raise NotImplementedError("No transcription engine available.")
27

28

29
from .encoding_utils import sec_to_timestamp
1✔
30

31
import os
1✔
32
import time
1✔
33

34
from tempfile import NamedTemporaryFile
1✔
35

36
import threading
1✔
37
import logging
1✔
38

39
# File storage backend: the podfile application model when USE_PODFILE is
# enabled, the model from pod.main otherwise.
__FILEPICKER__ = bool(getattr(settings, "USE_PODFILE", False))
if __FILEPICKER__:
    from pod.podfile.models import CustomFileModel
else:
    from pod.main.models import CustomFileModel

# Settings are read once, at import time, each with its fallback value.
EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(
    settings,
    "EMAIL_ON_TRANSCRIPTING_COMPLETION",
    True,
)
TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)
USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)
# The engine type is only meaningful when transcription is enabled.
if USE_TRANSCRIPTION:
    TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "WHISPER")
else:
    TRANSCRIPTION_TYPE = None
TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)
CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)

USE_REMOTE_ENCODING_TRANSCODING = getattr(
    settings,
    "USE_REMOTE_ENCODING_TRANSCODING",
    False,
)
# The remote task is imported only when remote encoding is enabled.
if USE_REMOTE_ENCODING_TRANSCODING:
    from .transcripting_tasks import start_transcripting_task

CAPTIONS_STRICT_ACCESSIBILITY = getattr(settings, "CAPTIONS_STRICT_ACCESSIBILITY", False)

log = logging.getLogger(__name__)
70

71

72
# ##########################################################################
73
# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
74
# ##########################################################################
75
def start_transcript(video_id, threaded=True) -> None:
    """
    Start the transcription of a video.

    Args:
        video_id: Identifier of the video to transcript.
        threaded (bool): When False, run the transcription synchronously in
            the caller. When True, hand it to the Celery task if
            CELERY_TO_ENCODE is set, otherwise to a daemon thread.
    """
    if not threaded:
        main_threaded_transcript(video_id)
        return

    if CELERY_TO_ENCODE:
        task_start_transcript.delay(video_id)
        return

    log.info("START TRANSCRIPT VIDEO %s" % video_id)
    worker = threading.Thread(
        target=main_threaded_transcript, args=[video_id], daemon=True
    )
    worker.start()
91

92

93
def main_threaded_transcript(video_to_encode_id) -> None:
    """
    Check configuration and audio file, then launch the transcription.

    The video is flagged as "encoding in progress" and its encoding step is
    set to 5. The transcription is then either delegated to the remote task
    (USE_REMOTE_ENCODING_TRANSCODING) or run locally, in which case the
    resulting WebVTT is saved and the owner notified. When no model is
    configured for the video language, or when the video has no mp3 file,
    the encoding step is set to -1 and an e-mail is sent with the error.

    Args:
        video_to_encode_id: Identifier of the video to transcript.
    """
    change_encoding_step(video_to_encode_id, 5, "transcripting audio")

    video_to_encode = Video.objects.get(id=video_to_encode_id)
    video_to_encode.encoding_in_progress = True
    video_to_encode.save()
    msg = ""
    lang = video_to_encode.transcript

    # TRANSCRIPTION_MODEL_PARAM defaults to False and TRANSCRIPTION_TYPE may
    # be None or an unknown key: look the model up defensively so that a
    # missing configuration is reported through the error path below instead
    # of raising TypeError/KeyError.
    engine_models = (TRANSCRIPTION_MODEL_PARAM or {}).get(TRANSCRIPTION_TYPE) or {}

    if not engine_models.get(lang):
        msg += "\n no transcript model found for lang: %s." % lang
        msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."
        change_encoding_step(video_to_encode.id, -1, msg)
        send_email(msg, video_to_encode.id)
    else:
        # Fetch the mp3 rendition once and reuse the result.
        video_mp3 = video_to_encode.get_video_mp3()
        mp3file = video_mp3.source_file if video_mp3 else None
        if mp3file is None:
            msg += "\n no mp3 file found for video: %s." % video_to_encode.id
            change_encoding_step(video_to_encode.id, -1, msg)
            send_email(msg, video_to_encode.id)
        else:
            mp3filepath = mp3file.path
            if USE_REMOTE_ENCODING_TRANSCODING:
                start_transcripting_task.delay(
                    video_to_encode.id, mp3filepath, video_to_encode.duration, lang
                )
            else:
                msg, webvtt = start_transcripting(
                    mp3filepath, video_to_encode.duration, lang
                )
                save_vtt_and_notify(video_to_encode, msg, webvtt)
    add_encoding_log(video_to_encode.id, msg)
134

135

136
def save_vtt_and_notify(video_to_encode, msg, webvtt) -> None:
    """
    Save the WebVTT file, mark the encoding as done and notify by e-mail.

    Args:
        video_to_encode: The video the transcript belongs to.
        msg (str): Log message accumulated so far.
        webvtt: The transcript to save with the video.
    """
    full_msg = msg + save_vtt(video_to_encode, webvtt)

    change_encoding_step(video_to_encode.id, 0, "done")
    video_to_encode.encoding_in_progress = False
    video_to_encode.save()

    # Send the end-of-transcription e-mail when enabled in the settings.
    if EMAIL_ON_TRANSCRIPTING_COMPLETION:
        send_email_transcript(video_to_encode)

    add_encoding_log(video_to_encode.id, full_msg)
146

147

148
def save_vtt(video: Video, webvtt: WebVTT, lang_code: str = None) -> str:
    """
    Save the WebVTT transcript as a subtitle track of the video.

    The transcript is written to a temporary ``.vtt`` file, stored in a
    ``CustomFileModel`` record and linked to the ``Track`` of the video for
    the given language. Captions are reworked for accessibility first,
    unless the transcription engine is WHISPER.

    Args:
        video (Video): The video the subtitles belong to.
        webvtt (WebVTT): The transcript to save.
        lang_code (str): Language of the subtitles; defaults to
            ``video.transcript`` when empty.

    Returns:
        str: Log message describing what was done, or an error line when the
        transcript contains no caption.
    """
    msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()
    lang = lang_code if lang_code else video.transcript

    # The context manager closes (and therefore removes) the temporary file
    # on every path, including when an exception is raised.
    with NamedTemporaryFile(suffix=".vtt") as temp_vtt_file:
        webvtt.save(temp_vtt_file.name)
        if not webvtt.captions:
            return msg + "\nERROR SUBTITLES Output size is 0"

        if TRANSCRIPTION_TYPE != "WHISPER":
            improve_captions_accessibility(webvtt)
        msg += "\nstore vtt file in bdd with CustomFileModel model file field"

        # One timestamp for both the record name and the stored file name,
        # so they cannot differ when the clock ticks between the two calls.
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        if __FILEPICKER__:
            video_dir = video.get_or_create_video_folder()
            subtitle_file, _ = CustomFileModel.objects.get_or_create(
                name="subtitle_%s_%s" % (lang, timestamp),
                folder=video_dir,
                created_by=video.owner,
            )
            # Drop a stale file on disk before saving the new content.
            if subtitle_file.file and os.path.isfile(subtitle_file.file.path):
                os.remove(subtitle_file.file.path)
        else:
            # Always create a new record: get_or_create() without lookup
            # arguments would return an arbitrary existing row (overwriting
            # an unrelated file) or raise MultipleObjectsReturned.
            subtitle_file = CustomFileModel.objects.create()

        subtitle_file.file.save(
            "subtitle_%s_%s.vtt" % (lang, timestamp),
            File(temp_vtt_file),
        )

    msg += "\nstore vtt file in bdd with Track model src field"
    subtitle_track, _ = Track.objects.get_or_create(video=video, lang=lang)
    subtitle_track.src = subtitle_file
    subtitle_track.save()
    return msg
192

193

194
def remove_unnecessary_spaces(text: str) -> str:
    """
    Collapse every run of whitespace into a single space.

    Leading and trailing whitespace is dropped; tabs and newlines count as
    whitespace too, so they are replaced by a space as well.

    Args:
        text (str): The string to clean.

    Returns:
        str: The cleaned string.
    """
    words = text.split()
    return " ".join(words)
205

206

207
def improve_captions_accessibility(
    webvtt, strict_accessibility=CAPTIONS_STRICT_ACCESSIBILITY
) -> None:
    """
    Rework the captions of a vtt file to make them conform to accessibility.

    - see `https://github.com/knarf18/Bonnes-pratiques-du-sous-titrage/blob/master/Liste%20de%20bonnes%20pratiques.md`
    - 40 characters maximum per line (CPL) in strict mode, 55 otherwise
    - 2 lines max by caption

    A caption needing more than two lines is split into several captions;
    its display time is shared between them in proportion to their number of
    words. The captions of ``webvtt`` are replaced in place and the file is
    saved again.

    Args:
        webvtt (:class:`webvtt.WebVTT`): The webvtt file content
        strict_accessibility (bool): If True, the caption will be more accessible

    """
    line_limit = 40 if strict_accessibility else 55
    new_captions = []
    for caption in webvtt.captions:
        lines = split_string(caption.text, line_limit, sep=" ")

        if len(lines) <= 2:
            # Short enough already: keep the timing, only re-wrap the text.
            new_cap = Caption()
            new_cap.start = caption.start
            new_cap.end = caption.end
            new_cap.text = "\n".join(lines)
            new_captions.append(new_cap)
            continue

        # Total number of words, used to share the duration between captions.
        total_words = len(caption.text.split())
        duration = caption.end_in_seconds - caption.start_in_seconds
        # Start from the beginning of the original caption, in seconds.
        start_time = caption.start_in_seconds
        # Two lines per caption, rounded up.
        for index in range((len(lines) + 1) // 2):
            new_cap = Caption()
            # NOTE(review): remove_unnecessary_spaces also collapses the "\n"
            # inserted by get_cap_text, so the text ends up on a single line.
            # Confirm whether this is intended.
            new_cap.text = remove_unnecessary_spaces(get_cap_text(lines, index))
            # Display time proportional to the number of words.
            share = duration * (len(new_cap.text.split()) / total_words)
            new_cap.start = sec_to_timestamp(start_time)
            new_cap.end = sec_to_timestamp(start_time + share)
            start_time += share
            new_captions.append(new_cap)

    # Replace all captions at once instead of deleting them one by one from
    # the front of the list, which is quadratic.
    webvtt.captions[:] = new_captions
    webvtt.save()
256

257

258
def get_cap_text(sent, x):
    """
    Build the text of the caption number ``x`` from a list of lines.

    Lines are taken two by two: caption ``x`` is made of the lines at
    positions ``2 * x`` and ``2 * x + 1``, joined by a newline. The second
    line is simply omitted when it does not exist.

    Args:
        sent (list): The list of text
        x (int): The position to extract

    Returns:
        str: The extracted text
    """
    first = x * 2
    text = sent[first]
    if first + 1 < len(sent):
        text += "\n" + sent[first + 1]
    return text
275

276

277
def pad(line, limit):
    """
    Right-pad a line with spaces up to the given width.

    A line that is already ``limit`` characters long or more is returned
    unchanged.

    Args:
        line (str): A line of text
        limit (int): The size of line

    Returns:
        str: the line with space at the end
    """
    return line.ljust(limit)
289

290

291
def split_string(text, limit, sep=" "):
    """
    Wrap text on word boundaries into lines of at most ``limit`` characters.

    Words are never cut. Each returned line is right-padded with spaces up
    to ``limit`` characters.

    Args:
        text (str): the text of the caption
        limit (int): size of line
        sep (str): separator inserted between the words of a line, default " "

    Returns:
        list: the padded lines; an empty list when the text has no word

    Raises:
        ValueError: if a word is longer than ``limit``
    """
    words = text.split()
    # Empty or whitespace-only text: nothing to wrap (max() and words[0]
    # would otherwise fail on the empty list).
    if not words:
        return []
    if max(map(len, words)) > limit:
        raise ValueError("limit is too small")

    lines = []
    current = words[0]
    for word in words[1:]:
        if len(current) + len(sep) + len(word) > limit:
            # The word does not fit on the current line: start a new one.
            lines.append(current)
            current = word
        else:
            current += sep + word
    lines.append(current)

    # Add spaces at the end of each line up to the limit.
    return [line.ljust(limit) for line in lines]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc