• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

EsupPortail / Esup-Pod / 8755927956

19 Apr 2024 03:21PM UTC coverage: 70.223%. First build
8755927956

Pull #1085

github

web-flow
[DONE] Update settings.py (#1115)

bump pod version to 3.6.0
Pull Request #1085: [DONE - FREEZE] Develop #3.6.0

744 of 994 new or added lines in 37 files covered. (74.85%)

10530 of 14995 relevant lines covered (70.22%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.0
/pod/video_encode_transcript/transcript.py
1
"""Esup-Pod transcript video functions."""
2

3
from django.conf import settings
1✔
4
from django.core.files import File
1✔
5
from pod.completion.models import Track
1✔
6
from pod.main.tasks import task_start_transcript
1✔
7
from webvtt import Caption
1✔
8

9
from .utils import (
1✔
10
    send_email,
11
    send_email_transcript,
12
    change_encoding_step,
13
    add_encoding_log,
14
)
15
from ..video.models import Video
1✔
16
import importlib.util
1✔
17

18
if (
1✔
19
    importlib.util.find_spec("vosk") is not None
20
    or importlib.util.find_spec("stt") is not None
21
    or importlib.util.find_spec("whisper") is not None
22
):
23
    from .transcript_model import start_transcripting
×
24

25

26
from .encoding_utils import sec_to_timestamp
1✔
27

28
import os
1✔
29
import time
1✔
30

31
from tempfile import NamedTemporaryFile
1✔
32

33
import threading
1✔
34
import logging
1✔
35

36
# Module configuration, read once at import time from the Django settings.
DEBUG = getattr(settings, "DEBUG", False)

# Select the file model (and the folder model, when available) according to
# the USE_PODFILE setting.
if getattr(settings, "USE_PODFILE", False):
    __FILEPICKER__ = True
    from pod.podfile.models import CustomFileModel
    from pod.podfile.models import UserFolder
else:
    __FILEPICKER__ = False
    from pod.main.models import CustomFileModel

EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(
    settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
)
TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)
USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)
# Always defined: main_threaded_transcript() reads TRANSCRIPTION_TYPE without
# checking USE_TRANSCRIPTION, so a conditional definition could end in a
# NameError at call time.
TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "STT")
TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)
CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)

USE_REMOTE_ENCODING_TRANSCODING = getattr(
    settings, "USE_REMOTE_ENCODING_TRANSCODING", False
)
if USE_REMOTE_ENCODING_TRANSCODING:
    # Only needed (and only imported) when transcription is delegated.
    from .transcripting_tasks import start_transcripting_task

log = logging.getLogger(__name__)
63

64
"""
65
TO TEST IN THE SHELL -->
66
from pod.video.transcript import *
67
stt_model = get_model("fr")
68
msg, webvtt, all_text = main_stt_transcript(
69
    "/test/audio_192k_pod.mp3", # file
70
    177, # file duration
71
    stt_model # model stt loaded
72
)
73
print(webvtt)
74
"""
75

76

77
# ##########################################################################
78
# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
79
# ##########################################################################
80
def start_transcript(video_id, threaded=True):
    """
    Start the transcription of a video.

    Args:
        video_id: identifier of the video, forwarded unchanged to the
            Celery task or to `main_threaded_transcript`.
        threaded (bool): when True (default), delegate the work: to the
            `task_start_transcript` Celery task if `CELERY_TO_ENCODE` is
            set, otherwise to a daemon thread. When False, run
            `main_threaded_transcript` synchronously in the caller.
    """
    if not threaded:
        main_threaded_transcript(video_id)
        return
    if CELERY_TO_ENCODE:
        task_start_transcript.delay(video_id)
        return
    log.info("START TRANSCRIPT VIDEO %s", video_id)
    # `daemon=True` replaces the deprecated `Thread.setDaemon(True)`.
    t = threading.Thread(
        target=main_threaded_transcript, args=[video_id], daemon=True
    )
    t.start()
96

97

98
def main_threaded_transcript(video_to_encode_id):
    """
    Check configuration and audio file, then launch the transcription.

    The video is flagged `encoding_in_progress`, then:

    - if no model is configured for the video language under the active
      `TRANSCRIPTION_TYPE`, or if the video has no mp3 file, the encoding
      step is set to -1 with an error message and an email is sent;
    - otherwise the transcription is either delegated to
      `start_transcripting_task` (when `USE_REMOTE_ENCODING_TRANSCODING` is
      set) or run locally with `start_transcripting`, whose result is handed
      to `save_vtt_and_notify`.

    The accumulated message is always appended to the encoding log.

    Args:
        video_to_encode_id: primary key of the `Video` to transcript.
    """
    change_encoding_step(video_to_encode_id, 5, "transcripting audio")

    video_to_encode = Video.objects.get(id=video_to_encode_id)
    video_to_encode.encoding_in_progress = True
    video_to_encode.save()
    msg = ""
    lang = video_to_encode.transcript
    # Tolerate an unset TRANSCRIPTION_MODEL_PARAM (default is False) or a
    # missing entry for the transcription type: both are reported through
    # the regular "no stt model found" path instead of raising.
    model_params = (TRANSCRIPTION_MODEL_PARAM or {}).get(TRANSCRIPTION_TYPE) or {}
    if not model_params.get(lang):
        msg += "\n no stt model found for lang: %s." % lang
        msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."
        change_encoding_step(video_to_encode.id, -1, msg)
        send_email(msg, video_to_encode.id)
    else:
        # Fetch the mp3 encoding once, then reuse it for the check and the use.
        mp3_encoding = video_to_encode.get_video_mp3()
        mp3file = mp3_encoding.source_file if mp3_encoding else None
        if mp3file is None:
            msg += "\n no mp3 file found for video: %s." % video_to_encode.id
            change_encoding_step(video_to_encode.id, -1, msg)
            send_email(msg, video_to_encode.id)
        else:
            mp3filepath = mp3file.path
            if USE_REMOTE_ENCODING_TRANSCODING:
                start_transcripting_task.delay(
                    video_to_encode.id, mp3filepath, video_to_encode.duration, lang
                )
            else:
                msg, webvtt = start_transcripting(
                    mp3filepath, video_to_encode.duration, lang
                )
                save_vtt_and_notify(video_to_encode, msg, webvtt)
    add_encoding_log(video_to_encode.id, msg)
139

140

141
def save_vtt_and_notify(video_to_encode, msg, webvtt):
    """
    Store the transcription result and close the encoding cycle.

    Saves the captions through `saveVTT`, sets the encoding step to
    0 / "done", clears the `encoding_in_progress` flag, optionally emails
    the owner, and appends the whole message to the encoding log.

    Args:
        video_to_encode: the video whose transcription just finished.
        msg (str): messages collected so far; the `saveVTT` report is
            appended to it before logging.
        webvtt: the captions object forwarded to `saveVTT`.
    """
    msg += saveVTT(video_to_encode, webvtt)
    change_encoding_step(video_to_encode.id, 0, "done")

    video_to_encode.encoding_in_progress = False
    video_to_encode.save()

    if EMAIL_ON_TRANSCRIPTING_COMPLETION:
        # Send the end-of-transcription email.
        send_email_transcript(video_to_encode)

    add_encoding_log(video_to_encode.id, msg)
151

152

153
def saveVTT(video, webvtt):
    """
    Save the WebVTT captions as a subtitle file linked to the video.

    The captions are written to a temporary ``.vtt`` file, reformatted by
    `improveCaptionsAccessibility`, stored in a `CustomFileModel` file
    field, and referenced by the `Track` of the video for its language.
    Nothing is stored when `webvtt` has no caption.

    Args:
        video: the video the subtitles belong to (its `transcript`
            attribute gives the subtitle language).
        webvtt: captions object exposing `save()` and `captions`.

    Returns:
        str: a log message describing the steps performed, or ending with
        an error line when there is no caption.
    """
    msg = "\nSAVE TRANSCRIPT WEBVTT: %s" % time.ctime()
    lang = video.transcript
    # One timestamp for both the record name and the stored file name, so
    # they cannot differ when the clock ticks between two calls.
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    # The context manager guarantees the temporary file is closed (and thus
    # removed) on every exit path.
    with NamedTemporaryFile(suffix=".vtt") as temp_vtt_file:
        # Must happen before improveCaptionsAccessibility(), which calls
        # webvtt.save() without a path (presumably reusing this one).
        webvtt.save(temp_vtt_file.name)
        if not webvtt.captions:
            return msg + "\nERROR SUBTITLES Output size is 0"

        improveCaptionsAccessibility(webvtt)
        msg += "\nstore vtt file in bdd with CustomFileModel model file field"
        if __FILEPICKER__:
            videodir, _ = UserFolder.objects.get_or_create(
                name="%s" % video.slug, owner=video.owner
            )
            # NOTE: earlier subtitle files of this folder are kept; the code
            # that used to delete them was disabled.
            subtitleFile, _ = CustomFileModel.objects.get_or_create(
                name="subtitle_%s_%s" % (lang, timestamp),
                folder=videodir,
                created_by=video.owner,
            )
            if subtitleFile.file and os.path.isfile(subtitleFile.file.path):
                os.remove(subtitleFile.file.path)
        else:
            # NOTE(review): get_or_create() without lookup arguments fetches
            # "the" existing row and is expected to fail once several rows
            # exist - confirm whether a fresh instance is intended here.
            subtitleFile, _ = CustomFileModel.objects.get_or_create()

        subtitleFile.file.save(
            "subtitle_%s_%s.vtt" % (lang, timestamp),
            File(temp_vtt_file),
        )
    msg += "\nstore vtt file in bdd with Track model src field"

    subtitleVtt, _ = Track.objects.get_or_create(video=video, lang=lang)
    subtitleVtt.src = subtitleFile
    subtitleVtt.lang = lang
    subtitleVtt.save()
    return msg
198

199

200
def improveCaptionsAccessibility(webvtt):
    """
    Parse the vtt file in argument to render the caption conform to accessibility.

    - see `https://github.com/knarf18/Bonnes-pratiques-du-sous-titrage/blob/master/Liste%20de%20bonnes%20pratiques.md` # noqa: E501
    - 40 characters maximum per line (CPL)
    - 2 lines maximum per caption

    Each caption text is wrapped to 40-character lines. A caption needing
    more than two lines is split into several captions of at most two lines
    each, the original time span being shared between them in proportion to
    their word count. The captions of `webvtt` are replaced in place and
    the file is saved.

    Args:
        webvtt (:class:`webvtt.WebVTT`): the webvtt file content

    """
    new_captions = []
    for caption in webvtt.captions:
        sent = split_string(caption.text, 40, sep=" ")
        if len(sent) <= 2:
            # Already fits in one caption: keep the timing, re-wrap the text.
            new_cap = Caption()
            new_cap.start = caption.start
            new_cap.end = caption.end
            new_cap.text = "\n".join(sent)
            new_captions.append(new_cap)
            continue

        # Total number of words of the original caption.
        nb_tot_words = len(caption.text.split())
        # Two lines per caption, rounded up.
        num_captions = (len(sent) + 1) // 2
        dur = caption.end_in_seconds - caption.start_in_seconds
        # Start from the beginning of the original caption (in seconds).
        start_time = caption.start_in_seconds
        for x in range(num_captions):
            new_cap = Caption()
            new_cap.text = get_cap_text(sent, x)
            # Display duration proportional to the number of words.
            time_calc = dur * (len(new_cap.text.split()) / nb_tot_words)
            new_cap.start = sec_to_timestamp(start_time)
            new_cap.end = sec_to_timestamp(start_time + time_calc)
            start_time = start_time + time_calc
            new_captions.append(new_cap)

    # Replace the old captions in place; one slice deletion avoids the
    # quadratic cost of repeatedly deleting the first element.
    del webvtt.captions[:]
    webvtt.captions.extend(new_captions)
    webvtt.save()
246

247

248
def get_cap_text(sent, x):
    """
    Build the text of the x-th two-line caption from a list of lines.

    Lines ``2 * x`` and ``2 * x + 1`` of `sent` are joined with a newline;
    when the second one does not exist, the first is returned alone.

    Args:
        sent (list): The list of text lines
        x (int): The position of the caption to extract

    Returns:
        str: The extracted text

    Raises:
        IndexError: if `sent` has no line at index ``2 * x``.
    """
    first = 2 * x
    text = sent[first]
    try:
        second = sent[first + 1]
    except IndexError:
        # No second line for this caption.
        return text
    return text + "\n" + second
265

266

267
def pad(line, limit):
    """
    Right-pad a line with spaces up to the given width.

    A line already as long as, or longer than, `limit` is returned
    unchanged.

    Args:
        line (str): A line of text
        limit (int): The target width of the line

    Returns:
        str: the line followed by the padding spaces
    """
    return line.ljust(limit)
279

280

281
def split_string(text, limit, sep=" "):
    """
    Wrap text on word boundaries into lines of at most `limit` characters.

    Words are never cut; each returned line is right-padded with spaces
    to exactly `limit` characters.

    Args:
        text (str): the text of the caption
        limit (int): maximum (and padded) size of a line
        sep (str): separator inserted between words of a line, default " "

    Returns:
        list: the padded lines, in order; empty when `text` contains no word

    Raises:
        ValueError: if a single word is longer than `limit`.
    """
    words = text.split()
    if not words:
        # Empty or whitespace-only text: nothing to wrap.
        return []
    if max(map(len, words)) > limit:
        raise ValueError("limit is too small")
    lines = []
    current = words[0]
    for word in words[1:]:
        if len(current) + len(sep) + len(word) > limit:
            # The word does not fit: close the current line, start a new one.
            lines.append(current)
            current = word
        else:
            current += sep + word
    lines.append(current)
    # Pad every line with trailing spaces up to `limit`.
    return [line.ljust(limit) for line in lines]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc