6611338684

Committed 23 Oct 2023 09:27AM UTC coverage: 70.317% (+0.01%) from 70.305%

Build # 6611338684

Build Type

push

github

Committed by

web-flow

Commit Message

Merge pull request #971 from EsupPortail/develop

[DONE] Develop #3.4.1

Run Details

49 of 49 new or added lines in 10 files covered. (100.0%)

9298 of 13223 relevant lines covered (70.32%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/pod/video_encode_transcript/transcript.py

"""Esup-Pod transcript video functions."""
from django.conf import settings
from django.core.files import File
from pod.completion.models import Track
from pod.main.tasks import task_start_transcript

from .utils import (
    send_email,
    send_email_transcript,
    change_encoding_step,
    add_encoding_log,
)
from ..video.models import Video
import importlib.util

if (
    importlib.util.find_spec("vosk") is not None
    or importlib.util.find_spec("stt") is not None
):
    from .transcript_model import start_transcripting

import os
import time

from tempfile import NamedTemporaryFile

import threading
import logging

# Mirror of the Django DEBUG setting (False when the setting is absent).
DEBUG = getattr(settings, "DEBUG", False)

# Pick the file model used to store subtitle files: the podfile app's models
# (with per-user folders) when USE_PODFILE is enabled, otherwise the model
# from pod.main. UserFolder is only imported in the podfile case.
if getattr(settings, "USE_PODFILE", False):
    __FILEPICKER__ = True
    from pod.podfile.models import CustomFileModel
    from pod.podfile.models import UserFolder
else:
    __FILEPICKER__ = False
    from pod.main.models import CustomFileModel

# When true (the default), save_vtt_and_notify sends an email once the
# transcript has been saved.
EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(
    settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
)
# Looked up by transcription type, then by language, in
# main_threaded_transcript.
# NOTE(review): the default is False, which is not a mapping - confirm that
# deployments using transcription always define this setting.
TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)
USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)
# NOTE(review): TRANSCRIPTION_TYPE is only bound when USE_TRANSCRIPTION is
# truthy, yet main_threaded_transcript reads it unconditionally.
if USE_TRANSCRIPTION:
    TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "STT")
# Not referenced in the visible part of this module.
TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)
# When true, start_transcript dispatches through task_start_transcript.delay
# instead of spawning a local thread.
CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)

# When true, main_threaded_transcript hands the work to
# start_transcripting_task.delay instead of calling start_transcripting.
USE_DISTANT_ENCODING_TRANSCODING = getattr(
    settings, "USE_DISTANT_ENCODING_TRANSCODING", False
)
if USE_DISTANT_ENCODING_TRANSCODING:
    from .transcripting_tasks import start_transcripting_task

# Module-level logger named after this module.
log = logging.getLogger(__name__)

"""
TO TEST IN THE SHELL -->
from pod.video.transcript import *
stt_model = get_model("fr")
msg, webvtt, all_text = main_stt_transcript(
    "/test/audio_192k_pod.mp3", # file
    177, # file duration
    stt_model # model stt loaded
)
print(webvtt)
"""


# ##########################################################################
# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
# ##########################################################################
def start_transcript(video_id, threaded=True):
    """
    Call to start transcript main function.

    Will launch transcript mode depending on configuration.

    Args:
        video_id: identifier of the video to transcribe; passed unchanged
            to the transcript entry point.
        threaded: when false, run ``main_threaded_transcript`` synchronously
            in the calling thread. When true (default), dispatch through
            ``task_start_transcript.delay`` if CELERY_TO_ENCODE is set,
            otherwise run in a background daemon thread.
    """
    if not threaded:
        main_threaded_transcript(video_id)
        return

    if CELERY_TO_ENCODE:
        task_start_transcript.delay(video_id)
        return

    log.info("START TRANSCRIPT VIDEO %s", video_id)
    # Thread.setDaemon() is deprecated since Python 3.10; pass the flag to
    # the constructor instead.
    worker = threading.Thread(
        target=main_threaded_transcript, args=[video_id], daemon=True
    )
    worker.start()


def main_threaded_transcript(video_to_encode_id):
    """
    Transcript main function.

    Will check all configuration and file and launch transcript.

    The encoding step is first set to "transcripting audio". If no model is
    configured for the video's transcript language, or the video has no mp3
    encoding, the step is set to -1 with an explanatory message and an email
    is sent. Otherwise the transcription is either delegated to
    ``start_transcripting_task`` (distant encoding) or run locally, in which
    case the resulting WebVTT is saved and the owner notified. In every case
    the accumulated message is appended to the encoding log at the end.

    Args:
        video_to_encode_id: primary key of the Video to transcribe.
    """
    change_encoding_step(video_to_encode_id, 5, "transcripting audio")

    video_to_encode = Video.objects.get(id=video_to_encode_id)

    msg = ""
    lang = video_to_encode.transcript
    # TRANSCRIPTION_MODEL_PARAM defaults to False and may lack an entry for
    # the configured type; treat both as "no model configured" so the error
    # is reported through the normal path instead of raising
    # TypeError/KeyError.
    models_for_type = (TRANSCRIPTION_MODEL_PARAM or {}).get(TRANSCRIPTION_TYPE) or {}
    if not models_for_type.get(lang):
        msg += "\n no stt model found for lang: %s." % lang
        msg += " Please add it in TRANSCRIPTION_MODEL_PARAM."
        change_encoding_step(video_to_encode.id, -1, msg)
        send_email(msg, video_to_encode.id)
    else:
        # Resolve the mp3 encoding once instead of calling get_video_mp3()
        # twice.
        mp3_encoding = video_to_encode.get_video_mp3()
        mp3file = mp3_encoding.source_file if mp3_encoding else None
        if mp3file is None:
            msg += "\n no mp3 file found for video: %s." % video_to_encode.id
            change_encoding_step(video_to_encode.id, -1, msg)
            send_email(msg, video_to_encode.id)
        else:
            mp3filepath = mp3file.path
            if USE_DISTANT_ENCODING_TRANSCODING:
                start_transcripting_task.delay(
                    video_to_encode.id, mp3filepath, video_to_encode.duration, lang
                )
            else:
                msg, webvtt = start_transcripting(
                    mp3filepath, video_to_encode.duration, lang
                )
                save_vtt_and_notify(video_to_encode, msg, webvtt)
    add_encoding_log(video_to_encode.id, msg)


def save_vtt_and_notify(video_to_encode, msg, webvtt):
    """Store the WebVTT for the video, mark the step done and notify.

    The text returned by ``saveVTT`` is appended to ``msg``, the encoding
    step is set to 0 ("done"), a completion email is sent when
    EMAIL_ON_TRANSCRIPTING_COMPLETION is enabled, and the combined message
    is finally written to the encoding log.
    """
    full_log = msg + saveVTT(video_to_encode, webvtt)
    change_encoding_step(video_to_encode.id, 0, "done")
    # Send the end-of-transcription email.
    notify_owner = EMAIL_ON_TRANSCRIPTING_COMPLETION
    if notify_owner:
        send_email_transcript(video_to_encode)
    add_encoding_log(video_to_encode.id, full_log)


def saveVTT(video, webvtt):
    """Save webvtt file with the video.

    The captions are written to a temporary ``.vtt`` file. When there is at
    least one caption, the file is stored through ``CustomFileModel`` and
    attached to the video's ``Track`` for the transcript language.

    Args:
        video: video being transcribed; ``transcript`` gives the subtitle
            language, and ``slug``/``owner`` are read when the file picker
            is enabled.
        webvtt: object exposing ``save(path)`` and ``captions``.

    Returns:
        str: log text describing what was stored, or ending with an error
        line when ``webvtt.captions`` is empty.
    """
    msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()
    lang = video.transcript
    # The context manager closes the temporary file (removing it from disk)
    # on every exit path, including exceptions.
    with NamedTemporaryFile(suffix=".vtt") as temp_vtt_file:
        webvtt.save(temp_vtt_file.name)
        if not webvtt.captions:
            return msg + "\nERROR SUBTITLES Output size is 0"

        msg += "\nstore vtt file in bdd with CustomFileModel model file field"
        # One timestamp shared by the record name and the stored file name,
        # so they cannot differ if the clock ticks between the two uses.
        stamp = time.strftime("%Y%m%d-%H%M%S")
        if __FILEPICKER__:
            videodir, _ = UserFolder.objects.get_or_create(
                name="%s" % video.slug, owner=video.owner
            )
            # Earlier subtitle files are kept: each run uses a timestamped
            # name.
            subtitleFile, _ = CustomFileModel.objects.get_or_create(
                name="subtitle_%s_%s" % (lang, stamp),
                folder=videodir,
                created_by=video.owner,
            )
            if subtitleFile.file and os.path.isfile(subtitleFile.file.path):
                os.remove(subtitleFile.file.path)
        else:
            # A bare get_or_create() has no lookup: it would reuse whichever
            # single row exists, or raise MultipleObjectsReturned as soon as
            # the table holds several rows. Always create a fresh record.
            subtitleFile = CustomFileModel.objects.create()

        subtitleFile.file.save(
            "subtitle_%s_%s.vtt" % (lang, stamp),
            File(temp_vtt_file),
        )
        msg += "\nstore vtt file in bdd with Track model src field"

        subtitleVtt, _ = Track.objects.get_or_create(video=video, lang=lang)
        subtitleVtt.src = subtitleFile
        subtitleVtt.lang = lang
        subtitleVtt.save()
    return msg

1	"""Esup-Pod transcript video functions."""
2	from django.conf import settings	×
3	from django.core.files import File	×
4	from pod.completion.models import Track	×
5	from pod.main.tasks import task_start_transcript	×
6
7	from .utils import (	×
8	send_email,
9	send_email_transcript,
10	change_encoding_step,
11	add_encoding_log,
12	)
13	from ..video.models import Video	×
14	import importlib.util	×
15
16	if (	×
17	importlib.util.find_spec("vosk") is not None
18	or importlib.util.find_spec("stt") is not None
19	):
20	from .transcript_model import start_transcripting	×
21
22	import os	×
23	import time	×
24
25	from tempfile import NamedTemporaryFile	×
26
27	import threading	×
28	import logging	×
29
30	DEBUG = getattr(settings, "DEBUG", False)	×
31
32	if getattr(settings, "USE_PODFILE", False):	×
33	__FILEPICKER__ = True	×
34	from pod.podfile.models import CustomFileModel	×
35	from pod.podfile.models import UserFolder	×
36	else:
37	__FILEPICKER__ = False	×
38	from pod.main.models import CustomFileModel	×
39
40	EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(	×
41	settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
42	)
43	TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)	×
44	USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)	×
45	if USE_TRANSCRIPTION:	×
46	TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "STT")	×
47	TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)	×
48	CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)	×
49
50	USE_DISTANT_ENCODING_TRANSCODING = getattr(	×
51	settings, "USE_DISTANT_ENCODING_TRANSCODING", False
52	)
53	if USE_DISTANT_ENCODING_TRANSCODING:	×
54	from .transcripting_tasks import start_transcripting_task	×
55
56	log = logging.getLogger(__name__)	×
57
58	"""
59	TO TEST IN THE SHELL -->
60	from pod.video.transcript import *
61	stt_model = get_model("fr")
62	msg, webvtt, all_text = main_stt_transcript(
63	"/test/audio_192k_pod.mp3", # file
64	177, # file duration
65	stt_model # model stt loaded
66	)
67	print(webvtt)
68	"""
69
70
71	# ##########################################################################
72	# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
73	# ##########################################################################
74	def start_transcript(video_id, threaded=True):	×
75	"""
76	Call to start transcript main function.
77
78	Will launch transcript mode depending on configuration.
79	"""
80	if threaded:	×
81	if CELERY_TO_ENCODE:	×
82	task_start_transcript.delay(video_id)	×
83	else:
84	log.info("START TRANSCRIPT VIDEO %s" % video_id)	×
85	t = threading.Thread(target=main_threaded_transcript, args=[video_id])	×
86	t.setDaemon(True)	×
87	t.start()	×
88	else:
89	main_threaded_transcript(video_id)	×
90
91
92	def main_threaded_transcript(video_to_encode_id):	×
93	"""
94	Transcript main function.
95
96	Will check all configuration and file and launch transcript.
97	"""
98	change_encoding_step(video_to_encode_id, 5, "transcripting audio")	×
99
100	video_to_encode = Video.objects.get(id=video_to_encode_id)	×
101
102	msg = ""	×
103	lang = video_to_encode.transcript	×
104	# check if TRANSCRIPTION_MODEL_PARAM [lang] exist
105	if not TRANSCRIPTION_MODEL_PARAM[TRANSCRIPTION_TYPE].get(lang):	×
106	msg += "\n no stt model found for lang: %s." % lang	×
107	msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."	×
108	change_encoding_step(video_to_encode.id, -1, msg)	×
109	send_email(msg, video_to_encode.id)	×
110	else:
111	mp3file = (	×
112	video_to_encode.get_video_mp3().source_file
113	if video_to_encode.get_video_mp3()
114	else None
115	)
116	if mp3file is None:	×
117	msg += "\n no mp3 file found for video: %s." % video_to_encode.id	×
118	change_encoding_step(video_to_encode.id, -1, msg)	×
119	send_email(msg, video_to_encode.id)	×
120	else:
121	mp3filepath = mp3file.path	×
122	if USE_DISTANT_ENCODING_TRANSCODING:	×
123	start_transcripting_task.delay(	×
124	video_to_encode.id, mp3filepath, video_to_encode.duration, lang
125	)
126	else:
127	msg, webvtt = start_transcripting(	×
128	mp3filepath, video_to_encode.duration, lang
129	)
130	save_vtt_and_notify(video_to_encode, msg, webvtt)	×
131	add_encoding_log(video_to_encode.id, msg)	×
132
133
134	def save_vtt_and_notify(video_to_encode, msg, webvtt):	×
135	"""Call save vtt file function and notify by mail at the end."""
136	msg += saveVTT(video_to_encode, webvtt)	×
137	change_encoding_step(video_to_encode.id, 0, "done")	×
138	# envois mail fin transcription
139	if EMAIL_ON_TRANSCRIPTING_COMPLETION:	×
140	send_email_transcript(video_to_encode)	×
141	add_encoding_log(video_to_encode.id, msg)	×
142
143
144	def saveVTT(video, webvtt):	×
145	"""Save webvtt file with the video."""
146	msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()	×
147	lang = video.transcript	×
148	temp_vtt_file = NamedTemporaryFile(suffix=".vtt")	×
149	webvtt.save(temp_vtt_file.name)	×
150	if webvtt.captions:	×
151	msg += "\nstore vtt file in bdd with CustomFileModel model file field"	×
152	if __FILEPICKER__:	×
153	videodir, created = UserFolder.objects.get_or_create(	×
154	name="%s" % video.slug, owner=video.owner
155	)
156	"""
157	previousSubtitleFile = CustomFileModel.objects.filter(
158	name__startswith="subtitle_%s" % lang,
159	folder=videodir,
160	created_by=video.owner
161	)
162	"""
163	# for subt in previousSubtitleFile:
164	# subt.delete()
165	subtitleFile, created = CustomFileModel.objects.get_or_create(	×
166	name="subtitle_%s_%s" % (lang, time.strftime("%Y%m%d-%H%M%S")),
167	folder=videodir,
168	created_by=video.owner,
169	)
170	if subtitleFile.file and os.path.isfile(subtitleFile.file.path):	×
171	os.remove(subtitleFile.file.path)	×
172	else:
173	subtitleFile, created = CustomFileModel.objects.get_or_create()	×
174
175	subtitleFile.file.save(	×
176	"subtitle_%s_%s.vtt" % (lang, time.strftime("%Y%m%d-%H%M%S")),
177	File(temp_vtt_file),
178	)
179	msg += "\nstore vtt file in bdd with Track model src field"	×
180
181	subtitleVtt, created = Track.objects.get_or_create(video=video, lang=lang)	×
182	subtitleVtt.src = subtitleFile	×
183	subtitleVtt.lang = lang	×
184	subtitleVtt.save()	×
185	else:
186	msg += "\nERROR SUBTITLES Output size is 0"	×
187	return msg	×

EsupPortail / Esup-Pod / 6611338684

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous