6377635546

Committed 02 Oct 2023 08:21AM UTC coverage: 70.396% (-1.6%) from 71.99%

Build # 6377635546

Build Type

push

github

Committed by

web-flow

Commit Message

Merge pull request #900 from EsupPortail/develop

[DONE] #3.4.0

Run Details

1509 of 1509 new or added lines in 58 files covered. (100.0%)

9288 of 13194 relevant lines covered (70.4%)

0.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/pod/video_encode_transcript/transcript.py

from django.conf import settings
from django.core.files import File
from pod.completion.models import Track
from pod.main.tasks import task_start_transcript

from .utils import (
    send_email,
    send_email_transcript,
    change_encoding_step,
    add_encoding_log,
)
from ..video.models import Video
import importlib.util

if (
    importlib.util.find_spec("vosk") is not None
    or importlib.util.find_spec("stt") is not None
):
    from .transcript_model import start_transcripting

import os
import time

from tempfile import NamedTemporaryFile

import threading
import logging

# Mirror of the Django DEBUG flag (False when the setting is absent).
DEBUG = getattr(settings, "DEBUG", False)

# Select the CustomFileModel implementation: the podfile application one when
# USE_PODFILE is enabled (UserFolder is then needed too), else the main one.
__FILEPICKER__ = bool(getattr(settings, "USE_PODFILE", False))
if __FILEPICKER__:
    from pod.podfile.models import CustomFileModel, UserFolder
else:
    from pod.main.models import CustomFileModel

# Whether an email is sent once a transcription is finished.
EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(
    settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
)
# Model parameters, indexed by transcription type and then by language in this
# module; falls back to False when the setting is not defined.
TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)
USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)
# NOTE: TRANSCRIPTION_TYPE is only bound when USE_TRANSCRIPTION is truthy.
if USE_TRANSCRIPTION:
    TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "VOSK")
TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)
# When enabled, start_transcript() delegates threaded runs to a Celery task.
CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)

# When enabled, the transcription itself is delegated to a distant task.
USE_DISTANT_ENCODING_TRANSCODING = getattr(
    settings, "USE_DISTANT_ENCODING_TRANSCODING", False
)
if USE_DISTANT_ENCODING_TRANSCODING:
    from .transcripting_tasks import start_transcripting_task

# Module-level logger.
log = logging.getLogger(__name__)

"""
TO TEST IN THE SHELL -->
from pod.video.transcript import *
stt_model = get_model("fr")
msg, webvtt, all_text = main_stt_transcript(
    "/test/audio_192k_pod.mp3", # file
    177, # file duration
    stt_model # model stt loaded
)
print(webvtt)
"""


# ##########################################################################
# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
# ##########################################################################
def start_transcript(video_id, threaded=True):
    """
    Main function call to start transcript.

    Will launch transcript mode depending on configuration.

    Args:
        video_id: id of the video to transcript.
        threaded: when False, run the transcript synchronously in the caller.
            When True, delegate to the Celery task if CELERY_TO_ENCODE is
            set, otherwise run it in a background daemon thread.
    """
    if not threaded:
        main_threaded_transcript(video_id)
        return

    if CELERY_TO_ENCODE:
        task_start_transcript.delay(video_id)
        return

    log.info("START TRANSCRIPT VIDEO %s", video_id)
    # Thread.setDaemon() is deprecated since Python 3.10; pass the flag to the
    # constructor instead.
    worker = threading.Thread(
        target=main_threaded_transcript, args=[video_id], daemon=True
    )
    worker.start()

def main_threaded_transcript(video_to_encode_id):
    """
    Main function to transcript.

    Will check all configuration and file and launch transcript.

    The encoding step is set to "transcripting audio", then:
    - if no model is configured for the video language, or if the video has
      no mp3 file, the step is set to -1 with the error message and the
      message is sent by email;
    - otherwise the transcription is either delegated to the distant task
      (USE_DISTANT_ENCODING_TRANSCODING) or run locally, in which case the
      resulting webvtt is saved and the completion notification is sent.
    The message is appended to the encoding log in every case.

    Args:
        video_to_encode_id: id of the Video to transcript.
    """
    change_encoding_step(video_to_encode_id, 5, "transcripting audio")

    video_to_encode = Video.objects.get(id=video_to_encode_id)

    msg = ""
    lang = video_to_encode.transcript
    # check if TRANSCRIPTION_MODEL_PARAM [lang] exist.
    # The setting defaults to False and may lack the current transcription
    # type: treat both cases as "no model" so the error is reported through
    # the normal path instead of raising TypeError/KeyError.
    models_by_lang = (TRANSCRIPTION_MODEL_PARAM or {}).get(TRANSCRIPTION_TYPE) or {}
    if not models_by_lang.get(lang):
        msg += "\n no stt model found for lang:%s." % lang
        msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."
        change_encoding_step(video_to_encode.id, -1, msg)
        send_email(msg, video_to_encode.id)
    else:
        # Look the mp3 up once so the test and the use share the same result.
        video_mp3 = video_to_encode.get_video_mp3()
        mp3file = video_mp3.source_file if video_mp3 else None
        if mp3file is None:
            msg += "\n no mp3 file found for video :%s." % video_to_encode.id
            change_encoding_step(video_to_encode.id, -1, msg)
            send_email(msg, video_to_encode.id)
        else:
            mp3filepath = mp3file.path
            if USE_DISTANT_ENCODING_TRANSCODING:
                start_transcripting_task.delay(
                    video_to_encode.id, mp3filepath, video_to_encode.duration, lang
                )
            else:
                msg, webvtt = start_transcripting(
                    mp3filepath, video_to_encode.duration, lang
                )
                save_vtt_and_notify(video_to_encode, msg, webvtt)
    add_encoding_log(video_to_encode.id, msg)


def save_vtt_and_notify(video_to_encode, msg, webvtt):
    """
    Store the webvtt for the video, mark the encoding as done and notify.

    The message returned by saveVTT is appended to ``msg``; the result is
    written to the encoding log after the optional completion email.
    """
    full_log = msg + saveVTT(video_to_encode, webvtt)
    video_id = video_to_encode.id
    change_encoding_step(video_id, 0, "done")
    # Send the end-of-transcription email when enabled in the settings.
    if EMAIL_ON_TRANSCRIPTING_COMPLETION:
        send_email_transcript(video_to_encode)
    add_encoding_log(video_id, full_log)

def saveVTT(video, webvtt):
    """
    Save webvtt file with the video.

    The webvtt is written to a temporary ``.vtt`` file. When it contains
    captions, the file is stored in a CustomFileModel (inside the owner's
    folder named after the video slug when the file picker is enabled) and
    linked to the Track of the video for the transcript language.

    Args:
        video: the video the subtitles belong to; its ``transcript``,
            ``slug`` and ``owner`` attributes are read.
        webvtt: object exposing ``save(path)`` and ``captions``.

    Returns:
        The log message describing what has been done, or an error line when
        the webvtt has no caption.
    """
    msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()
    lang = video.transcript
    # The context manager closes (and thereby removes) the temporary file
    # even if storing it fails, instead of leaking the handle.
    with NamedTemporaryFile(suffix=".vtt") as temp_vtt_file:
        webvtt.save(temp_vtt_file.name)
        if not webvtt.captions:
            return msg + "\nERROR SUBTITLES Output size is 0"

        msg += "\nstore vtt file in bdd with CustomFileModel model file field"
        if __FILEPICKER__:
            videodir, _ = UserFolder.objects.get_or_create(
                name="%s" % video.slug, owner=video.owner
            )
            # Previous subtitle files of the folder are kept: each run gets
            # its own timestamped entry.
            subtitleFile, _ = CustomFileModel.objects.get_or_create(
                name="subtitle_%s_%s" % (lang, time.strftime("%Y%m%d-%H%M%S")),
                folder=videodir,
                created_by=video.owner,
            )
            if subtitleFile.file and os.path.isfile(subtitleFile.file.path):
                os.remove(subtitleFile.file.path)
        else:
            # Always create a new row: get_or_create() without lookup returns
            # whatever single row already exists (overwriting a file that
            # belongs to something else) or raises MultipleObjectsReturned.
            subtitleFile = CustomFileModel.objects.create()

        subtitleFile.file.save(
            "subtitle_%s_%s.vtt" % (lang, time.strftime("%Y%m%d-%H%M%S")),
            File(temp_vtt_file),
        )

    msg += "\nstore vtt file in bdd with Track model src field"

    subtitleVtt, _ = Track.objects.get_or_create(video=video, lang=lang)
    subtitleVtt.src = subtitleFile
    subtitleVtt.lang = lang
    subtitleVtt.save()
    return msg

1	from django.conf import settings	×
2	from django.core.files import File	×
3	from pod.completion.models import Track	×
4	from pod.main.tasks import task_start_transcript	×
5
6	from .utils import (	×
7	send_email,
8	send_email_transcript,
9	change_encoding_step,
10	add_encoding_log,
11	)
12	from ..video.models import Video	×
13	import importlib.util	×
14
15	if (	×
16	importlib.util.find_spec("vosk") is not None
17	or importlib.util.find_spec("stt") is not None
18	):
19	from .transcript_model import start_transcripting	×
20
21	import os	×
22	import time	×
23
24	from tempfile import NamedTemporaryFile	×
25
26	import threading	×
27	import logging	×
28
29	DEBUG = getattr(settings, "DEBUG", False)	×
30
31	if getattr(settings, "USE_PODFILE", False):	×
32	__FILEPICKER__ = True	×
33	from pod.podfile.models import CustomFileModel	×
34	from pod.podfile.models import UserFolder	×
35	else:
36	__FILEPICKER__ = False	×
37	from pod.main.models import CustomFileModel	×
38
39	EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(	×
40	settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
41	)
42	TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)	×
43	USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)	×
44	if USE_TRANSCRIPTION:	×
45	TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "VOSK")	×
46	TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)	×
47	CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)	×
48
49	USE_DISTANT_ENCODING_TRANSCODING = getattr(	×
50	settings, "USE_DISTANT_ENCODING_TRANSCODING", False
51	)
52	if USE_DISTANT_ENCODING_TRANSCODING:	×
53	from .transcripting_tasks import start_transcripting_task	×
54
55	log = logging.getLogger(__name__)	×
56
57	"""
58	TO TEST IN THE SHELL -->
59	from pod.video.transcript import *
60	stt_model = get_model("fr")
61	msg, webvtt, all_text = main_stt_transcript(
62	"/test/audio_192k_pod.mp3", # file
63	177, # file duration
64	stt_model # model stt loaded
65	)
66	print(webvtt)
67	"""
68
69
70	# ##########################################################################
71	# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
72	# ##########################################################################
73	def start_transcript(video_id, threaded=True):	×
74	"""
75	Main function call to start transcript.
76	Will launch transcript mode depending on configuration.
77	"""
78	if threaded:	×
79	if CELERY_TO_ENCODE:	×
80	task_start_transcript.delay(video_id)	×
81	else:
82	log.info("START TRANSCRIPT VIDEO %s" % video_id)	×
83	t = threading.Thread(target=main_threaded_transcript, args=[video_id])	×
84	t.setDaemon(True)	×
85	t.start()	×
86	else:
87	main_threaded_transcript(video_id)	×
88
89
90	def main_threaded_transcript(video_to_encode_id):	×
91	"""
92	Main function to transcript.
93	Will check all configuration and file and launch transcript.
94	"""
95	change_encoding_step(video_to_encode_id, 5, "transcripting audio")	×
96
97	video_to_encode = Video.objects.get(id=video_to_encode_id)	×
98
99	msg = ""	×
100	lang = video_to_encode.transcript	×
101	# check if TRANSCRIPTION_MODEL_PARAM [lang] exist
102	if not TRANSCRIPTION_MODEL_PARAM[TRANSCRIPTION_TYPE].get(lang):	×
103	msg += "\n no stt model found for lang:%s." % lang	×
104	msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."	×
105	change_encoding_step(video_to_encode.id, -1, msg)	×
106	send_email(msg, video_to_encode.id)	×
107	else:
108	mp3file = (	×
109	video_to_encode.get_video_mp3().source_file
110	if video_to_encode.get_video_mp3()
111	else None
112	)
113	if mp3file is None:	×
114	msg += "\n no mp3 file found for video :%s." % video_to_encode.id	×
115	change_encoding_step(video_to_encode.id, -1, msg)	×
116	send_email(msg, video_to_encode.id)	×
117	else:
118	mp3filepath = mp3file.path	×
119	if USE_DISTANT_ENCODING_TRANSCODING:	×
120	start_transcripting_task.delay(	×
121	video_to_encode.id, mp3filepath, video_to_encode.duration, lang
122	)
123	else:
124	msg, webvtt = start_transcripting(	×
125	mp3filepath, video_to_encode.duration, lang
126	)
127	save_vtt_and_notify(video_to_encode, msg, webvtt)	×
128	add_encoding_log(video_to_encode.id, msg)	×
129
130
131	def save_vtt_and_notify(video_to_encode, msg, webvtt):	×
132	"""Call save vtt file function and notify by mail at the end."""
133	msg += saveVTT(video_to_encode, webvtt)	×
134	change_encoding_step(video_to_encode.id, 0, "done")	×
135	# envois mail fin transcription
136	if EMAIL_ON_TRANSCRIPTING_COMPLETION:	×
137	send_email_transcript(video_to_encode)	×
138	add_encoding_log(video_to_encode.id, msg)	×
139
140
141	def saveVTT(video, webvtt):	×
142	"""Save webvtt file with the video."""
143	msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()	×
144	lang = video.transcript	×
145	temp_vtt_file = NamedTemporaryFile(suffix=".vtt")	×
146	webvtt.save(temp_vtt_file.name)	×
147	if webvtt.captions:	×
148	msg += "\nstore vtt file in bdd with CustomFileModel model file field"	×
149	if __FILEPICKER__:	×
150	videodir, created = UserFolder.objects.get_or_create(	×
151	name="%s" % video.slug, owner=video.owner
152	)
153	"""
154	previousSubtitleFile = CustomFileModel.objects.filter(
155	name__startswith="subtitle_%s" % lang,
156	folder=videodir,
157	created_by=video.owner
158	)
159	"""
160	# for subt in previousSubtitleFile:
161	# subt.delete()
162	subtitleFile, created = CustomFileModel.objects.get_or_create(	×
163	name="subtitle_%s_%s" % (lang, time.strftime("%Y%m%d-%H%M%S")),
164	folder=videodir,
165	created_by=video.owner,
166	)
167	if subtitleFile.file and os.path.isfile(subtitleFile.file.path):	×
168	os.remove(subtitleFile.file.path)	×
169	else:
170	subtitleFile, created = CustomFileModel.objects.get_or_create()	×
171
172	subtitleFile.file.save(	×
173	"subtitle_%s_%s.vtt" % (lang, time.strftime("%Y%m%d-%H%M%S")),
174	File(temp_vtt_file),
175	)
176	msg += "\nstore vtt file in bdd with Track model src field"	×
177
178	subtitleVtt, created = Track.objects.get_or_create(video=video, lang=lang)	×
179	subtitleVtt.src = subtitleFile	×
180	subtitleVtt.lang = lang	×
181	subtitleVtt.save()	×
182	else:
183	msg += "\nERROR SUBTITLES Output size is 0"	×
184	return msg	×

EsupPortail / Esup-Pod / 6377635546

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous