5462629459

pending completion

Build # 5462629459

Build Type

Pull #899

github

Committed by

web-flow

Commit Message

Merge e759dacb6 into c94f0e331

Pull Request Pull Request #899: [DONE] Ptitloup/feature new encoding

Run Details

400 of 400 new or added lines in 14 files covered. (100.0%)

8968 of 12675 relevant lines covered (70.75%)

0.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/pod/video_encode_transcript/transcript.py

from django.conf import settings
from django.core.files import File
from pod.completion.models import Track
from pod.main.tasks import task_start_transcript

from .utils import (
    send_email,
    send_email_transcript,
    change_encoding_step,
    add_encoding_log,
)
from ..video.models import Video
import importlib.util
if (
    importlib.util.find_spec("vosk") is not None
    or importlib.util.find_spec("stt") is not None
):
    from .transcript_model import start_transcripting

import os
import time

from tempfile import NamedTemporaryFile

import threading
import logging

# Module-level configuration, read once from Django settings at import time.
DEBUG = getattr(settings, "DEBUG", False)

# Choose where subtitle files are stored: the podfile app (with per-user
# folders) when USE_PODFILE is enabled, otherwise the basic model from
# pod.main. Note that UserFolder is only imported in the podfile case.
if getattr(settings, "USE_PODFILE", False):
    __FILEPICKER__ = True
    from pod.podfile.models import CustomFileModel
    from pod.podfile.models import UserFolder
else:
    __FILEPICKER__ = False
    from pod.main.models import CustomFileModel

# Whether an email is sent once a transcription has finished (default: True).
EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(
    settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
)
# Indexed below as TRANSCRIPTION_MODEL_PARAM[TRANSCRIPTION_TYPE].get(lang);
# the default is False, which is not subscriptable.
TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)
USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)
# NOTE(review): TRANSCRIPTION_TYPE is only defined when USE_TRANSCRIPTION is
# truthy; code reading it otherwise raises NameError.
if USE_TRANSCRIPTION:
    TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "VOSK")
TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)
# When true, start_transcript() hands the job to the Celery task instead of
# starting a local thread.
CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)

# When true, the transcription itself is delegated to
# start_transcripting_task, which is imported only in that case.
USE_DISTANT_ENCODING_TRANSCODING = getattr(
    settings,
    "USE_DISTANT_ENCODING_TRANSCODING",
    False
)
if USE_DISTANT_ENCODING_TRANSCODING:
    from .transcripting_tasks import start_transcripting_task

# Module logger, named after this module.
log = logging.getLogger(__name__)

"""
TO TEST IN THE SHELL -->
from pod.video.transcript import *
stt_model = get_model("fr")
msg, webvtt, all_text = main_stt_transcript(
    "/test/audio_192k_pod.mp3", # file
    177, # file duration
    stt_model # model stt loaded
)
print(webvtt)
"""


# ##########################################################################
# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
# ##########################################################################
def start_transcript(video_id, threaded=True):
    """
    Main function call to start transcript.

    Will launch transcript mode depending on configuration.

    Args:
        video_id: Identifier of the video to transcribe.
        threaded: When true (default), the work is dispatched in the
            background: through the ``task_start_transcript`` Celery task if
            ``CELERY_TO_ENCODE`` is set, otherwise in a daemon thread.
            When false, the transcription runs synchronously in the caller.
    """
    if not threaded:
        main_threaded_transcript(video_id)
        return

    if CELERY_TO_ENCODE:
        task_start_transcript.delay(video_id)
        return

    log.info("START TRANSCRIPT VIDEO %s", video_id)
    # Pass daemon=True to the constructor: Thread.setDaemon() is deprecated
    # since Python 3.10.
    worker = threading.Thread(
        target=main_threaded_transcript, args=[video_id], daemon=True
    )
    worker.start()


def main_threaded_transcript(video_to_encode_id):
    """
    Main function to transcript.

    Will check all configuration and file and launch transcript.

    The video is first moved to encoding step 5 ("transcripting audio").
    If no model is configured for the video language, or if the video has no
    mp3 file, the step is set to -1 with an explanatory message and that
    message is sent with ``send_email``. Otherwise the transcription is
    either delegated to ``start_transcripting_task`` (when
    ``USE_DISTANT_ENCODING_TRANSCODING`` is set) or run locally and handed to
    ``save_vtt_and_notify``. In every case the collected message is finally
    passed to ``add_encoding_log``.

    Args:
        video_to_encode_id: Identifier of the Video to transcribe.
    """
    change_encoding_step(video_to_encode_id, 5, "transcripting audio")

    video_to_encode = Video.objects.get(id=video_to_encode_id)

    msg = ""
    lang = video_to_encode.transcript
    # Check that TRANSCRIPTION_MODEL_PARAM[TRANSCRIPTION_TYPE][lang] exists.
    # TRANSCRIPTION_MODEL_PARAM defaults to False (TypeError on indexing) and
    # may lack the configured engine (KeyError): both mean "no model", so
    # report them like a missing language instead of crashing mid-workflow.
    # NOTE(review): TRANSCRIPTION_TYPE is only defined at module level when
    # USE_TRANSCRIPTION is true; that NameError is not handled here.
    try:
        model_for_lang = TRANSCRIPTION_MODEL_PARAM[TRANSCRIPTION_TYPE].get(lang)
    except (KeyError, TypeError):
        model_for_lang = None

    if not model_for_lang:
        msg += "\n no stt model found for lang:%s." % lang
        msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."
        change_encoding_step(video_to_encode.id, -1, msg)
        send_email(msg, video_to_encode.id)
    else:
        # Look the mp3 up once instead of calling get_video_mp3() twice.
        mp3 = video_to_encode.get_video_mp3()
        mp3file = mp3.source_file if mp3 else None
        if mp3file is None:
            msg += "\n no mp3 file found for video :%s." % video_to_encode.id
            change_encoding_step(video_to_encode.id, -1, msg)
            send_email(msg, video_to_encode.id)
        else:
            mp3filepath = mp3file.path
            if USE_DISTANT_ENCODING_TRANSCODING:
                start_transcripting_task.delay(
                    video_to_encode.id,
                    mp3filepath,
                    video_to_encode.duration,
                    lang
                )
            else:
                msg, webvtt = start_transcripting(
                    mp3filepath,
                    video_to_encode.duration,
                    lang
                )
                save_vtt_and_notify(video_to_encode, msg, webvtt)
    add_encoding_log(video_to_encode.id, msg)


def save_vtt_and_notify(video_to_encode, msg, webvtt):
    """
    Save the WebVTT result, close the encoding step and notify.

    The text returned by ``saveVTT`` is appended to ``msg``; the video is
    then set to encoding step 0 ("done"), ``send_email_transcript`` is called
    when ``EMAIL_ON_TRANSCRIPTING_COMPLETION`` is enabled, and the combined
    message is passed to ``add_encoding_log``.
    """
    full_log = msg + saveVTT(video_to_encode, webvtt)
    video_id = video_to_encode.id
    change_encoding_step(video_id, 0, "done")
    # Send the end-of-transcription email.
    if EMAIL_ON_TRANSCRIPTING_COMPLETION:
        send_email_transcript(video_to_encode)
    add_encoding_log(video_id, full_log)


def _get_subtitle_file_record(video, lang, stamp):
    """Return the CustomFileModel record that will hold the subtitle file."""
    if not __FILEPICKER__:
        # NOTE(review): get_or_create() without lookup arguments matches any
        # existing row (and fails if several exist) — confirm this is
        # intended rather than creating a new record.
        subtitle_file, _ = CustomFileModel.objects.get_or_create()
        return subtitle_file

    videodir, _ = UserFolder.objects.get_or_create(
        name="%s" % video.slug, owner=video.owner
    )
    # Earlier subtitle files are kept: each run targets a timestamped name.
    subtitle_file, _ = CustomFileModel.objects.get_or_create(
        name="subtitle_%s_%s" % (lang, stamp),
        folder=videodir,
        created_by=video.owner,
    )
    # Remove a stale file on disk before the new content is saved.
    if subtitle_file.file and os.path.isfile(subtitle_file.file.path):
        os.remove(subtitle_file.file.path)
    return subtitle_file


def saveVTT(video, webvtt):
    """
    Save webvtt file with the video.

    The captions are written to a temporary ``.vtt`` file. When ``webvtt``
    holds at least one caption, that file is stored in a CustomFileModel
    record and linked to the video through a Track for the video's
    transcript language.

    Args:
        video: Video whose ``transcript`` attribute gives the language.
        webvtt: Object exposing ``save(path)`` and ``captions``.

    Returns:
        str: Log text describing what was done, or an error line when there
        are no captions.
    """
    msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()
    lang = video.transcript
    # The context manager closes (and thereby removes) the temporary file on
    # every exit path instead of leaving the handle open.
    with NamedTemporaryFile(suffix=".vtt") as temp_vtt_file:
        webvtt.save(temp_vtt_file.name)
        if not webvtt.captions:
            return msg + "\nERROR SUBTITLES Output size is 0"

        msg += "\nstore vtt file in bdd with CustomFileModel model file field"
        # One timestamp for both the record name and the stored file name, so
        # they cannot differ when the clock ticks between the two.
        stamp = time.strftime("%Y%m%d-%H%M%S")
        subtitle_file = _get_subtitle_file_record(video, lang, stamp)
        subtitle_file.file.save(
            "subtitle_%s_%s.vtt" % (lang, stamp),
            File(temp_vtt_file),
        )

    msg += "\nstore vtt file in bdd with Track model src field"
    subtitle_track, _ = Track.objects.get_or_create(video=video, lang=lang)
    subtitle_track.src = subtitle_file
    subtitle_track.lang = lang
    subtitle_track.save()
    return msg

1	from django.conf import settings	×
2	from django.core.files import File	×
3	from pod.completion.models import Track	×
4	from pod.main.tasks import task_start_transcript	×
5
6	from .utils import (	×
7	send_email,
8	send_email_transcript,
9	change_encoding_step,
10	add_encoding_log,
11	)
12	from ..video.models import Video	×
13	import importlib.util	×
14	if (	×
15	importlib.util.find_spec("vosk") is not None
16	or importlib.util.find_spec("stt") is not None
17	):
18	from .transcript_model import start_transcripting	×
19
20	import os	×
21	import time	×
22
23	from tempfile import NamedTemporaryFile	×
24
25	import threading	×
26	import logging	×
27
28	DEBUG = getattr(settings, "DEBUG", False)	×
29
30	if getattr(settings, "USE_PODFILE", False):	×
31	__FILEPICKER__ = True	×
32	from pod.podfile.models import CustomFileModel	×
33	from pod.podfile.models import UserFolder	×
34	else:
35	__FILEPICKER__ = False	×
36	from pod.main.models import CustomFileModel	×
37
38	EMAIL_ON_TRANSCRIPTING_COMPLETION = getattr(	×
39	settings, "EMAIL_ON_TRANSCRIPTING_COMPLETION", True
40	)
41	TRANSCRIPTION_MODEL_PARAM = getattr(settings, "TRANSCRIPTION_MODEL_PARAM", False)	×
42	USE_TRANSCRIPTION = getattr(settings, "USE_TRANSCRIPTION", False)	×
43	if USE_TRANSCRIPTION:	×
44	TRANSCRIPTION_TYPE = getattr(settings, "TRANSCRIPTION_TYPE", "VOSK")	×
45	TRANSCRIPTION_NORMALIZE = getattr(settings, "TRANSCRIPTION_NORMALIZE", False)	×
46	CELERY_TO_ENCODE = getattr(settings, "CELERY_TO_ENCODE", False)	×
47
48	USE_DISTANT_ENCODING_TRANSCODING = getattr(	×
49	settings,
50	"USE_DISTANT_ENCODING_TRANSCODING",
51	False
52	)
53	if USE_DISTANT_ENCODING_TRANSCODING:	×
54	from .transcripting_tasks import start_transcripting_task	×
55
56	log = logging.getLogger(__name__)	×
57
58	"""
59	TO TEST IN THE SHELL -->
60	from pod.video.transcript import *
61	stt_model = get_model("fr")
62	msg, webvtt, all_text = main_stt_transcript(
63	"/test/audio_192k_pod.mp3", # file
64	177, # file duration
65	stt_model # model stt loaded
66	)
67	print(webvtt)
68	"""
69
70
71	# ##########################################################################
72	# TRANSCRIPT VIDEO: THREAD TO LAUNCH TRANSCRIPT
73	# ##########################################################################
74	def start_transcript(video_id, threaded=True):	×
75	"""
76	Main function call to start transcript.
77	Will launch transcript mode depending on configuration.
78	"""
79	if threaded:	×
80	if CELERY_TO_ENCODE:	×
81	task_start_transcript.delay(video_id)	×
82	else:
83	log.info("START TRANSCRIPT VIDEO %s" % video_id)	×
84	t = threading.Thread(target=main_threaded_transcript, args=[video_id])	×
85	t.setDaemon(True)	×
86	t.start()	×
87	else:
88	main_threaded_transcript(video_id)	×
89
90
91	def main_threaded_transcript(video_to_encode_id):	×
92	"""
93	Main function to transcript.
94	Will check all configuration and file and launch transcript.
95	"""
96	change_encoding_step(video_to_encode_id, 5, "transcripting audio")	×
97
98	video_to_encode = Video.objects.get(id=video_to_encode_id)	×
99
100	msg = ""	×
101	lang = video_to_encode.transcript	×
102	# check if TRANSCRIPTION_MODEL_PARAM [lang] exist
103	if not TRANSCRIPTION_MODEL_PARAM[TRANSCRIPTION_TYPE].get(lang):	×
104	msg += "\n no stt model found for lang:%s." % lang	×
105	msg += "Please add it in TRANSCRIPTION_MODEL_PARAM."	×
106	change_encoding_step(video_to_encode.id, -1, msg)	×
107	send_email(msg, video_to_encode.id)	×
108	else:
109	mp3file = (	×
110	video_to_encode.get_video_mp3().source_file
111	if video_to_encode.get_video_mp3()
112	else None
113	)
114	if mp3file is None:	×
115	msg += "\n no mp3 file found for video :%s." % video_to_encode.id	×
116	change_encoding_step(video_to_encode.id, -1, msg)	×
117	send_email(msg, video_to_encode.id)	×
118	else:
119	mp3filepath = mp3file.path	×
120	if USE_DISTANT_ENCODING_TRANSCODING:	×
121	start_transcripting_task.delay(	×
122	video_to_encode.id,
123	mp3filepath,
124	video_to_encode.duration,
125	lang
126	)
127	else:
128	msg, webvtt = start_transcripting(	×
129	mp3filepath,
130	video_to_encode.duration,
131	lang
132	)
133	save_vtt_and_notify(video_to_encode, msg, webvtt)	×
134	add_encoding_log(video_to_encode.id, msg)	×
135
136
137	def save_vtt_and_notify(video_to_encode, msg, webvtt):	×
138	"""Call save vtt file function and notify by mail at the end."""
139	msg += saveVTT(video_to_encode, webvtt)	×
140	change_encoding_step(video_to_encode.id, 0, "done")	×
141	# envois mail fin transcription
142	if EMAIL_ON_TRANSCRIPTING_COMPLETION:	×
143	send_email_transcript(video_to_encode)	×
144	add_encoding_log(video_to_encode.id, msg)	×
145
146
147	def saveVTT(video, webvtt):	×
148	"""Save webvtt file with the video."""
149	msg = "\nSAVE TRANSCRIPT WEBVTT : %s" % time.ctime()	×
150	lang = video.transcript	×
151	temp_vtt_file = NamedTemporaryFile(suffix=".vtt")	×
152	webvtt.save(temp_vtt_file.name)	×
153	if webvtt.captions:	×
154	msg += "\nstore vtt file in bdd with CustomFileModel model file field"	×
155	if __FILEPICKER__:	×
156	videodir, created = UserFolder.objects.get_or_create(	×
157	name="%s" % video.slug, owner=video.owner
158	)
159	"""
160	previousSubtitleFile = CustomFileModel.objects.filter(
161	name__startswith="subtitle_%s" % lang,
162	folder=videodir,
163	created_by=video.owner
164	)
165	"""
166	# for subt in previousSubtitleFile:
167	# subt.delete()
168	subtitleFile, created = CustomFileModel.objects.get_or_create(	×
169	name="subtitle_%s_%s" % (lang, time.strftime("%Y%m%d-%H%M%S")),
170	folder=videodir,
171	created_by=video.owner,
172	)
173	if subtitleFile.file and os.path.isfile(subtitleFile.file.path):	×
174	os.remove(subtitleFile.file.path)	×
175	else:
176	subtitleFile, created = CustomFileModel.objects.get_or_create()	×
177
178	subtitleFile.file.save(	×
179	"subtitle_%s_%s.vtt" % (lang, time.strftime("%Y%m%d-%H%M%S")),
180	File(temp_vtt_file),
181	)
182	msg += "\nstore vtt file in bdd with Track model src field"	×
183
184	subtitleVtt, created = Track.objects.get_or_create(video=video, lang=lang)	×
185	subtitleVtt.src = subtitleFile	×
186	subtitleVtt.lang = lang	×
187	subtitleVtt.save()	×
188	else:
189	msg += "\nERROR SUBTITLES Output size is 0"	×
190	return msg	×

EsupPortail / Esup-Pod / 5462629459

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous