Commit dda3b840 authored by 翟艳秋(20软)

1. 修改判定字幕的算法; 2.在界面中添加停止按钮,用于终止当前任务,重新开始新的任务; 3.将检测旁白的进度条优化为真进度条;

parent 742031dc
...@@ -73,7 +73,7 @@ def concat_wav(root): ...@@ -73,7 +73,7 @@ def concat_wav(root):
return output_file return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1): def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
# 临时存储各种中间产物的文件夹 # 临时存储各种中间产物的文件夹
tmp_root = './tmp' tmp_root = './tmp'
if not os.path.exists(tmp_root): if not os.path.exists(tmp_root):
...@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1): ...@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
sys.path.append("./PaddlePaddle_DeepSpeech2") sys.path.append("./PaddlePaddle_DeepSpeech2")
from infer_path import predict_long_audio_with_paddle from infer_path import predict_long_audio_with_paddle
table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time) table_content = predict_long_audio_with_paddle(audio_path, start_time, state)
write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content) write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
# 删除中间文件 # 删除中间文件
# shutil.rmtree(tmp_root) # shutil.rmtree(tmp_root)
......
...@@ -14,7 +14,7 @@ up_b, down_b = 0, 0 ...@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False) ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def get_position(video_path,start_time): def get_position(video_path, start_time):
""" """
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别 根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param video_path: 视频存储路径 :param video_path: 视频存储路径
...@@ -130,7 +130,7 @@ def detect_subtitle(img): ...@@ -130,7 +130,7 @@ def detect_subtitle(img):
return None return None
def process_video(video_path, begin, end): def process_video(video_path, begin, end, state):
""" """
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务 处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径 :param video_path: 待处理视频的路径
...@@ -156,6 +156,7 @@ def process_video(video_path, begin, end): ...@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
cnt += 1 cnt += 1
# 每秒取4帧画面左右 # 每秒取4帧画面左右
if cnt % int(fps / 4) == 0: if cnt % int(fps / 4) == 0:
state[0] = float(cnt / video.get(cv2.CAP_PROP_FRAME_COUNT)) if state[0] < 0.99 else 0.99
subTitle = detect_subtitle(frame) subTitle = detect_subtitle(frame)
# 第一次找到字幕 # 第一次找到字幕
if lastSubTitle is None and subTitle is not None: if lastSubTitle is None and subTitle is not None:
...@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value): ...@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
workbook.save(path) workbook.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time): def detect_with_ocr(video_path, book_path, start_time, end_time, state):
book_name_xlsx = book_path book_name_xlsx = book_path
sheet_name_xlsx = "旁白插入位置建议" sheet_name_xlsx = "旁白插入位置建议"
...@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time): ...@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
# 获取并构建输出信息 # 获取并构建输出信息
table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']] table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
table_content = table_head + process_video(video_path, start_time, end_time) table_content = table_head + process_video(video_path, start_time, end_time, state)
# 输出旁白位置推荐信息到表格 # 输出旁白位置推荐信息到表格
write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content) write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval): ...@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
interval = interval * fps interval = interval * fps
random_number = 50 random_number = 50
ans = [False] * 3 ans = [False] * 3
print(ans)
for i in range(3): for i in range(3):
random_list = random_int_list(start, start + interval, random_number) random_list = random_int_list(start, start + interval, random_number)
start = start + interval start = start + interval
...@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval): ...@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
if ans[i]: if ans[i]:
print(random_point) print(random_point)
break break
if not ans[i]:
print('{}-{}时间段内未检测到字幕'.format(start, start + interval))
if i == 1 and Counter(ans).most_common(1)[0][0] is False:
break
video.release() video.release()
print(ans) print(ans)
return Counter(ans).most_common(1)[0][0] return Counter(ans).most_common(1)[0][0]
if __name__ == '__main__': if __name__ == '__main__':
video_path = r'D:\heelo\hysxm.mp4' pass
start_time = time.time() # video_path = r'D:\heelo\hysxm.mp4'
start = 90 # start_time = time.time()
interval = 120 # start = 90
print(detect_movie(video_path, start, interval)) # interval = 120
print(time.time() - start_time) # print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
...@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint): ...@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return time_in_seconds return time_in_seconds
def detect(video_path, start_time, end_time, book_path): def detect(video_path, start_time, end_time, book_path, state):
print("开始检测") print("开始检测")
if book_path is None: if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx" book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
...@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path): ...@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
has_subtitle = detect_movie(video_path, start_time, 60) has_subtitle = detect_movie(video_path, start_time, 60)
if has_subtitle: if has_subtitle:
detect_with_ocr(video_path, book_path, start_time, end_time) detect_with_ocr(video_path, book_path, start_time, end_time, state)
else: else:
detect_with_asr(video_path, book_path, start_time, end_time) detect_with_asr(video_path, book_path, start_time, end_time, state)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state): ...@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
for i, text in enumerate(narratages): for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i]) wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed) speech_synthesis(text, wav_path, speed)
time.sleep(1)
print("目前正在处理{}".format(wav_path)) print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages)) state[0] = float((i + 1) / len(narratages))
......
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
import threading import threading
from mttkinter import mtTkinter as tk from mttkinter import mtTkinter as tk
from tkinter import filedialog, ttk, messagebox, Frame, Canvas from tkinter import filedialog, ttk, messagebox
import os import os
import time
import ffmpeg import ffmpeg
from speech_synthesis import ss_and_export from speech_synthesis import ss_and_export
import ctypes
import inspect
window = tk.Tk() window = tk.Tk()
window.title('无障碍电影辅助工具') # 标题 window.title('无障碍电影辅助工具') # 标题
...@@ -24,6 +25,7 @@ def open_video_file(): ...@@ -24,6 +25,7 @@ def open_video_file():
# 获取视频的时长等信息,初始化开始结束时间 # 获取视频的时长等信息,初始化开始结束时间
info = ffmpeg.probe(video_path) info = ffmpeg.probe(video_path)
vs = next(c for c in info['streams'] if c['codec_type'] == 'video') vs = next(c for c in info['streams'] if c['codec_type'] == 'video')
print(vs)
try: try:
duration = int(float(vs['duration'])) duration = int(float(vs['duration']))
hours = int(duration / 3600) hours = int(duration / 3600)
...@@ -31,7 +33,10 @@ def open_video_file(): ...@@ -31,7 +33,10 @@ def open_video_file():
seconds = int(duration - 60 * minutes - 3600 * hours) seconds = int(duration - 60 * minutes - 3600 * hours)
endTime.set("%02d:%02d:%02d" % (hours, minutes, seconds)) endTime.set("%02d:%02d:%02d" % (hours, minutes, seconds))
except: except:
endTime.set(vs['tags']['DURATION']) for k in vs['tags'].keys():
k_l = str.lower(k)
if 'duration' in k_l:
endTime.set(vs['tags'][k])
if len(video_path) != 0 and not is_video(video_path): if len(video_path) != 0 and not is_video(video_path):
messagebox.showwarning('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'") messagebox.showwarning('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
...@@ -89,15 +94,13 @@ def start_process(p, p_label, state, intervals=100): ...@@ -89,15 +94,13 @@ def start_process(p, p_label, state, intervals=100):
""" """
print("进度条开始滚动") print("进度条开始滚动")
p.start(interval=int(intervals)) p.start(interval=int(intervals))
laststate = state[0] lastState = state[0]
while True: while True:
if state[0] and state[0] != laststate: # 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时,对进度条状态进行更新
if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
p['value'] = int(state[0] * 100) p['value'] = int(state[0] * 100)
lastState = state[0]
p_label['text'] = str(int(p['value'])) + "%" p_label['text'] = str(int(p['value'])) + "%"
if p['value'] == 99:
p.stop()
p['value'] = 99
break
if p['value'] == 100: if p['value'] == 100:
p.stop() p.stop()
p['value'] = 100 p['value'] = 100
...@@ -124,26 +127,50 @@ def start_detect(): ...@@ -124,26 +127,50 @@ def start_detect():
messagebox.showwarning("警告", "请输入表格存放路径") messagebox.showwarning("警告", "请输入表格存放路径")
return return
# 开始检测后,将“开始检测”按钮设置为不可点击状态,“停止检测”按钮设置为可点击状态
startDetection.config(state=tk.DISABLED)
stopDetection.config(state=tk.ACTIVE)
processState.set("正在启动中……") processState.set("正在启动中……")
from narratage_detection import detect from narratage_detection import detect
# 显示进度条及开始检测 # 显示进度条及开始检测
progressbar_1.grid(column=2, row=1) progressbar_1.grid(column=2, row=1, sticky="W")
progress_1.grid(column=3, row=1) progress_1.grid(column=3, row=1)
processState.set("开始检测") processState.set("开始检测")
intervals = trans_to_seconds(endTime.get()) intervals = trans_to_seconds(endTime.get())
# 多线程同步进行检测和进度条更新 # 多线程同步进行检测和进度条更新
threads = [threading.Thread(target=start_process, args=(progressbar_1, progress_1, None, intervals * 5)), state = [None]
threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path))] threads = [
threading.Thread(target=start_process, args=(progressbar_1, progress_1, state, 100000), name="startProgress1"),
threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path, state),
name="detect")]
for t in threads: for t in threads:
t.start() t.start()
# 线程完成任务后结束线程 # 线程完成任务后结束线程
for t in threads: for t in threads:
t.join() t.join()
print("线程{}已结束".format(t.name))
# 将进度条的进度拉满到100%,并给出“任务已完成”的提示 # 将进度条的进度拉满到100%,并给出“任务已完成”的提示
progressbar_1['value'] = 100 progressbar_1['value'] = 100
progress_1['text'] = '100%' progress_1['text'] = '100%'
processState.set("任务已完成") processState.set("任务已完成")
# 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态
stopDetection.config(state=tk.DISABLED)
startDetection.config(state=tk.ACTIVE)
def stop_detect():
    """
    Abort the running detection task and reset the detection widgets.

    Asks ``_async_raise`` to raise ``SystemExit`` in every live thread whose
    name is "startDetect", "startProgress1" or "detect", then sets the status
    text to "已停止", disables the stop button, re-enables the start button,
    and resets and hides the detection progress bar with its percentage label.

    :return: None
    """
    for worker in threading.enumerate():
        # Thread.name replaces the deprecated getName() accessor and matches
        # how thread names are read elsewhere in this file.
        if worker.name in ("startDetect", "startProgress1", "detect"):
            _async_raise(worker.ident, SystemExit)
    # Mark the task as stopped, disable "stop", re-enable "start", then reset
    # the progress bar to 0 and hide it together with its percentage label.
    processState.set("已停止")
    stopDetection.config(state=tk.DISABLED)
    startDetection.config(state=tk.ACTIVE)
    progressbar_1.stop()
    progressbar_1['value'] = 0
    progress_1['text'] = "0%"
    progressbar_1.grid_forget()
    progress_1.grid_forget()
def open_sheet_file(): def open_sheet_file():
...@@ -206,31 +233,69 @@ def start_synthesis(): ...@@ -206,31 +233,69 @@ def start_synthesis():
messagebox.showwarning("警告", "当前输入的表格不存在,请检查一遍。") messagebox.showwarning("警告", "当前输入的表格不存在,请检查一遍。")
# 显示进度条、进度条百分比及任务状态提示文本 # 显示进度条、进度条百分比及任务状态提示文本
startSynthesis.config(state=tk.DISABLED)
stopSynthesis.config(state=tk.ACTIVE)
progressbar_2.grid(column=2, row=2) progressbar_2.grid(column=2, row=2)
progress_2.grid(column=3, row=2) progress_2.grid(column=3, row=2)
processState_2.set("开始生成音频及字幕") processState_2.set("开始生成音频及字幕")
# 多线程同时实现语音合成+字幕导出、进度条 # 多线程同时实现语音合成+字幕导出、进度条
state = [None] state = [None]
threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state)), threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state), name="startProgress2"),
threading.Thread(target=ss_and_export, threading.Thread(target=ss_and_export,
args=(sheet_path, audio_dir, speed, caption_path, state))] args=(sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
for t in threads: for t in threads:
t.start() t.start()
for t in threads: for t in threads:
t.join() t.join()
processState_2.set("语音和字幕已导出完毕") processState_2.set("语音和字幕已导出完毕")
startSynthesis.config(state=tk.ACTIVE)
stopSynthesis.config(state=tk.DISABLED)
def thread_it(func, *args):
def stop_synthesis():
    """
    Abort the running speech-synthesis task and reset the synthesis widgets.

    Prints the list of live threads, asks ``_async_raise`` to raise
    ``SystemExit`` in every live thread whose name is "startSynthesis",
    "startProgress2" or "ssAndExport", then sets the status text to "已停止",
    disables the stop button, re-enables the start button, and resets and
    hides the synthesis progress bar with its percentage label.

    :return: None
    """
    print(threading.enumerate())
    for worker in threading.enumerate():
        # Thread.name replaces the deprecated getName() accessor.
        if worker.name in ("startSynthesis", "startProgress2", "ssAndExport"):
            _async_raise(worker.ident, SystemExit)
    # Mark the synthesis task as stopped, disable "stop", re-enable "start",
    # then reset the progress bar to 0 and hide it with its percentage label.
    processState_2.set("已停止")
    stopSynthesis.config(state=tk.DISABLED)
    startSynthesis.config(state=tk.ACTIVE)
    progressbar_2.stop()
    progressbar_2['value'] = 0
    progress_2['text'] = "0%"
    progressbar_2.grid_forget()
    progress_2.grid_forget()
def thread_it(func, *args, name):
    """
    Run ``func(*args)`` on a new, named daemon thread.

    :param func: callable to execute on the new thread
    :param args: positional arguments forwarded to ``func``
    :param name: name given to the thread (keyword-only); the stop handlers
        look threads up by this name
    :return: None
    """
    # daemon=True in the constructor replaces the deprecated setDaemon(True).
    worker = threading.Thread(target=func, args=args, name=name, daemon=True)
    worker.start()
def _async_raise(tid, exctype):
    """
    Ask the interpreter to raise an exception inside another thread.

    :param tid: identifier of the target thread (e.g. ``Thread.ident``)
    :param exctype: exception class to raise in that thread; when an instance
        is passed instead of a class, its type is used
    :return: None
    :raises ValueError: if the call reports that no thread was affected
    :raises SystemError: if the call reports any result other than 0 or 1
    """
    # PyThreadState_SetAsyncExc declares the id as ``unsigned long``, so it is
    # marshalled as c_ulong rather than the signed c_long.
    thread_id = ctypes.c_ulong(tid)
    if not inspect.isclass(exctype):
        exctype = type(exctype)
    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, ctypes.py_object(exctype))
    if res == 0:
        raise ValueError("invalid thread id")
    if res != 1:
        # Unexpected result: call again with NULL as the exception to cancel
        # the request (per the C API documentation), then report the failure.
        ctypes.pythonapi.PyThreadState_SetAsyncExc(thread_id, None)
        raise SystemError("PyThreadState_SetAsyncExc failed")
def _quit(): def _quit():
window.quit() window.quit()
window.destroy() window.destroy()
...@@ -256,7 +321,6 @@ tabControl.pack(expand=1, fill="both") ...@@ -256,7 +321,6 @@ tabControl.pack(expand=1, fill="both")
- 视频实际结束时间|文本框 - 视频实际结束时间|文本框
""" """
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ") video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
# video_info.grid(column=0, row=0, padx=8, pady=4)
video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4) video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
input_label = ttk.Label(video_info, text="视频文件") input_label = ttk.Label(video_info, text="视频文件")
...@@ -283,6 +347,7 @@ endTime_entered.grid(column=1, row=2, sticky="W") ...@@ -283,6 +347,7 @@ endTime_entered.grid(column=1, row=2, sticky="W")
检测步骤相关内容,包含以下内容: 检测步骤相关内容,包含以下内容:
- 输出表格路径|输出表格路径文本框|打开文件夹 - 输出表格路径|输出表格路径文本框|打开文件夹
- 开始检测按钮|当前检测状态提示文本|任务进度条|进度条百分比 - 开始检测按钮|当前检测状态提示文本|任务进度条|进度条百分比
- 停止检测按钮
""" """
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ") detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4) detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4)
...@@ -295,19 +360,24 @@ outputFile.grid(column=1, row=0) ...@@ -295,19 +360,24 @@ outputFile.grid(column=1, row=0)
save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file) save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
save_button.grid(column=2, row=0) save_button.grid(column=2, row=0)
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect)) startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect"))
startDetection.grid(column=0, row=1) startDetection.grid(column=0, row=1)
processState = tk.StringVar() processState = tk.StringVar()
stateLabel = tk.Label(detect_command, textvariable=processState, fg="green") stateLabel = tk.Label(detect_command, textvariable=processState, fg="green")
stateLabel.grid(column=1, row=1, sticky="W") stateLabel.grid(column=1, row=1, sticky="W")
progressbar_1 = ttk.Progressbar(detect_command, length=100, mode="determinate") progressbar_1 = ttk.Progressbar(detect_command, length=80, mode="determinate")
progress_1 = tk.Label(detect_command, text="0%") progress_1 = tk.Label(detect_command, text="0%")
stopDetection = ttk.Button(detect_command, text="停止检测", command=lambda: thread_it(stop_detect, name="stopDetect"))
stopDetection.grid(column=0, row=2)
stopDetection.config(state=tk.DISABLED)
""" """
为旁白语音合成添加部件 为旁白语音合成添加部件
""" """
""" """
语音相关设置,包含以下内容:
- 旁白脚本表格|表格路径|上传文件按钮
""" """
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ") audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4) audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
...@@ -347,13 +417,18 @@ audioDir_input.grid(column=1, row=0) ...@@ -347,13 +417,18 @@ audioDir_input.grid(column=1, row=0)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir) save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
save_button_2.grid(column=2, row=0) save_button_2.grid(column=2, row=0)
startSynthesis = ttk.Button(synthesis_command, text="开始合成", command=lambda: thread_it(start_synthesis)) startSynthesis = ttk.Button(synthesis_command, text="开始合成",
command=lambda: thread_it(start_synthesis, name="startSynthesis"))
startSynthesis.grid(column=0, row=2) startSynthesis.grid(column=0, row=2)
processState_2 = tk.StringVar() processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green") stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W") stateLabel_2.grid(column=1, row=2, sticky="W")
progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate") progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
progress_2 = tk.Label(synthesis_command, text="0%") progress_2 = tk.Label(synthesis_command, text="0%")
stopSynthesis = ttk.Button(synthesis_command, text="停止合成",
command=lambda: thread_it(stop_synthesis, name="stopSynthesis"))
stopSynthesis.grid(column=0, row=3)
stopSynthesis.config(state=tk.DISABLED)
# 刷新显示 # 刷新显示
window.mainloop() window.mainloop()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment