Commit dda3b840 authored by 翟艳秋(20软)

1. 修改判定字幕的算法; 2.在界面中添加停止按钮,用于终止当前任务,重新开始新的任务; 3.将检测旁白的进度条优化为真进度条;

parent 742031dc
......@@ -73,7 +73,7 @@ def concat_wav(root):
return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
# 临时存储各种中间产物的文件夹
tmp_root = './tmp'
if not os.path.exists(tmp_root):
......@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
sys.path.append("./PaddlePaddle_DeepSpeech2")
from infer_path import predict_long_audio_with_paddle
table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time)
table_content = predict_long_audio_with_paddle(audio_path, start_time, state)
write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
# 删除中间文件
# shutil.rmtree(tmp_root)
......
......@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def get_position(video_path,start_time):
def get_position(video_path, start_time):
"""
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param video_path: 视频存储路径
......@@ -130,7 +130,7 @@ def detect_subtitle(img):
return None
def process_video(video_path, begin, end):
def process_video(video_path, begin, end, state):
"""
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径
......@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
cnt += 1
# 每秒取4帧画面左右
if cnt % int(fps / 4) == 0:
state[0] = float(cnt / video.get(cv2.CAP_PROP_FRAME_COUNT)) if state[0] < 0.99 else 0.99
subTitle = detect_subtitle(frame)
# 第一次找到字幕
if lastSubTitle is None and subTitle is not None:
......@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
workbook.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time):
def detect_with_ocr(video_path, book_path, start_time, end_time, state):
book_name_xlsx = book_path
sheet_name_xlsx = "旁白插入位置建议"
......@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
# 获取并构建输出信息
table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
table_content = table_head + process_video(video_path, start_time, end_time)
table_content = table_head + process_video(video_path, start_time, end_time, state)
# 输出旁白位置推荐信息到表格
write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1
if __name__ == '__main__':
......
......@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
interval = interval * fps
random_number = 50
ans = [False] * 3
print(ans)
for i in range(3):
random_list = random_int_list(start, start + interval, random_number)
start = start + interval
......@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
if ans[i]:
print(random_point)
break
if not ans[i]:
print('{}-{}时间段内未检测到字幕'.format(start, start + interval))
if i == 1 and Counter(ans).most_common(1)[0][0] is False:
break
video.release()
print(ans)
return Counter(ans).most_common(1)[0][0]
if __name__ == '__main__':
video_path = r'D:\heelo\hysxm.mp4'
start_time = time.time()
start = 90
interval = 120
print(detect_movie(video_path, start, interval))
print(time.time() - start_time)
pass
# video_path = r'D:\heelo\hysxm.mp4'
# start_time = time.time()
# start = 90
# interval = 120
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
......@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return time_in_seconds
def detect(video_path, start_time, end_time, book_path):
def detect(video_path, start_time, end_time, book_path, state):
print("开始检测")
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
......@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
has_subtitle = detect_movie(video_path, start_time, 60)
if has_subtitle:
detect_with_ocr(video_path, book_path, start_time, end_time)
detect_with_ocr(video_path, book_path, start_time, end_time, state)
else:
detect_with_asr(video_path, book_path, start_time, end_time)
detect_with_asr(video_path, book_path, start_time, end_time, state)
if __name__ == '__main__':
......
......@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
speech_synthesis(text, wav_path, speed)
time.sleep(1)
print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages))
......
# -*- coding:utf-8 -*-
import threading
from mttkinter import mtTkinter as tk
from tkinter import filedialog, ttk, messagebox, Frame, Canvas
from tkinter import filedialog, ttk, messagebox
import os
import time
import ffmpeg
from speech_synthesis import ss_and_export
import ctypes
import inspect
window = tk.Tk()
window.title('无障碍电影辅助工具') # 标题
......@@ -24,6 +25,7 @@ def open_video_file():
# 获取视频的时长等信息,初始化开始结束时间
info = ffmpeg.probe(video_path)
vs = next(c for c in info['streams'] if c['codec_type'] == 'video')
print(vs)
try:
duration = int(float(vs['duration']))
hours = int(duration / 3600)
......@@ -31,7 +33,10 @@ def open_video_file():
seconds = int(duration - 60 * minutes - 3600 * hours)
endTime.set("%02d:%02d:%02d" % (hours, minutes, seconds))
except:
endTime.set(vs['tags']['DURATION'])
for k in vs['tags'].keys():
k_l = str.lower(k)
if 'duration' in k_l:
endTime.set(vs['tags'][k])
if len(video_path) != 0 and not is_video(video_path):
messagebox.showwarning('警告', "请选择正确的视频格式,能够处理的视频格式如下所示:\n'.mkv', '.rmvb', '.mp4', '.avi'")
......@@ -89,15 +94,13 @@ def start_process(p, p_label, state, intervals=100):
"""
print("进度条开始滚动")
p.start(interval=int(intervals))
laststate = state[0]
lastState = state[0]
while True:
if state[0] and state[0] != laststate:
# 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时,对进度条状态进行更新
if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
p['value'] = int(state[0] * 100)
lastState = state[0]
p_label['text'] = str(int(p['value'])) + "%"
if p['value'] == 99:
p.stop()
p['value'] = 99
break
if p['value'] == 100:
p.stop()
p['value'] = 100
......@@ -124,26 +127,50 @@ def start_detect():
messagebox.showwarning("警告", "请输入表格存放路径")
return
# 开始检测后,将“开始检测”按钮设置为不可点击状态,“停止检测”按钮设置为可点击状态
startDetection.config(state=tk.DISABLED)
stopDetection.config(state=tk.ACTIVE)
processState.set("正在启动中……")
from narratage_detection import detect
# 显示进度条及开始检测
progressbar_1.grid(column=2, row=1)
progressbar_1.grid(column=2, row=1, sticky="W")
progress_1.grid(column=3, row=1)
processState.set("开始检测")
intervals = trans_to_seconds(endTime.get())
# 多线程同步进行检测和进度条更新
threads = [threading.Thread(target=start_process, args=(progressbar_1, progress_1, None, intervals * 5)),
threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path))]
state = [None]
threads = [
threading.Thread(target=start_process, args=(progressbar_1, progress_1, state, 100000), name="startProgress1"),
threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path, state),
name="detect")]
for t in threads:
t.start()
# 线程完成任务后结束线程
for t in threads:
t.join()
print("线程{}已结束".format(t.name))
# 将进度条的进度拉满到100%,并给出“任务已完成”的提示
progressbar_1['value'] = 100
progress_1['text'] = '100%'
processState.set("任务已完成")
# 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态
stopDetection.config(state=tk.DISABLED)
startDetection.config(state=tk.ACTIVE)
def stop_detect():
    """
    Abort the running detection task and reset the detection widgets.

    Every live thread whose name marks it as part of a detection run gets a
    ``SystemExit`` raised inside it via ``_async_raise``; afterwards the status
    text, the start/stop buttons and the progress bar are put back into their
    idle state.
    :return: None
    """
    # Names given to the threads that take part in a detection run.
    detection_threads = ("startDetect", "startProgress1", "detect")
    for worker in threading.enumerate():
        # ``Thread.name`` replaces the deprecated ``getName()`` accessor.
        if worker.name in detection_threads:
            _async_raise(worker.ident, SystemExit)
    # Show the "stopped" status, disable the stop button, re-enable the start
    # button, then reset the progress bar to 0 and hide it.
    processState.set("已停止")
    stopDetection.config(state=tk.DISABLED)
    startDetection.config(state=tk.ACTIVE)
    progressbar_1.stop()
    progressbar_1['value'] = 0
    progress_1['text'] = "0%"
    progressbar_1.grid_forget()
    progress_1.grid_forget()
def open_sheet_file():
......@@ -206,31 +233,69 @@ def start_synthesis():
messagebox.showwarning("警告", "当前输入的表格不存在,请检查一遍。")
# 显示进度条、进度条百分比及任务状态提示文本
startSynthesis.config(state=tk.DISABLED)
stopSynthesis.config(state=tk.ACTIVE)
progressbar_2.grid(column=2, row=2)
progress_2.grid(column=3, row=2)
processState_2.set("开始生成音频及字幕")
# 多线程同时实现语音合成+字幕导出、进度条
state = [None]
threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state)),
threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state), name="startProgress2"),
threading.Thread(target=ss_and_export,
args=(sheet_path, audio_dir, speed, caption_path, state))]
args=(sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
for t in threads:
t.start()
for t in threads:
t.join()
processState_2.set("语音和字幕已导出完毕")
def thread_it(func, *args):
startSynthesis.config(state=tk.ACTIVE)
stopSynthesis.config(state=tk.DISABLED)
def stop_synthesis():
    """
    Abort the running speech-synthesis task and reset the synthesis widgets.

    Every live thread whose name marks it as part of a synthesis run gets a
    ``SystemExit`` raised inside it via ``_async_raise``; afterwards the status
    text, the start/stop buttons and the progress bar are put back into their
    idle state.
    :return: None
    """
    # Debug output: list the threads that are alive when stop is requested.
    print(threading.enumerate())
    # Names given to the threads that take part in a synthesis run.
    synthesis_threads = ("startSynthesis", "startProgress2", "ssAndExport")
    for worker in threading.enumerate():
        # ``Thread.name`` replaces the deprecated ``getName()`` accessor.
        if worker.name in synthesis_threads:
            _async_raise(worker.ident, SystemExit)
    # Show the "stopped" status, disable the stop button, re-enable the start
    # button, then reset the progress bar to 0 and hide it.
    processState_2.set("已停止")
    stopSynthesis.config(state=tk.DISABLED)
    startSynthesis.config(state=tk.ACTIVE)
    progressbar_2.stop()
    progressbar_2['value'] = 0
    progress_2['text'] = "0%"
    progressbar_2.grid_forget()
    progress_2.grid_forget()
def thread_it(func, *args, name=None):
    """
    Run ``func(*args)`` on a background daemon thread.

    :param func: callable executed in the new thread
    :param args: positional arguments forwarded to ``func``
    :param name: keyword-only thread name; ``None`` lets ``threading`` choose a default name
    :return: None
    """
    # Build the thread once; ``daemon=True`` replaces the deprecated
    # ``setDaemon(True)`` call, so the worker never keeps the process alive
    # after the main thread has finished.
    worker = threading.Thread(target=func, args=args, name=name, daemon=True)
    worker.start()
def _async_raise(tid, exctype):
"""
终结线程
:param tid: 线程id
:param exctype: 关闭方式
:return:
"""
tid = ctypes.c_long(tid)
if not inspect.isclass(exctype):
exctype = type(exctype)
res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
if res == 0:
raise ValueError("invalid thread id")
elif res != 1:
ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)
raise SystemError("PyThreadState_SetAsyncExc failed")
def _quit():
window.quit()
window.destroy()
......@@ -256,7 +321,6 @@ tabControl.pack(expand=1, fill="both")
- 视频实际结束时间|文本框
"""
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
# video_info.grid(column=0, row=0, padx=8, pady=4)
video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
input_label = ttk.Label(video_info, text="视频文件")
......@@ -283,6 +347,7 @@ endTime_entered.grid(column=1, row=2, sticky="W")
检测步骤相关内容,包含以下内容:
- 输出表格路径|输出表格路径文本框|打开文件夹
- 开始检测按钮|当前检测状态提示文本|任务进度条|进度条百分比
- 停止检测按钮
"""
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4)
......@@ -295,19 +360,24 @@ outputFile.grid(column=1, row=0)
save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
save_button.grid(column=2, row=0)
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect))
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect"))
startDetection.grid(column=0, row=1)
processState = tk.StringVar()
stateLabel = tk.Label(detect_command, textvariable=processState, fg="green")
stateLabel.grid(column=1, row=1, sticky="W")
progressbar_1 = ttk.Progressbar(detect_command, length=100, mode="determinate")
progressbar_1 = ttk.Progressbar(detect_command, length=80, mode="determinate")
progress_1 = tk.Label(detect_command, text="0%")
stopDetection = ttk.Button(detect_command, text="停止检测", command=lambda: thread_it(stop_detect, name="stopDetect"))
stopDetection.grid(column=0, row=2)
stopDetection.config(state=tk.DISABLED)
"""
为旁白语音合成添加部件
"""
"""
语音相关设置,包含以下内容:
- 旁白脚本表格|表格路径|上传文件按钮
"""
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
......@@ -347,13 +417,18 @@ audioDir_input.grid(column=1, row=0)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
save_button_2.grid(column=2, row=0)
startSynthesis = ttk.Button(synthesis_command, text="开始合成", command=lambda: thread_it(start_synthesis))
startSynthesis = ttk.Button(synthesis_command, text="开始合成",
command=lambda: thread_it(start_synthesis, name="startSynthesis"))
startSynthesis.grid(column=0, row=2)
processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W")
progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
progress_2 = tk.Label(synthesis_command, text="0%")
stopSynthesis = ttk.Button(synthesis_command, text="停止合成",
command=lambda: thread_it(stop_synthesis, name="stopSynthesis"))
stopSynthesis.grid(column=0, row=3)
stopSynthesis.config(state=tk.DISABLED)
# 刷新显示
window.mainloop()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment