1. 修改判定字幕的算法; 2.在界面中添加停止按钮，用于终止当前任务，重新开始新的任务; 3.将检测旁白的进度条优化为真进度条;

dda3b840 · 翟艳秋（20软） · 742031dc · dda3b840 · dda3b840 · dda3b840
Commit dda3b840 authored Jan 18, 2022 by 翟艳秋（20软）
6 changed files
--- a/detect_with_asr.py
+++ b/detect_with_asr.py
@@ -73,7 +73,7 @@ def concat_wav(root):
    return output_file
-def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
+def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
    # 临时存储各种中间产物的文件夹
    tmp_root = './tmp'
    if not os.path.exists(tmp_root):
@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
    sys.path.append("./PaddlePaddle_DeepSpeech2")
    from infer_path import predict_long_audio_with_paddle
-    table_content = predict_long_audio_with_paddle(audio_path, book_name_xlsx, start_time)
+    table_content = predict_long_audio_with_paddle(audio_path, start_time, state)
    write_to_sheet(book_name_xlsx, sheet_name_xlsx, table_content)
+    state[0] = 1
    # 删除中间文件
    # shutil.rmtree(tmp_root)

--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
 ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
-def get_position(video_path,start_time):
+def get_position(video_path, start_time):
    """
    根据对视频中的画面进行分析，确定字幕的位置，以便后续的字幕识别
    :param video_path: 视频存储路径
@@ -130,7 +130,7 @@ def detect_subtitle(img):
    return None
-def process_video(video_path, begin, end):
+def process_video(video_path, begin, end, state):
    """
    处理视频，主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
    :param video_path: 待处理视频的路径
@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
        cnt += 1
        # 每秒取4帧画面左右
        if cnt % int(fps / 4) == 0:
+            state[0] = float(cnt / video.get(cv2.CAP_PROP_FRAME_COUNT)) if state[0] < 0.99 else 0.99
            subTitle = detect_subtitle(frame)
            # 第一次找到字幕
            if lastSubTitle is None and subTitle is not None:
@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
    workbook.save(path)
-def detect_with_ocr(video_path, book_path, start_time, end_time):
+def detect_with_ocr(video_path, book_path, start_time, end_time, state):
    book_name_xlsx = book_path
    sheet_name_xlsx = "旁白插入位置建议"
@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
    # 获取并构建输出信息
    table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
-    table_content = table_head + process_video(video_path, start_time, end_time)
+    table_content = table_head + process_video(video_path, start_time, end_time, state)
    # 输出旁白位置推荐信息到表格
    write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
+    state[0] = 1
 if __name__ == '__main__':

--- a/judge_subtitle.py
+++ b/judge_subtitle.py
@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
    interval = interval * fps
    random_number = 50
    ans = [False] * 3
-    print(ans)
    for i in range(3):
        random_list = random_int_list(start, start + interval, random_number)
        start = start + interval
@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
            if ans[i]:
                print(random_point)
                break
+        if not ans[i]:
+            print('{}-{}时间段内未检测到字幕'.format(start, start + interval))
+        if i == 1 and Counter(ans).most_common(1)[0][0] is False:
+            break
    video.release()
    print(ans)
    return Counter(ans).most_common(1)[0][0]
 if __name__ == '__main__':
-    video_path = r'D:\heelo\hysxm.mp4'
+    pass
-    start_time = time.time()
+    # video_path = r'D:\heelo\hysxm.mp4'
-    start = 90
+    # start_time = time.time()
-    interval = 120
+    # start = 90
-    print(detect_movie(video_path, start, interval))
+    # interval = 120
-    print(time.time() - start_time)
+    # print(detect_movie(video_path, start, interval))
+    # print(time.time() - start_time)
--- a/narratage_detection.py
+++ b/narratage_detection.py
@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
    return time_in_seconds
-def detect(video_path, start_time, end_time, book_path):
+def detect(video_path, start_time, end_time, book_path, state):
    print("开始检测")
    if book_path is None:
        book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
    has_subtitle = detect_movie(video_path, start_time, 60)
    if has_subtitle:
-        detect_with_ocr(video_path, book_path, start_time, end_time)
+        detect_with_ocr(video_path, book_path, start_time, end_time, state)
    else:
-        detect_with_asr(video_path, book_path, start_time, end_time)
+        detect_with_asr(video_path, book_path, start_time, end_time, state)
 if __name__ == '__main__':

--- a/speech_synthesis.py
+++ b/speech_synthesis.py
@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
    for i, text in enumerate(narratages):
        wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
        speech_synthesis(text, wav_path, speed)
+        time.sleep(1)
        print("目前正在处理{}".format(wav_path))
        state[0] = float((i + 1) / len(narratages))

--- a/try_with_gui.py
+++ b/try_with_gui.py
 # -*- coding:utf-8 -*-
 import threading
 from mttkinter import mtTkinter as tk
-from tkinter import filedialog, ttk, messagebox, Frame, Canvas
+from tkinter import filedialog, ttk, messagebox
 import os
-import time
 import ffmpeg
 from speech_synthesis import ss_and_export
+import ctypes
+import inspect
 window = tk.Tk()
 window.title('无障碍电影辅助工具')  # 标题
@@ -24,6 +25,7 @@ def open_video_file():
        # 获取视频的时长等信息，初始化开始结束时间
        info = ffmpeg.probe(video_path)
        vs = next(c for c in info['streams'] if c['codec_type'] == 'video')
+        print(vs)
        try:
            duration = int(float(vs['duration']))
            hours = int(duration / 3600)
@@ -31,7 +33,10 @@ def open_video_file():
            seconds = int(duration - 60 * minutes - 3600 * hours)
            endTime.set("%02d:%02d:%02d" % (hours, minutes, seconds))
        except:
-            endTime.set(vs['tags']['DURATION'])
+            for k in vs['tags'].keys():
+                k_l = str.lower(k)
+                if 'duration' in k_l:
+                    endTime.set(vs['tags'][k])
    if len(video_path) != 0 and not is_video(video_path):
        messagebox.showwarning('警告', "请选择正确的视频格式，能够处理的视频格式如下所示：\n'.mkv', '.rmvb', '.mp4', '.avi'")
@@ -89,15 +94,13 @@ def start_process(p, p_label, state, intervals=100):
    """
    print("进度条开始滚动")
    p.start(interval=int(intervals))
-    laststate = state[0]
+    lastState = state[0]
    while True:
-        if state[0] and state[0] != laststate:
+        # 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时，对进度条状态进行更新
+        if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
            p['value'] = int(state[0] * 100)
+            lastState = state[0]
        p_label['text'] = str(int(p['value'])) + "%"
-        if p['value'] == 99:
-            p.stop()
-            p['value'] = 99
-            break
        if p['value'] == 100:
            p.stop()
            p['value'] = 100
@@ -124,26 +127,50 @@ def start_detect():
        messagebox.showwarning("警告", "请输入表格存放路径")
        return
+    # 开始检测后，将“开始检测”按钮设置为不可点击状态，“停止检测”按钮设置为可点击状态
+    startDetection.config(state=tk.DISABLED)
+    stopDetection.config(state=tk.ACTIVE)
    processState.set("正在启动中……")
    from narratage_detection import detect
    # 显示进度条及开始检测
-    progressbar_1.grid(column=2, row=1)
+    progressbar_1.grid(column=2, row=1, sticky="W")
    progress_1.grid(column=3, row=1)
    processState.set("开始检测")
    intervals = trans_to_seconds(endTime.get())
    # 多线程同步进行检测和进度条更新
-    threads = [threading.Thread(target=start_process, args=(progressbar_1, progress_1, None, intervals * 5)),
+    state = [None]
-               threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path))]
+    threads = [
+        threading.Thread(target=start_process, args=(progressbar_1, progress_1, state, 100000), name="startProgress1"),
+        threading.Thread(target=detect, args=(video_path, startTime.get(), endTime.get(), book_path, state),
+                         name="detect")]
    for t in threads:
        t.start()
    # 线程完成任务后结束线程
    for t in threads:
        t.join()
-        print("线程{}已结束".format(t.name))
    # 将进度条的进度拉满到100%，并给出“任务已完成”的提示
    progressbar_1['value'] = 100
    progress_1['text'] = '100%'
    processState.set("任务已完成")
+    # 检测完成后，将“停止检测”按钮设置为不可点击状态，”开始检测“按钮设置为可点击状态
+    stopDetection.config(state=tk.DISABLED)
+    startDetection.config(state=tk.ACTIVE)
+def stop_detect():
+    """
+    Abort the running detection task and reset the detection widgets.
+
+    Every live thread named "startDetect", "startProgress1" or "detect" is
+    asked to exit by raising SystemExit in it through _async_raise. Afterwards
+    the status text is set to "已停止", the stop button is disabled, the start
+    button is enabled again, and the progress bar and its percentage label are
+    reset to 0 and hidden.
+    :return: None
+    """
+    for x in threading.enumerate():
+        if x.name in ("startDetect", "startProgress1", "detect"):
+            try:
+                _async_raise(x.ident, SystemExit)
+            except ValueError:
+                # _async_raise reports an unknown thread id with ValueError;
+                # that means the thread ended after enumerate(), so there is
+                # nothing left to stop and the UI reset below must still run.
+                pass
+    # Show the "stopped" status and swap which button can be clicked.
+    processState.set("已停止")
+    stopDetection.config(state=tk.DISABLED)
+    # ttk documents only "normal" and "disabled" for the state option.
+    startDetection.config(state=tk.NORMAL)
+    # Reset the progress bar and its label to 0, then hide both.
+    progressbar_1.stop()
+    progressbar_1['value'] = 0
+    progress_1['text'] = "0%"
+    progressbar_1.grid_forget()
+    progress_1.grid_forget()
 def open_sheet_file():
@@ -206,31 +233,69 @@ def start_synthesis():
        messagebox.showwarning("警告", "当前输入的表格不存在，请检查一遍。")
    # 显示进度条、进度条百分比及任务状态提示文本
+    startSynthesis.config(state=tk.DISABLED)
+    stopSynthesis.config(state=tk.ACTIVE)
    progressbar_2.grid(column=2, row=2)
    progress_2.grid(column=3, row=2)
    processState_2.set("开始生成音频及字幕")
    # 多线程同时实现语音合成+字幕导出、进度条
    state = [None]
-    threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state)),
+    threads = [threading.Thread(target=start_process, args=(progressbar_2, progress_2, state), name="startProgress2"),
               threading.Thread(target=ss_and_export,
-                                args=(sheet_path, audio_dir, speed, caption_path, state))]
+                                args=(sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    processState_2.set("语音和字幕已导出完毕")
+    startSynthesis.config(state=tk.ACTIVE)
+    stopSynthesis.config(state=tk.DISABLED)
-def thread_it(func, *args):
+def stop_synthesis():
+    """
+    Abort the running speech-synthesis task and reset the synthesis widgets.
+
+    Every live thread named "startSynthesis", "startProgress2" or
+    "ssAndExport" is asked to exit by raising SystemExit in it through
+    _async_raise. Afterwards the status text is set to "已停止", the stop button
+    is disabled, the start button is enabled again, and the progress bar and
+    its percentage label are reset to 0 and hidden.
+    :return: None
+    """
+    print(threading.enumerate())
+    for x in threading.enumerate():
+        if x.name in ("startSynthesis", "startProgress2", "ssAndExport"):
+            try:
+                _async_raise(x.ident, SystemExit)
+            except ValueError:
+                # _async_raise reports an unknown thread id with ValueError;
+                # that means the thread ended after enumerate(), so there is
+                # nothing left to stop and the UI reset below must still run.
+                pass
+    # Show the "stopped" status and swap which button can be clicked.
+    processState_2.set("已停止")
+    stopSynthesis.config(state=tk.DISABLED)
+    # ttk documents only "normal" and "disabled" for the state option.
+    startSynthesis.config(state=tk.NORMAL)
+    # Reset the progress bar and its label to 0, then hide both.
+    progressbar_2.stop()
+    progressbar_2['value'] = 0
+    progress_2['text'] = "0%"
+    progressbar_2.grid_forget()
+    progress_2.grid_forget()
+def thread_it(func, *args, name):
    # 创建线程
-    t = threading.Thread(target=func, args=args)
+    t = threading.Thread(target=func, args=args, name=name)
    # 守护
    t.setDaemon(True)
    # 启动
    t.start()
+def _async_raise(tid, exctype):
+    """
+    Ask a thread to terminate by scheduling an exception inside it.
+    :param tid: id of the target thread (callers pass Thread.ident)
+    :param exctype: exception class to raise; if an instance is given, its
+        class is used instead
+    :return: None
+    :raises ValueError: if the call reports that no thread matched tid
+    :raises SystemError: if the call reports anything other than exactly one
+        affected thread; the pending exception is cleared again first
+    """
+    # PyThreadState_SetAsyncExc declares its id parameter as unsigned long.
+    tid = ctypes.c_ulong(tid)
+    if not inspect.isclass(exctype):
+        exctype = type(exctype)
+    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))
+    if res == 0:
+        raise ValueError("invalid thread id")
+    elif res != 1:
+        # Undo the request so no thread is left with a stray pending exception.
+        ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)
+        raise SystemError("PyThreadState_SetAsyncExc failed")
 def _quit():
    window.quit()
    window.destroy()
@@ -256,7 +321,6 @@ tabControl.pack(expand=1, fill="both")
    - 视频实际结束时间|文本框
 """
 video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
-# video_info.grid(column=0, row=0, padx=8, pady=4)
 video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
 input_label = ttk.Label(video_info, text="视频文件")
@@ -283,6 +347,7 @@ endTime_entered.grid(column=1, row=2, sticky="W")
    检测步骤相关内容，包含以下内容：
    - 输出表格路径|输出表格路径文本框|打开文件夹
    - 开始检测按钮|当前检测状态提示文本|任务进度条|进度条百分比
+    - 停止检测按钮
 """
 detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
 detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4)
@@ -295,19 +360,24 @@ outputFile.grid(column=1, row=0)
 save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
 save_button.grid(column=2, row=0)
-startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect))
+startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect"))
 startDetection.grid(column=0, row=1)
 processState = tk.StringVar()
 stateLabel = tk.Label(detect_command, textvariable=processState, fg="green")
 stateLabel.grid(column=1, row=1, sticky="W")
-progressbar_1 = ttk.Progressbar(detect_command, length=100, mode="determinate")
+progressbar_1 = ttk.Progressbar(detect_command, length=80, mode="determinate")
 progress_1 = tk.Label(detect_command, text="0%")
+stopDetection = ttk.Button(detect_command, text="停止检测", command=lambda: thread_it(stop_detect, name="stopDetect"))
+stopDetection.grid(column=0, row=2)
+stopDetection.config(state=tk.DISABLED)
 """
    为旁白语音合成添加部件
 """
 """
+    语音相关设置，包含以下内容：
+    - 旁白脚本表格|表格路径|上传文件按钮
 """
 audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
 audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
@@ -347,13 +417,18 @@ audioDir_input.grid(column=1, row=0)
 save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
 save_button_2.grid(column=2, row=0)
-startSynthesis = ttk.Button(synthesis_command, text="开始合成", command=lambda: thread_it(start_synthesis))
+startSynthesis = ttk.Button(synthesis_command, text="开始合成",
+                            command=lambda: thread_it(start_synthesis, name="startSynthesis"))
 startSynthesis.grid(column=0, row=2)
 processState_2 = tk.StringVar()
 stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
 stateLabel_2.grid(column=1, row=2, sticky="W")
 progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
 progress_2 = tk.Label(synthesis_command, text="0%")
+stopSynthesis = ttk.Button(synthesis_command, text="停止合成",
+                           command=lambda: thread_it(stop_synthesis, name="stopSynthesis"))
+stopSynthesis.grid(column=0, row=3)
+stopSynthesis.config(state=tk.DISABLED)
 # 刷新显示
 window.mainloop()