1.[modified]将界面大小固定，并对各组件均设置为左对齐;

2.[modified]修改进度条数字格式，固定为2位小数（自动补零）;
3.[modified]添加对字幕文件存放路径的检查;
4.[modified]将语音合成从异步改为同步，解决语音合成过程中界面卡顿的问题;

1.[modified]将界面大小固定，并对各组件均设置为左对齐;
172eb5d2 · 翟艳秋（20软） · fad7c317 · 172eb5d2 · 172eb5d2 · 172eb5d2
Commit 172eb5d2 authored Feb 18, 2022 by 翟艳秋（20软）
Showing with 67 additions and 29 deletions

.gitignore .gitignore +3 -0

README.md README.md +14 -0

speech_synthesis.py speech_synthesis.py +11 -13

split_wav.py split_wav.py +1 -1

try_with_gui.py try_with_gui.py +38 -15

No files found.
--- a/.gitignore
+++ b/.gitignore
+/dist/
+/build/
+/missing_packages/
--- a/README.md
+++ b/README.md
+### 文件结构
+
+1、界面文件——try_with_gui.py
+
+2、各功能模块：
+
+- judge_subtitle.py：检测视频是否提供字幕
+- detect_with_ocr.py：在有字幕前提下使用 OCR 获取视频中的字幕
+- detect_with_asr.py：在无字幕前提下使用 ASR 获取视频中的字幕
+- narratage_detection.py：获取视频字幕及对应时间戳
+- speech_synthesis.py：生成旁白语音
+- split_wav.py：对视频中的音频进行提取、切分等操作
+- PaddlePaddle_DeepSpeech2文件夹：使用PaddleSpeech获取音频对应文本
+
--- a/speech_synthesis.py
+++ b/speech_synthesis.py
@@ -49,7 +49,6 @@ def change_speed(wav_path, speed=1.0):
    :param speed: 转换后的语速
    :return:
    """
-    print("调速")
    cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path)
    os.system(cmd_line)

@@ -92,7 +91,6 @@ def get_narratage_text(sheet_content, speed):
    narratage_end_time = []
    narratage_text = []
    for i, text in enumerate(narratage):
-        print(i, text)
        if text is not None:
            if text == '翻译':
                narratage_text.append(subtitle[i])
@@ -161,7 +159,6 @@ def adjust_volume(origin, start_timestamp, end_timestamp):

 def mix_speech(origin, narratage_paths, start_timestamps):
    composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
-    print(composed_wav_path)
    command_line = 'ffmpeg -i {}'.format(origin)
    for i, narratage_path in enumerate(narratage_paths):
        command_line += " -i {}".format(narratage_path)
@@ -172,7 +169,6 @@ def mix_speech(origin, narratage_paths, start_timestamps):
    command_line = command_line + "".join(["[aud{}]".format(str(i + 1)) for i in range(len(start_timestamps))])
    command_line += "amix=inputs={}\" -vsync 2 -y {}".format(len(start_timestamps) + 1, composed_wav_path)
    os.system(command_line)
-    print(command_line)


 def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
@@ -244,12 +240,13 @@ if __name__ == '__main__':
    # video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
    # args.sheet_path, args.output_dir, args.speed, args.caption_file

-    video_path = 'D:/heelo/hysxm_3.mp4'
-    sheet_path = 'D:/heelo/hysxm_3.xlsx'
-    output_dir = 'D:/AddCaption/hysxm_3'
-    speed = 1.25
-    caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
-
-    # 主函数执行
-    ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
-                  caption_file=caption_file)
+    # video_path = 'D:/heelo/hysxm_3.mp4'
+    # sheet_path = 'D:/heelo/hysxm_3.xlsx'
+    # output_dir = 'D:/AddCaption/hysxm_3'
+    # speed = 1.25
+    # caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
+    #
+    # # 主函数执行
+    # ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
+    #               caption_file=caption_file)
+    pass
\ No newline at end of file
--- a/split_wav.py
+++ b/split_wav.py
@@ -89,4 +89,4 @@ if __name__ == '__main__':
    video_path = 'D:/heelo/shaolin.mkv'
    extract_audio(video_path, './tmp', 62, 7489)
    split_audio()
-    extrac_speech()
+    extract_speech()
--- a/try_with_gui.py
+++ b/try_with_gui.py
@@ -11,6 +11,7 @@ import inspect
 window = tk.Tk()
 window.title('无障碍电影辅助工具')  # 标题
 window.geometry('600x400')  # 窗口尺寸
+window.resizable(0, 0)


 def open_video_file():
@@ -75,6 +76,7 @@ def start_process(p, p_label, state, intervals=100):
    启动进度条
    :param p: 进度条组件
    :param p_label: 进度条对应百分比文本
+    :param state: 进度条与任务用于通信对齐的变量，代表任务的实际进度
    :param intervals: 进度条前进所需时间
    :return:
    """
@@ -86,7 +88,7 @@ def start_process(p, p_label, state, intervals=100):
        if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
            p['value'] = round(state[0] * 100, 2)
            lastState = state[0]
-        p_label['text'] = str(round(p['value'], 2)) + "%"
+        p_label['text'] = format(format(p['value'], '2.2f'), "5s") + "%"
        if p['value'] == 100.0:
            p.stop()
            p['value'] = 100.0
@@ -118,9 +120,8 @@ def start_detect():
    from narratage_detection import detect
    # 显示进度条及开始检测
    progressbar_1.grid(column=2, row=1, sticky="W")
-    progress_1.grid(column=3, row=1)
+    progress_1.grid(column=3, row=1, sticky="W")
    processState.set("开始检测")
-    intervals = trans_to_seconds(endTime.get())
    # 多线程同步进行检测和进度条更新
    state = [None]
    threads = [
@@ -135,7 +136,7 @@ def start_detect():
        t.join()
    # 将进度条的进度拉满到100%，并给出“任务已完成”的提示
    progressbar_1['value'] = 100
-    progress_1['text'] = '100%'
+    progress_1['text'] = '100.0%'
    processState.set("任务已完成")
    # 检测完成后，将“停止检测”按钮设置为不可点击状态，”开始检测“按钮设置为可点击状态
    stopDetection.config(state=tk.DISABLED)
@@ -152,7 +153,7 @@ def stop_detect():
    startDetection.config(state=tk.ACTIVE)
    progressbar_1.stop()
    progressbar_1['value'] = 0
-    progress_1['text'] = "0%"
+    progress_1['text'] = "00.00%"
    progressbar_1.grid_forget()
    progress_1.grid_forget()

@@ -219,6 +220,8 @@ def start_synthesis():
    elif not os.path.exists(audio_dir):
        messagebox.showwarning("警告", "当前音频存放路径有误，请检查一遍。")
        return
+    if len(caption_path) == 0:
+        messagebox.showwarning("警告", "请选择字幕文件存放路径")
    if len(sheet_path) == 0:
        messagebox.showwarning("警告", "请选择你要处理的表格")
        return
@@ -258,7 +261,7 @@ def stop_synthesis():
    startSynthesis.config(state=tk.ACTIVE)
    progressbar_2.stop()
    progressbar_2['value'] = 0
-    progress_2['text'] = "0%"
+    progress_2['text'] = "00.00%"
    progressbar_2.grid_forget()
    progress_2.grid_forget()

@@ -317,13 +320,18 @@ tabControl.pack(expand=1, fill="both")
 video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
 video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)

+rows = 4
+for i in range(rows):
+    video_info.grid_rowconfigure(i, weight=1)
+    video_info.grid_columnconfigure(i, weight=1)
+
 input_label = ttk.Label(video_info, text="视频文件")
 input_label.grid(column=0, row=0)
 inputFilePath = tk.StringVar()
 inputFile = ttk.Entry(video_info, width=30, textvariable=inputFilePath)
-inputFile.grid(column=1, row=0)
+inputFile.grid(column=1, row=0, sticky="W")
 upload_button = ttk.Button(video_info, text="上传文件", command=open_video_file)
-upload_button.grid(column=2, row=0)
+upload_button.grid(column=2, row=0, sticky="W")

 startTime_label = ttk.Label(video_info, text="视频实际开始时间")
 startTime_label.grid(column=0, row=1)
@@ -354,15 +362,20 @@ choice_3.grid(column=3, row=3, sticky="W")
    - 停止检测按钮
 """
 detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
-detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4)
+detect_command.place(relx=0.05, rely=0.6, relwidth=0.9, relheight=0.3)
+
+rows = 4
+for i in range(rows):
+    detect_command.grid_rowconfigure(i, weight=1)
+    detect_command.grid_columnconfigure(i, weight=1)

 output_label = ttk.Label(detect_command, text="输出表格")
 output_label.grid(column=0, row=0)
 outputFilePath = tk.StringVar()
 outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath)
-outputFile.grid(column=1, row=0)
+outputFile.grid(column=1, row=0, sticky="W")
 save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
-save_button.grid(column=2, row=0)
+save_button.grid(column=2, row=0, sticky="W")

 startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect"))
 startDetection.grid(column=0, row=1)
@@ -388,11 +401,16 @@ stopDetection.config(state=tk.DISABLED)
 audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
 audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3)

+rows = 3
+for i in range(rows):
+    audio_info.grid_rowconfigure(i, weight=1)
+    audio_info.grid_columnconfigure(i, weight=1)
+
 video_label = ttk.Label(audio_info, text="原视频")
 video_label.grid(column=0, row=0)
 videoPath = tk.StringVar()
 videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
-videoPath_input.grid(column=1, row=0)
+videoPath_input.grid(column=1, row=0, sticky="W")
 upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
 upload_button_3.grid(column=2, row=0)

@@ -400,7 +418,7 @@ narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
 narratage_label.grid(column=0, row=1)
 narratagePath = tk.StringVar()
 narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath)
-narratagePath_input.grid(column=1, row=1)
+narratagePath_input.grid(column=1, row=1, sticky="W")
 upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file)
 upload_button_2.grid(column=2, row=1)

@@ -421,7 +439,12 @@ speedChosen.grid(column=1, row=2, sticky="W")
    - 停止合成按钮
 """
 synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
-synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.5)
+synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.4)
+
+rows = 4
+for i in range(rows):
+    synthesis_command.grid_rowconfigure(i, weight=1)
+    synthesis_command.grid_columnconfigure(i, weight=1)

 audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
 audioDir_label.grid(column=0, row=0)
@@ -446,7 +469,7 @@ processState_2 = tk.StringVar()
 stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
 stateLabel_2.grid(column=1, row=2, sticky="W")
 progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
-progress_2 = tk.Label(synthesis_command, text="0%")
+progress_2 = tk.Label(synthesis_command, text="00.00%")
 stopSynthesis = ttk.Button(synthesis_command, text="停止合成",
                           command=lambda: thread_it(stop_synthesis, name="stopSynthesis"))
 stopSynthesis.grid(column=0, row=3)