Commit 172eb5d2 authored by 翟艳秋(20软)

1.[modified]将界面大小固定,并对各组件均设置为左对齐;

2.[modified]修改进度条数字格式,固定为2位小数(自动补零);

3.[modified]添加对字幕文件存放路径的检查;

4.[modified]将语音合成从异步改为同步,解决语音合成过程中界面卡顿的问题;
parent fad7c317
/dist/
/build/
/missing_packages/
### 文件结构
1、界面文件——try_with_gui.py
2、各功能模块:
- judge_subtitle.py:检测视频是否提供字幕
- detect_with_ocr.py:在有字幕前提下使用ocr获取视频中的字幕
- detect_with_asr.py:在无字幕前提下使用asr获取视频中的字幕
- narratage_detection.py:获取视频字幕及对应时间戳
- speech_synthesis.py:生成旁白语音
- split_wav.py:对视频中的音频进行提取、切分等操作
- PaddlePaddle_DeepSpeech2文件夹:使用PaddleSpeech获取音频对应文本
...@@ -49,7 +49,6 @@ def change_speed(wav_path, speed=1.0): ...@@ -49,7 +49,6 @@ def change_speed(wav_path, speed=1.0):
:param speed: 转换后的语速 :param speed: 转换后的语速
:return: :return:
""" """
print("调速")
cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path) cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path)
os.system(cmd_line) os.system(cmd_line)
...@@ -92,7 +91,6 @@ def get_narratage_text(sheet_content, speed): ...@@ -92,7 +91,6 @@ def get_narratage_text(sheet_content, speed):
narratage_end_time = [] narratage_end_time = []
narratage_text = [] narratage_text = []
for i, text in enumerate(narratage): for i, text in enumerate(narratage):
print(i, text)
if text is not None: if text is not None:
if text == '翻译': if text == '翻译':
narratage_text.append(subtitle[i]) narratage_text.append(subtitle[i])
...@@ -161,7 +159,6 @@ def adjust_volume(origin, start_timestamp, end_timestamp): ...@@ -161,7 +159,6 @@ def adjust_volume(origin, start_timestamp, end_timestamp):
def mix_speech(origin, narratage_paths, start_timestamps): def mix_speech(origin, narratage_paths, start_timestamps):
composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav") composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
print(composed_wav_path)
command_line = 'ffmpeg -i {}'.format(origin) command_line = 'ffmpeg -i {}'.format(origin)
for i, narratage_path in enumerate(narratage_paths): for i, narratage_path in enumerate(narratage_paths):
command_line += " -i {}".format(narratage_path) command_line += " -i {}".format(narratage_path)
...@@ -172,7 +169,6 @@ def mix_speech(origin, narratage_paths, start_timestamps): ...@@ -172,7 +169,6 @@ def mix_speech(origin, narratage_paths, start_timestamps):
command_line = command_line + "".join(["[aud{}]".format(str(i + 1)) for i in range(len(start_timestamps))]) command_line = command_line + "".join(["[aud{}]".format(str(i + 1)) for i in range(len(start_timestamps))])
command_line += "amix=inputs={}\" -vsync 2 -y {}".format(len(start_timestamps) + 1, composed_wav_path) command_line += "amix=inputs={}\" -vsync 2 -y {}".format(len(start_timestamps) + 1, composed_wav_path)
os.system(command_line) os.system(command_line)
print(command_line)
def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None): def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
...@@ -244,12 +240,13 @@ if __name__ == '__main__': ...@@ -244,12 +240,13 @@ if __name__ == '__main__':
# video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\ # video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
# args.sheet_path, args.output_dir, args.speed, args.caption_file # args.sheet_path, args.output_dir, args.speed, args.caption_file
video_path = 'D:/heelo/hysxm_3.mp4' # video_path = 'D:/heelo/hysxm_3.mp4'
sheet_path = 'D:/heelo/hysxm_3.xlsx' # sheet_path = 'D:/heelo/hysxm_3.xlsx'
output_dir = 'D:/AddCaption/hysxm_3' # output_dir = 'D:/AddCaption/hysxm_3'
speed = 1.25 # speed = 1.25
caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt' # caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
#
# 主函数执行 # # 主函数执行
ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed, # ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
caption_file=caption_file) # caption_file=caption_file)
pass
\ No newline at end of file
...@@ -89,4 +89,4 @@ if __name__ == '__main__': ...@@ -89,4 +89,4 @@ if __name__ == '__main__':
video_path = 'D:/heelo/shaolin.mkv' video_path = 'D:/heelo/shaolin.mkv'
extract_audio(video_path, './tmp', 62, 7489) extract_audio(video_path, './tmp', 62, 7489)
split_audio() split_audio()
extrac_speech() extract_speech()
...@@ -11,6 +11,7 @@ import inspect ...@@ -11,6 +11,7 @@ import inspect
window = tk.Tk() window = tk.Tk()
window.title('无障碍电影辅助工具') # 标题 window.title('无障碍电影辅助工具') # 标题
window.geometry('600x400') # 窗口尺寸 window.geometry('600x400') # 窗口尺寸
window.resizable(0, 0)
def open_video_file(): def open_video_file():
...@@ -75,6 +76,7 @@ def start_process(p, p_label, state, intervals=100): ...@@ -75,6 +76,7 @@ def start_process(p, p_label, state, intervals=100):
启动进度条 启动进度条
:param p: 进度条组件 :param p: 进度条组件
:param p_label: 进度条对应百分比文本 :param p_label: 进度条对应百分比文本
:param state: 进度条与任务用于通信对齐的变量,代表任务的实际进度
:param intervals: 进度条前进所需时间 :param intervals: 进度条前进所需时间
:return: :return:
""" """
...@@ -86,7 +88,7 @@ def start_process(p, p_label, state, intervals=100): ...@@ -86,7 +88,7 @@ def start_process(p, p_label, state, intervals=100):
if state[0] and state[0] != lastState and state[0] * 100 > p['value']: if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
p['value'] = round(state[0] * 100, 2) p['value'] = round(state[0] * 100, 2)
lastState = state[0] lastState = state[0]
p_label['text'] = str(round(p['value'], 2)) + "%" p_label['text'] = format(format(p['value'], '2.2f'), "5s") + "%"
if p['value'] == 100.0: if p['value'] == 100.0:
p.stop() p.stop()
p['value'] = 100.0 p['value'] = 100.0
...@@ -118,9 +120,8 @@ def start_detect(): ...@@ -118,9 +120,8 @@ def start_detect():
from narratage_detection import detect from narratage_detection import detect
# 显示进度条及开始检测 # 显示进度条及开始检测
progressbar_1.grid(column=2, row=1, sticky="W") progressbar_1.grid(column=2, row=1, sticky="W")
progress_1.grid(column=3, row=1) progress_1.grid(column=3, row=1, sticky="W")
processState.set("开始检测") processState.set("开始检测")
intervals = trans_to_seconds(endTime.get())
# 多线程同步进行检测和进度条更新 # 多线程同步进行检测和进度条更新
state = [None] state = [None]
threads = [ threads = [
...@@ -135,7 +136,7 @@ def start_detect(): ...@@ -135,7 +136,7 @@ def start_detect():
t.join() t.join()
# 将进度条的进度拉满到100%,并给出“任务已完成”的提示 # 将进度条的进度拉满到100%,并给出“任务已完成”的提示
progressbar_1['value'] = 100 progressbar_1['value'] = 100
progress_1['text'] = '100%' progress_1['text'] = '100.0%'
processState.set("任务已完成") processState.set("任务已完成")
# 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态 # 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态
stopDetection.config(state=tk.DISABLED) stopDetection.config(state=tk.DISABLED)
...@@ -152,7 +153,7 @@ def stop_detect(): ...@@ -152,7 +153,7 @@ def stop_detect():
startDetection.config(state=tk.ACTIVE) startDetection.config(state=tk.ACTIVE)
progressbar_1.stop() progressbar_1.stop()
progressbar_1['value'] = 0 progressbar_1['value'] = 0
progress_1['text'] = "0%" progress_1['text'] = "00.00%"
progressbar_1.grid_forget() progressbar_1.grid_forget()
progress_1.grid_forget() progress_1.grid_forget()
...@@ -219,6 +220,8 @@ def start_synthesis(): ...@@ -219,6 +220,8 @@ def start_synthesis():
elif not os.path.exists(audio_dir): elif not os.path.exists(audio_dir):
messagebox.showwarning("警告", "当前音频存放路径有误,请检查一遍。") messagebox.showwarning("警告", "当前音频存放路径有误,请检查一遍。")
return return
if len(caption_path) == 0:
messagebox.showwarning("警告", "请选择字幕文件存放路径")
if len(sheet_path) == 0: if len(sheet_path) == 0:
messagebox.showwarning("警告", "请选择你要处理的表格") messagebox.showwarning("警告", "请选择你要处理的表格")
return return
...@@ -258,7 +261,7 @@ def stop_synthesis(): ...@@ -258,7 +261,7 @@ def stop_synthesis():
startSynthesis.config(state=tk.ACTIVE) startSynthesis.config(state=tk.ACTIVE)
progressbar_2.stop() progressbar_2.stop()
progressbar_2['value'] = 0 progressbar_2['value'] = 0
progress_2['text'] = "0%" progress_2['text'] = "00.00%"
progressbar_2.grid_forget() progressbar_2.grid_forget()
progress_2.grid_forget() progress_2.grid_forget()
...@@ -317,13 +320,18 @@ tabControl.pack(expand=1, fill="both") ...@@ -317,13 +320,18 @@ tabControl.pack(expand=1, fill="both")
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ") video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4) video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
rows = 4
for i in range(rows):
video_info.grid_rowconfigure(i, weight=1)
video_info.grid_columnconfigure(i, weight=1)
input_label = ttk.Label(video_info, text="视频文件") input_label = ttk.Label(video_info, text="视频文件")
input_label.grid(column=0, row=0) input_label.grid(column=0, row=0)
inputFilePath = tk.StringVar() inputFilePath = tk.StringVar()
inputFile = ttk.Entry(video_info, width=30, textvariable=inputFilePath) inputFile = ttk.Entry(video_info, width=30, textvariable=inputFilePath)
inputFile.grid(column=1, row=0) inputFile.grid(column=1, row=0, sticky="W")
upload_button = ttk.Button(video_info, text="上传文件", command=open_video_file) upload_button = ttk.Button(video_info, text="上传文件", command=open_video_file)
upload_button.grid(column=2, row=0) upload_button.grid(column=2, row=0, sticky="W")
startTime_label = ttk.Label(video_info, text="视频实际开始时间") startTime_label = ttk.Label(video_info, text="视频实际开始时间")
startTime_label.grid(column=0, row=1) startTime_label.grid(column=0, row=1)
...@@ -354,15 +362,20 @@ choice_3.grid(column=3, row=3, sticky="W") ...@@ -354,15 +362,20 @@ choice_3.grid(column=3, row=3, sticky="W")
- 停止检测按钮 - 停止检测按钮
""" """
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ") detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4) detect_command.place(relx=0.05, rely=0.6, relwidth=0.9, relheight=0.3)
rows = 4
for i in range(rows):
detect_command.grid_rowconfigure(i, weight=1)
detect_command.grid_columnconfigure(i, weight=1)
output_label = ttk.Label(detect_command, text="输出表格") output_label = ttk.Label(detect_command, text="输出表格")
output_label.grid(column=0, row=0) output_label.grid(column=0, row=0)
outputFilePath = tk.StringVar() outputFilePath = tk.StringVar()
outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath) outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath)
outputFile.grid(column=1, row=0) outputFile.grid(column=1, row=0, sticky="W")
save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file) save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
save_button.grid(column=2, row=0) save_button.grid(column=2, row=0, sticky="W")
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect")) startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect"))
startDetection.grid(column=0, row=1) startDetection.grid(column=0, row=1)
...@@ -388,11 +401,16 @@ stopDetection.config(state=tk.DISABLED) ...@@ -388,11 +401,16 @@ stopDetection.config(state=tk.DISABLED)
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ") audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3) audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3)
rows = 3
for i in range(rows):
audio_info.grid_rowconfigure(i, weight=1)
audio_info.grid_columnconfigure(i, weight=1)
video_label = ttk.Label(audio_info, text="原视频") video_label = ttk.Label(audio_info, text="原视频")
video_label.grid(column=0, row=0) video_label.grid(column=0, row=0)
videoPath = tk.StringVar() videoPath = tk.StringVar()
videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath) videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
videoPath_input.grid(column=1, row=0) videoPath_input.grid(column=1, row=0, sticky="W")
upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path) upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
upload_button_3.grid(column=2, row=0) upload_button_3.grid(column=2, row=0)
...@@ -400,7 +418,7 @@ narratage_label = ttk.Label(audio_info, text="旁白脚本表格") ...@@ -400,7 +418,7 @@ narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=1) narratage_label.grid(column=0, row=1)
narratagePath = tk.StringVar() narratagePath = tk.StringVar()
narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath) narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath)
narratagePath_input.grid(column=1, row=1) narratagePath_input.grid(column=1, row=1, sticky="W")
upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file) upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file)
upload_button_2.grid(column=2, row=1) upload_button_2.grid(column=2, row=1)
...@@ -421,7 +439,12 @@ speedChosen.grid(column=1, row=2, sticky="W") ...@@ -421,7 +439,12 @@ speedChosen.grid(column=1, row=2, sticky="W")
- 停止合成按钮 - 停止合成按钮
""" """
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ") synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.5) synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.4)
rows = 4
for i in range(rows):
synthesis_command.grid_rowconfigure(i, weight=1)
synthesis_command.grid_columnconfigure(i, weight=1)
audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于") audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
audioDir_label.grid(column=0, row=0) audioDir_label.grid(column=0, row=0)
...@@ -446,7 +469,7 @@ processState_2 = tk.StringVar() ...@@ -446,7 +469,7 @@ processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green") stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W") stateLabel_2.grid(column=1, row=2, sticky="W")
progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate") progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
progress_2 = tk.Label(synthesis_command, text="0%") progress_2 = tk.Label(synthesis_command, text="00.00%")
stopSynthesis = ttk.Button(synthesis_command, text="停止合成", stopSynthesis = ttk.Button(synthesis_command, text="停止合成",
command=lambda: thread_it(stop_synthesis, name="stopSynthesis")) command=lambda: thread_it(stop_synthesis, name="stopSynthesis"))
stopSynthesis.grid(column=0, row=3) stopSynthesis.grid(column=0, row=3)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment