Commit 172eb5d2 authored by 翟艳秋(20软)

1.[modified]将界面大小固定,并对各组件均设置为左对齐;

2.[modified]修改进度条数字格式,固定为2位小数(自动补零);
3.[modified]添加对字幕文件存放路径的检查;
4.[modified]将语音合成从异步改为同步,解决语音合成过程中界面卡顿的问题;
parent fad7c317
/dist/
/build/
/missing_packages/
### 文件结构
1、界面文件——try_with_gui.py
2、各功能模块:
- judge_subtitle.py:检测视频是否提供字幕
- detect_with_ocr.py:在有字幕前提下使用OCR获取视频中的字幕
- detect_with_asr.py:在无字幕前提下使用ASR获取视频中的字幕
- narratage_detection.py:获取视频字幕及对应时间戳
- speech_synthesis.py:生成旁白语音
- split_wav.py:对视频中的音频进行提取、切分等操作
- PaddlePaddle_DeepSpeech2文件夹:使用PaddleSpeech获取音频对应文本
......@@ -49,7 +49,6 @@ def change_speed(wav_path, speed=1.0):
:param speed: 转换后的语速
:return:
"""
print("调速")
cmd_line = 'ffmpeg -y -i {} -filter:a \"atempo={}\" {}'.format(tmp_file, speed, wav_path)
os.system(cmd_line)
......@@ -92,7 +91,6 @@ def get_narratage_text(sheet_content, speed):
narratage_end_time = []
narratage_text = []
for i, text in enumerate(narratage):
print(i, text)
if text is not None:
if text == '翻译':
narratage_text.append(subtitle[i])
......@@ -161,7 +159,6 @@ def adjust_volume(origin, start_timestamp, end_timestamp):
def mix_speech(origin, narratage_paths, start_timestamps):
composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
print(composed_wav_path)
command_line = 'ffmpeg -i {}'.format(origin)
for i, narratage_path in enumerate(narratage_paths):
command_line += " -i {}".format(narratage_path)
......@@ -172,7 +169,6 @@ def mix_speech(origin, narratage_paths, start_timestamps):
command_line = command_line + "".join(["[aud{}]".format(str(i + 1)) for i in range(len(start_timestamps))])
command_line += "amix=inputs={}\" -vsync 2 -y {}".format(len(start_timestamps) + 1, composed_wav_path)
os.system(command_line)
print(command_line)
def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
......@@ -244,12 +240,13 @@ if __name__ == '__main__':
# video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
# args.sheet_path, args.output_dir, args.speed, args.caption_file
video_path = 'D:/heelo/hysxm_3.mp4'
sheet_path = 'D:/heelo/hysxm_3.xlsx'
output_dir = 'D:/AddCaption/hysxm_3'
speed = 1.25
caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
# 主函数执行
ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
caption_file=caption_file)
# video_path = 'D:/heelo/hysxm_3.mp4'
# sheet_path = 'D:/heelo/hysxm_3.xlsx'
# output_dir = 'D:/AddCaption/hysxm_3'
# speed = 1.25
# caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
#
# # 主函数执行
# ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
# caption_file=caption_file)
pass
\ No newline at end of file
......@@ -89,4 +89,4 @@ if __name__ == '__main__':
video_path = 'D:/heelo/shaolin.mkv'
extract_audio(video_path, './tmp', 62, 7489)
split_audio()
extrac_speech()
extract_speech()
......@@ -11,6 +11,7 @@ import inspect
window = tk.Tk()
window.title('无障碍电影辅助工具') # 标题
window.geometry('600x400') # 窗口尺寸
window.resizable(0, 0)
def open_video_file():
......@@ -75,6 +76,7 @@ def start_process(p, p_label, state, intervals=100):
启动进度条
:param p: 进度条组件
:param p_label: 进度条对应百分比文本
:param state: 进度条与任务用于通信对齐的变量,代表任务的实际进度
:param intervals: 进度条前进所需时间
:return:
"""
......@@ -86,7 +88,7 @@ def start_process(p, p_label, state, intervals=100):
if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
p['value'] = round(state[0] * 100, 2)
lastState = state[0]
p_label['text'] = str(round(p['value'], 2)) + "%"
p_label['text'] = format(format(p['value'], '2.2f'), "5s") + "%"
if p['value'] == 100.0:
p.stop()
p['value'] = 100.0
......@@ -118,9 +120,8 @@ def start_detect():
from narratage_detection import detect
# 显示进度条及开始检测
progressbar_1.grid(column=2, row=1, sticky="W")
progress_1.grid(column=3, row=1)
progress_1.grid(column=3, row=1, sticky="W")
processState.set("开始检测")
intervals = trans_to_seconds(endTime.get())
# 多线程同步进行检测和进度条更新
state = [None]
threads = [
......@@ -135,7 +136,7 @@ def start_detect():
t.join()
# 将进度条的进度拉满到100%,并给出“任务已完成”的提示
progressbar_1['value'] = 100
progress_1['text'] = '100%'
progress_1['text'] = '100.0%'
processState.set("任务已完成")
# 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态
stopDetection.config(state=tk.DISABLED)
......@@ -152,7 +153,7 @@ def stop_detect():
startDetection.config(state=tk.ACTIVE)
progressbar_1.stop()
progressbar_1['value'] = 0
progress_1['text'] = "0%"
progress_1['text'] = "00.00%"
progressbar_1.grid_forget()
progress_1.grid_forget()
......@@ -219,6 +220,8 @@ def start_synthesis():
elif not os.path.exists(audio_dir):
messagebox.showwarning("警告", "当前音频存放路径有误,请检查一遍。")
return
if len(caption_path) == 0:
messagebox.showwarning("警告", "请选择字幕文件存放路径")
if len(sheet_path) == 0:
messagebox.showwarning("警告", "请选择你要处理的表格")
return
......@@ -258,7 +261,7 @@ def stop_synthesis():
startSynthesis.config(state=tk.ACTIVE)
progressbar_2.stop()
progressbar_2['value'] = 0
progress_2['text'] = "0%"
progress_2['text'] = "00.00%"
progressbar_2.grid_forget()
progress_2.grid_forget()
......@@ -317,13 +320,18 @@ tabControl.pack(expand=1, fill="both")
video_info = ttk.LabelFrame(tab1, text=" 视频信息操作 ")
video_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
rows = 4
for i in range(rows):
video_info.grid_rowconfigure(i, weight=1)
video_info.grid_columnconfigure(i, weight=1)
input_label = ttk.Label(video_info, text="视频文件")
input_label.grid(column=0, row=0)
inputFilePath = tk.StringVar()
inputFile = ttk.Entry(video_info, width=30, textvariable=inputFilePath)
inputFile.grid(column=1, row=0)
inputFile.grid(column=1, row=0, sticky="W")
upload_button = ttk.Button(video_info, text="上传文件", command=open_video_file)
upload_button.grid(column=2, row=0)
upload_button.grid(column=2, row=0, sticky="W")
startTime_label = ttk.Label(video_info, text="视频实际开始时间")
startTime_label.grid(column=0, row=1)
......@@ -354,15 +362,20 @@ choice_3.grid(column=3, row=3, sticky="W")
- 停止检测按钮
"""
detect_command = ttk.LabelFrame(tab1, text=" 检测步骤 ")
detect_command.place(relx=0.05, rely=0.5, relwidth=0.9, relheight=0.4)
detect_command.place(relx=0.05, rely=0.6, relwidth=0.9, relheight=0.3)
rows = 4
for i in range(rows):
detect_command.grid_rowconfigure(i, weight=1)
detect_command.grid_columnconfigure(i, weight=1)
output_label = ttk.Label(detect_command, text="输出表格")
output_label.grid(column=0, row=0)
outputFilePath = tk.StringVar()
outputFile = ttk.Entry(detect_command, width=30, textvariable=outputFilePath)
outputFile.grid(column=1, row=0)
outputFile.grid(column=1, row=0, sticky="W")
save_button = ttk.Button(detect_command, text="打开文件夹", command=find_save_file)
save_button.grid(column=2, row=0)
save_button.grid(column=2, row=0, sticky="W")
startDetection = ttk.Button(detect_command, text="开始检测", command=lambda: thread_it(start_detect, name="startDetect"))
startDetection.grid(column=0, row=1)
......@@ -388,11 +401,16 @@ stopDetection.config(state=tk.DISABLED)
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3)
rows = 3
for i in range(rows):
audio_info.grid_rowconfigure(i, weight=1)
audio_info.grid_columnconfigure(i, weight=1)
video_label = ttk.Label(audio_info, text="原视频")
video_label.grid(column=0, row=0)
videoPath = tk.StringVar()
videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
videoPath_input.grid(column=1, row=0)
videoPath_input.grid(column=1, row=0, sticky="W")
upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
upload_button_3.grid(column=2, row=0)
......@@ -400,7 +418,7 @@ narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=1)
narratagePath = tk.StringVar()
narratagePath_input = ttk.Entry(audio_info, width=30, textvariable=narratagePath)
narratagePath_input.grid(column=1, row=1)
narratagePath_input.grid(column=1, row=1, sticky="W")
upload_button_2 = ttk.Button(audio_info, text="上传文件", command=open_sheet_file)
upload_button_2.grid(column=2, row=1)
......@@ -421,7 +439,12 @@ speedChosen.grid(column=1, row=2, sticky="W")
- 停止合成按钮
"""
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.5)
synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.4)
rows = 4
for i in range(rows):
synthesis_command.grid_rowconfigure(i, weight=1)
synthesis_command.grid_columnconfigure(i, weight=1)
audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
audioDir_label.grid(column=0, row=0)
......@@ -446,7 +469,7 @@ processState_2 = tk.StringVar()
stateLabel_2 = tk.Label(synthesis_command, textvariable=processState_2, fg="green")
stateLabel_2.grid(column=1, row=2, sticky="W")
progressbar_2 = ttk.Progressbar(synthesis_command, length=100, mode="determinate")
progress_2 = tk.Label(synthesis_command, text="0%")
progress_2 = tk.Label(synthesis_command, text="00.00%")
stopSynthesis = ttk.Button(synthesis_command, text="停止合成",
command=lambda: thread_it(stop_synthesis, name="stopSynthesis"))
stopSynthesis.grid(column=0, row=3)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment