Commit f9d50d99 authored by xuanweiace

Merge branch 'feat_1' of http://gitlab.uiiai.com/xuanweiace/accessibility_movie_2 into feat_1

parents c7b385c9 4514a70e
...@@ -65,12 +65,13 @@ class Assemble_Dialog(QDialog, Ui_Dialog): ...@@ -65,12 +65,13 @@ class Assemble_Dialog(QDialog, Ui_Dialog):
print("start_assemble") print("start_assemble")
video_path = self.lineEdit.text() video_path = self.lineEdit.text()
# 默认 输出的音频是工程目录+/output # 默认 输出的音频是工程目录+/output
audio_dir = self.projectContext.project_base_dir+"output/" audio_dir = os.path.join(self.projectContext.project_base_dir, "output")
sheet_path = self.lineEdit_2.text() sheet_path = self.lineEdit_2.text()
speaker_info = self.lineEdit_3.text() speaker_info = self.lineEdit_3.text()
speed_info = self.lineEdit_4.text() speed_info = self.lineEdit_4.text()
#todo 后续变成常量存起来,或者做成配置 #todo 后续变成常量存起来,或者做成配置
caption_path = replace_path_suffix(self.lineEdit.text(), ".srt") # caption_path = replace_path_suffix(self.lineEdit.text(), ".srt")
caption_path = os.path.join(audio_dir, os.path.basename(video_path).split('.')[0] + ".srt")
print("video_path: ",video_path) print("video_path: ",video_path)
print("audio_dir: ",audio_dir) print("audio_dir: ",audio_dir)
...@@ -78,7 +79,7 @@ class Assemble_Dialog(QDialog, Ui_Dialog): ...@@ -78,7 +79,7 @@ class Assemble_Dialog(QDialog, Ui_Dialog):
print("speed_info: ",speed_info) print("speed_info: ",speed_info)
print("caption_path: ",caption_path) print("caption_path: ",caption_path)
print("speaker_info: ",speaker_info) print("speaker_info: ",speaker_info)
self.start_assemble_signal.emit([video_path,audio_dir, sheet_path,speed_info, caption_path, speaker_info]) self.start_assemble_signal.emit([video_path, audio_dir, sheet_path,speed_info, caption_path, speaker_info])
if __name__ == '__main__': if __name__ == '__main__':
app = QApplication(sys.argv) app = QApplication(sys.argv)
......
...@@ -508,16 +508,15 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -508,16 +508,15 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.export_timer.stop() self.export_timer.stop()
print("===已有线程结束了 in %s ===" % (type)) print("===已有线程结束了 in %s ===" % (type))
self.statusbarLabel.setText(" %s完成" % (type))
self.progressBar.setValue(100)
self.progressLabel.setText(f"100%")
self.projectContext.nd_process = 1
for t in self.threads: for t in self.threads:
if t.exitcode != 0: if t.exitcode != 0:
print("Exception in", t.getName()) print("Exception in", t.getName())
self.show_warning_msg_box("运行出错,请联系开发者处理") self.show_warning_msg_box("运行出错,请联系开发者处理")
print("当前已有的检测结果", self.projectContext.all_elements)
return return
self.statusbarLabel.setText(" %s完成" % (type))
self.progressBar.setValue(100)
self.progressLabel.setText(f"100%")
self.projectContext.nd_process = 1
def deal_synthesis_callback_slot(self, threads, state): def deal_synthesis_callback_slot(self, threads, state):
self.statusbarLabel.setText(" 准备合成:") self.statusbarLabel.setText(" 准备合成:")
...@@ -1165,8 +1164,8 @@ class MainWindow(QMainWindow, Ui_MainWindow): ...@@ -1165,8 +1164,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.projectContext.save_project(False) self.projectContext.save_project(False)
def export_all(self): def export_all(self):
# 暂时存放音频的文件夹被命名为tmp # 存放合成音频的文件夹被命名为output
output_dir = os.path.join(self.projectContext.project_base_dir, "tmp") output_dir = os.path.join(self.projectContext.project_base_dir, "output")
if os.path.exists(output_dir) and len(os.listdir(output_dir)) > 0: if os.path.exists(output_dir) and len(os.listdir(output_dir)) > 0:
self.export.export_slot(self.projectContext.video_path, output_dir) self.export.export_slot(self.projectContext.video_path, output_dir)
else: else:
......
...@@ -89,7 +89,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float ...@@ -89,7 +89,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
speaker (Speaker): 说话人 speaker (Speaker): 说话人
speed (float, optional): 指定的音频语速. Defaults to 1.0. speed (float, optional): 指定的音频语速. Defaults to 1.0.
""" """
audio_path = tmp_file
speech_config = SpeechConfig( speech_config = SpeechConfig(
subscription="db34d38d2d3447d482e0f977c66bd624", subscription="db34d38d2d3447d482e0f977c66bd624",
region="eastus" region="eastus"
...@@ -102,7 +101,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float ...@@ -102,7 +101,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
if not os.path.exists(os.path.dirname(output_file)): # 如果路径不存在 if not os.path.exists(os.path.dirname(output_file)): # 如果路径不存在
print("output_file路径不存在,创建:", os.path.dirname(output_file)) print("output_file路径不存在,创建:", os.path.dirname(output_file))
os.makedirs(os.path.dirname(output_file)) os.makedirs(os.path.dirname(output_file))
audio_config = AudioOutputConfig(filename=audio_path)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None) synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
ssml_string = f""" ssml_string = f"""
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}"> <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
...@@ -169,7 +167,7 @@ def read_sheet(book_path: str, sheet_name: str = "") -> dict: ...@@ -169,7 +167,7 @@ def read_sheet(book_path: str, sheet_name: str = "") -> dict:
return sheet_content return sheet_content
def get_narratage_text(sheet_content: dict, speed: float) -> Tuple[list, list, list]: def get_narratage_text(sheet_content: dict) -> Tuple[list, list, list]:
"""获取旁白解说文本及起止时间 """获取旁白解说文本及起止时间
Args: Args:
...@@ -183,40 +181,49 @@ def get_narratage_text(sheet_content: dict, speed: float) -> Tuple[list, list, l ...@@ -183,40 +181,49 @@ def get_narratage_text(sheet_content: dict, speed: float) -> Tuple[list, list, l
subtitle = sheet_content['字幕'] subtitle = sheet_content['字幕']
start_time = sheet_content['起始时间'] start_time = sheet_content['起始时间']
end_time = sheet_content['终止时间'] end_time = sheet_content['终止时间']
speeds = sheet_content["语速"]
narratage_start_time = [] narratage_start_time = []
narratage_end_time = [] narratage_end_time = []
narratage_text = [] narratage_text = []
narratage_speed = []
for i, text in enumerate(narratage): for i, text in enumerate(narratage):
# 这里的speed是x.x倍速
speed = float(speeds[i].split('(')[0])
if text is not None: if text is not None:
if text == '翻译': if text == '翻译':
narratage_text.append(subtitle[i]) narratage_text.append(subtitle[i])
else:
narratage_text.append(text)
"""以下为之前自动根据表格生成旁白对应起始时间和终止时间的方法,目前不需要了
"""
# # 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
# text_split = text.split('\n')
# # 如果旁白有对应的时间戳(是这段大旁白里的特定位置)
# if start_time[i] is not None and end_time[i] is not None:
# cur_start = float(start_time[i])
# cur_end = float(end_time[i])
# elif subtitle[i] is None:
# # 上一个字幕/旁白的终止时间后0.1s
# cur_start = float(end_time[i - 1]) + normal_interval if i > 0 else 0
# # 如果是最后一句旁白,后面没有字幕及时间戳了,就先把cur_end置为-1
# cur_end = float(
# start_time[i + 1]) if i + 1 < len(start_time) else -1
# else:
# # 有字幕,可覆盖字幕
# cur_start = float(start_time[i])
# cur_end = float(end_time[i])
# for x in text_split:
# if len(x) == 0:
# continue
# cur_end = max(cur_end, cur_start + (len(x) / (normal_speed * speed) + normal_interval))
# narratage_text.append(x)
# narratage_start_time.append(cur_start)
# narratage_end_time.append(cur_end)
# cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed
narratage_start_time.append(float(start_time[i])) narratage_start_time.append(float(start_time[i]))
narratage_end_time.append(float(end_time[i])) narratage_end_time.append(float(end_time[i]))
else: narratage_speed.append(speed)
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s return narratage_text, narratage_start_time, narratage_end_time, narratage_speed
text_split = text.split('\n')
# 如果旁白有对应的时间戳(是这段大旁白里的特定位置)
if start_time[i] is not None and end_time[i] is not None:
cur_start = float(start_time[i])
cur_end = float(end_time[i])
elif subtitle[i] is None:
cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
# 如果是最后一句旁白,后面没有字幕及时间戳了,就先把cur_end置为-1
cur_end = float(
start_time[i + 1]) if i + 1 < len(start_time) else -1
else:
# 有字幕,可覆盖字幕
cur_start = float(start_time[i])
cur_end = float(end_time[i])
for x in text_split:
if len(x) == 0:
continue
cur_end = max(cur_end, cur_start + (len(x) / normal_speed + normal_interval) / speed)
narratage_text.append(x)
narratage_start_time.append(cur_start)
narratage_end_time.append(cur_end)
cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed
return narratage_text, narratage_start_time, narratage_end_time
def second_to_str(seconds: float) -> str: def second_to_str(seconds: float) -> str:
...@@ -331,8 +338,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa ...@@ -331,8 +338,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
# print("read sheet at time: ", datetime.datetime.now()) # print("read sheet at time: ", datetime.datetime.now())
sheet_content = read_sheet(book_path) sheet_content = read_sheet(book_path)
# print("get narratage text at time: ", datetime.datetime.now()) # print("get narratage text at time: ", datetime.datetime.now())
narratages, start_timestamp, end_timestamp = get_narratage_text( narratages, start_timestamp, end_timestamp, cur_speed = get_narratage_text(sheet_content)
sheet_content, speed)
# print("export caption at time: ", datetime.datetime.now()) # print("export caption at time: ", datetime.datetime.now())
export_caption(sheet_content, caption_file) export_caption(sheet_content, caption_file)
print("已导出字幕文件") print("已导出字幕文件")
...@@ -343,7 +349,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa ...@@ -343,7 +349,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
for i, text in enumerate(narratages): for i, text in enumerate(narratages):
wav_path = root_path + '/%.2f.wav' % start_timestamp[i] wav_path = root_path + '/%.2f.wav' % start_timestamp[i]
narratage_paths.append(wav_path) narratage_paths.append(wav_path)
speech_synthesis(text, wav_path, chosen_speaker, speed) speech_synthesis(text, wav_path, chosen_speaker, cur_speed[i])
print("目前正在处理{}".format(wav_path)) print("目前正在处理{}".format(wav_path))
if state is not None: if state is not None:
state[0] = float((i + 1) / len(narratages)) * 0.97 state[0] = float((i + 1) / len(narratages)) * 0.97
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment