Commit fad7c317 authored by 翟艳秋(20软)

1. [add] 原音频中添加旁白音频,生成混合音频;

2. [modified] 将进度条状态调整为小数点后2位;

3. [modified] 检测字幕有无情况的检测范围修正为[实际开始, 实际结束]
parent 7d7ed791
...@@ -222,7 +222,6 @@ def write_excel_xlsx(path, sheet_name, value): ...@@ -222,7 +222,6 @@ def write_excel_xlsx(path, sheet_name, value):
for i in range(0, index): for i in range(0, index):
for j in range(0, len(value[i])): for j in range(0, len(value[i])):
sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j])) sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j]))
print(value[i][j])
if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译': if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译':
sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00') sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
workbook.save(path) workbook.save(path)
...@@ -242,7 +241,7 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state): ...@@ -242,7 +241,7 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
# 输出旁白位置推荐信息到表格 # 输出旁白位置推荐信息到表格
write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content) write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
state[0] = 1 state[0] = 1.00
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -54,7 +54,7 @@ def detect_subtitle(frame): ...@@ -54,7 +54,7 @@ def detect_subtitle(frame):
return False return False
def detect_movie(video_path, start, interval): def detect_movie(video_path, start, end, interval):
""" """
使用整部视频进行测试,确定视频是否提供字幕 使用整部视频进行测试,确定视频是否提供字幕
:param video_path: 视频的地址 :param video_path: 视频的地址
...@@ -64,9 +64,8 @@ def detect_movie(video_path, start, interval): ...@@ -64,9 +64,8 @@ def detect_movie(video_path, start, interval):
""" """
video = cv2.VideoCapture(video_path) video = cv2.VideoCapture(video_path)
fps = np.ceil(video.get(cv2.CAP_PROP_FPS)) fps = np.ceil(video.get(cv2.CAP_PROP_FPS))
end_time = video.get(cv2.CAP_PROP_FRAME_COUNT) / fps if start + interval * 3 > end:
if start + interval * 3 > end_time: interval = int((end - start) / 3)
interval = int((end_time - start) / 3)
start = start * fps start = start * fps
interval = interval * fps interval = interval * fps
random_number = 50 random_number = 50
......
...@@ -30,7 +30,7 @@ def detect(video_path, start_time, end_time, book_path, state, subtitle=None): ...@@ -30,7 +30,7 @@ def detect(video_path, start_time, end_time, book_path, state, subtitle=None):
# 根据用户的选择来确定电影是否有字幕,如果“未知”,则自动检测 # 根据用户的选择来确定电影是否有字幕,如果“未知”,则自动检测
if subtitle == 0: if subtitle == 0:
has_subtitle = detect_movie(video_path, start_time, 180) has_subtitle = detect_movie(video_path, start_time, end_time, 180)
elif subtitle == 1: elif subtitle == 1:
has_subtitle = True has_subtitle = True
else: else:
......
...@@ -8,6 +8,7 @@ from azure.cognitiveservices.speech.audio import AudioOutputConfig ...@@ -8,6 +8,7 @@ from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl import openpyxl
tmp_file = 'tmp.wav' tmp_file = 'tmp.wav'
adjusted_wav_path = "adjusted.wav"
normal_speed = 4 normal_speed = 4
normal_interval = 0.1 normal_interval = 0.1
...@@ -88,21 +89,31 @@ def get_narratage_text(sheet_content, speed): ...@@ -88,21 +89,31 @@ def get_narratage_text(sheet_content, speed):
start_time = sheet_content['起始时间'] start_time = sheet_content['起始时间']
end_time = sheet_content['终止时间'] end_time = sheet_content['终止时间']
narratage_start_time = [] narratage_start_time = []
narratage_end_time = []
narratage_text = [] narratage_text = []
for i, text in enumerate(narratage): for i, text in enumerate(narratage):
print(i, text)
if text is not None: if text is not None:
if text == '翻译': if text == '翻译':
narratage_text.append(subtitle[i]) narratage_text.append(subtitle[i])
narratage_start_time.append(float(start_time[i])) narratage_start_time.append(float(start_time[i]))
narratage_end_time.append(float(end_time[i]))
else: else:
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s # 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
text_split = text.split('\n') text_split = text.split('\n')
cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0 if subtitle[i] is None:
cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
cur_end = float(start_time[i + 1])
else:
cur_start = float(start_time[i])
cur_end = float(end_time[i])
for x in text_split: for x in text_split:
cur_end = max(cur_end, cur_start + (len(x) / normal_speed + normal_interval) / speed)
narratage_text.append(x) narratage_text.append(x)
narratage_start_time.append(cur_start) narratage_start_time.append(cur_start)
narratage_end_time.append(cur_end)
cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed
return narratage_text, narratage_start_time return narratage_text, narratage_start_time, narratage_end_time
def second_to_str(seconds): def second_to_str(seconds):
...@@ -135,13 +146,44 @@ def export_caption(sheet_content, caption_file): ...@@ -135,13 +146,44 @@ def export_caption(sheet_content, caption_file):
f.write(x + "\n\n") f.write(x + "\n\n")
def adjust_volume(origin, start_timestamp, end_timestamp):
    """Lower the volume of the original audio during every narration interval.

    Runs ffmpeg with one ``volume`` filter per interval; each filter is only
    enabled between its start and end time and scales the signal by 0.3.

    Side effect: the module-level ``adjusted_wav_path`` is rewritten to the
    full path of the produced file (same directory as ``origin``), because the
    caller reads that global afterwards to locate and later delete the file.

    :param origin: path of the audio file to attenuate
    :param start_timestamp: sequence of interval start times, in seconds
    :param end_timestamp: sequence of interval end times, in seconds
    :return: None
    """
    # Function-scope import so the block does not rely on the file header.
    import subprocess

    global adjusted_wav_path
    # Keep only the file name of the previous value: joining the full previous
    # path again would nest the directory on a second call whenever `origin`
    # is given with a relative directory.
    adjusted_wav_path = os.path.join(os.path.dirname(origin),
                                     os.path.basename(adjusted_wav_path))

    ducking = ",".join(
        "volume=enable='between(t,{},{})':volume=0.3".format(begin, finish)
        for begin, finish in zip(start_timestamp, end_timestamp)
    )
    # An empty -af value makes ffmpeg abort; "anull" passes the audio through
    # untouched so the output file still exists when there is no narration.
    audio_filter = ducking or "anull"

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    command = ["ffmpeg", "-i", origin, "-af", audio_filter, "-y", adjusted_wav_path]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # os.system never raised when ffmpeg was missing; stay non-fatal but
        # make the failure visible.
        print("无法启动 ffmpeg: {}".format(err))
        return
    if completed.returncode != 0:
        print("ffmpeg 执行失败,返回码: {}".format(completed.returncode))
def mix_speech(origin, narratage_paths, start_timestamps):
    """Mix the narration clips into the original audio track with ffmpeg.

    Every narration input is delayed by its start time (converted to whole
    milliseconds) with ``adelay`` and then all streams, including the original
    one, are combined by ``amix``. The result is written to ``composed.wav``
    in the directory of ``origin``, overwriting an existing file.

    :param origin: path of the base audio file (ffmpeg input 0)
    :param narratage_paths: paths of the narration clips (ffmpeg inputs 1..n)
    :param start_timestamps: start time in seconds of each narration clip,
        in the same order as ``narratage_paths``
    :return: None
    """
    # Function-scope import so the block does not rely on the file header.
    import subprocess

    composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
    print(composed_wav_path)

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    command = ["ffmpeg", "-i", origin]
    for narratage_path in narratage_paths:
        command += ["-i", narratage_path]

    # Input k (1-based, input 0 is the original audio) becomes stream [audk].
    delayed = "".join(
        "[{0}]adelay=delays={1}:all=1[aud{0}];".format(idx, int(begin * 1000))
        for idx, begin in enumerate(start_timestamps, start=1)
    )
    labels = "".join("[aud{}]".format(idx)
                     for idx in range(1, len(start_timestamps) + 1))
    filter_graph = "{}[0]{}amix=inputs={}".format(
        delayed, labels, len(start_timestamps) + 1)

    command += ["-filter_complex", filter_graph, "-vsync", "2", "-y", composed_wav_path]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # os.system never raised when ffmpeg was missing; stay non-fatal but
        # make the failure visible.
        print("无法启动 ffmpeg: {}".format(err))
        return
    if completed.returncode != 0:
        print("ffmpeg 执行失败,返回码: {}".format(completed.returncode))
    print(subprocess.list2cmdline(command))
def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
""" """
生成语音并导出字幕 生成语音并导出字幕
:param video_path: 原视频的位置
:param sheet_path: 校对过的旁白脚本表格文件 :param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的 :param output_dir: 存放音频文件的文件夹
:param speed: :param speed: 旁白语速
:param caption_file: :param caption_file: 输出的字幕文件存放位置
:param state: 用于与界面中的进度条状态进行通讯
:return: :return:
""" """
...@@ -156,34 +198,58 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state): ...@@ -156,34 +198,58 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
# 读取表格,并获取旁白及对应插入位置 # 读取表格,并获取旁白及对应插入位置
sheet_content = read_sheet(book_path) sheet_content = read_sheet(book_path)
narratages, start_timepoint = get_narratage_text(sheet_content, speed) narratages, start_timestamp, end_timestamp = get_narratage_text(sheet_content, speed)
export_caption(sheet_content, caption_file) export_caption(sheet_content, caption_file)
print("已导出旁白文件") print("已导出字幕文件")
narratage_paths = []
# 生成旁白解说语音 # 生成旁白解说语音
for i, text in enumerate(narratages): for i, text in enumerate(narratages):
wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i]) wav_path = os.path.join(root_path, '%.2f.wav' % start_timestamp[i])
narratage_paths.append(wav_path)
speech_synthesis(text, wav_path, speed) speech_synthesis(text, wav_path, speed)
time.sleep(1) time.sleep(1)
print("目前正在处理{}".format(wav_path)) print("目前正在处理{}".format(wav_path))
state[0] = float((i + 1) / len(narratages)) if state is not None:
state[0] = float((i + 1) / len(narratages)) * 0.97
# 合成总音频,并入原视频音频中 # 合成总音频,并入原视频音频中
# 提取原音频
# 删除临时语音文件 from split_wav import extract_audio
origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
# 调整原音频中旁白对应位置的音量
adjust_volume(origin_wav_path, start_timestamp, end_timestamp)
# 将旁白混入原音频
mix_speech(adjusted_wav_path, narratage_paths, start_timestamp)
if state is not None:
state[0] = 1.00
# 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
if os.path.exists(tmp_file): if os.path.exists(tmp_file):
time.sleep(1) time.sleep(1)
os.remove(tmp_file) os.remove(tmp_file)
os.remove(origin_wav_path)
os.remove(adjusted_wav_path)
if __name__ == '__main__':
    # Function-scope import so the entry point does not rely on the file header.
    import argparse

    # Command-line options. Every default equals the value that used to be
    # hard-coded here, so running the script without arguments behaves exactly
    # as before while other inputs no longer require editing the source.
    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    parser.add_argument("--video_path", type=str, default='D:/heelo/hysxm_3.mp4',
                        help="原视频位置")
    parser.add_argument("--sheet_path", type=str, default='D:/heelo/hysxm_3.xlsx',
                        help='旁白解说表格存储路径')
    parser.add_argument("--output_dir", type=str, default='D:/AddCaption/hysxm_3',
                        help="音频输出位置路径")
    parser.add_argument("--caption_file", type=str,
                        default='D:/AddCaption/hysxm_3/hysxm_3.srt',
                        help="输出的字幕文件存储路径")
    parser.add_argument("--speed", type=float, default=1.25,
                        help="设置语速,默认为1.25")
    args = parser.parse_args()

    # Keyword arguments guard against the positional mix-ups that are easy to
    # make with five string/number parameters.
    ss_and_export(video_path=args.video_path, sheet_path=args.sheet_path,
                  output_dir=args.output_dir, speed=args.speed,
                  caption_file=args.caption_file)
...@@ -52,7 +52,7 @@ def split_audio(): ...@@ -52,7 +52,7 @@ def split_audio():
# 从音频中提取人声 # 从音频中提取人声
def extrac_speech(): def extract_speech():
from spleeter.audio.adapter import AudioAdapter from spleeter.audio.adapter import AudioAdapter
from spleeter.separator import Separator from spleeter.separator import Separator
separator = Separator('spleeter:2stems', multiprocess=False) separator = Separator('spleeter:2stems', multiprocess=False)
......
...@@ -84,12 +84,12 @@ def start_process(p, p_label, state, intervals=100): ...@@ -84,12 +84,12 @@ def start_process(p, p_label, state, intervals=100):
while True: while True:
# 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时,对进度条状态进行更新 # 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时,对进度条状态进行更新
if state[0] and state[0] != lastState and state[0] * 100 > p['value']: if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
p['value'] = int(state[0] * 100) p['value'] = round(state[0] * 100, 2)
lastState = state[0] lastState = state[0]
p_label['text'] = str(int(p['value'])) + "%" p_label['text'] = str(round(p['value'], 2)) + "%"
if p['value'] == 100: if p['value'] == 100.0:
p.stop() p.stop()
p['value'] = 100 p['value'] = 100.0
break break
print("进度条停止") print("进度条停止")
...@@ -184,9 +184,9 @@ def set_caption_file(): ...@@ -184,9 +184,9 @@ def set_caption_file():
设置字幕文件存储路径(使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名) 设置字幕文件存储路径(使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名)
:return: :return:
""" """
defaultName = os.path.basename(narratagePath.get()).split('.')[0] + ".srt" defaultName = os.path.basename(videoPath.get()).split('.')[0] + ".srt"
defaultDir = audioDir.get() defaultDir = audioDir.get()
caption_path = filedialog.asksaveasfilename(title=u'保存文件至', caption_path = filedialog.asksaveasfilename(title=u'保存字幕文件至',
initialdir=defaultDir, initialdir=defaultDir,
initialfile=defaultName, initialfile=defaultName,
filetype=[('字幕文件', ".srt")]) filetype=[('字幕文件', ".srt")])
...@@ -206,6 +206,7 @@ def start_synthesis(): ...@@ -206,6 +206,7 @@ def start_synthesis():
开始合成语音 开始合成语音
:return: :return:
""" """
video_path = videoPath.get()
audio_dir = audioDir.get() audio_dir = audioDir.get()
sheet_path = narratagePath.get() sheet_path = narratagePath.get()
speed = float(audio_speed.get().split('(')[0]) speed = float(audio_speed.get().split('(')[0])
...@@ -236,7 +237,7 @@ def start_synthesis(): ...@@ -236,7 +237,7 @@ def start_synthesis():
threads = [ threads = [
threading.Thread(target=start_process, args=(progressbar_2, progress_2, state, 100000), name="startProgress2"), threading.Thread(target=start_process, args=(progressbar_2, progress_2, state, 100000), name="startProgress2"),
threading.Thread(target=ss_and_export, threading.Thread(target=ss_and_export,
args=(sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")] args=(video_path, sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
for t in threads: for t in threads:
t.start() t.start()
for t in threads: for t in threads:
...@@ -380,12 +381,20 @@ stopDetection.config(state=tk.DISABLED) ...@@ -380,12 +381,20 @@ stopDetection.config(state=tk.DISABLED)
""" """
""" """
语音相关设置,包含以下内容: 语音相关设置,包含以下内容:
- 旁白脚本表格|表格路径|上传文件按钮
- 原视频|视频路径|上传文件按钮 - 原视频|视频路径|上传文件按钮
- 旁白脚本表格|表格路径|上传文件按钮
- 旁白语速选择 - 旁白语速选择
""" """
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ") audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4) audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3)
video_label = ttk.Label(audio_info, text="原视频")
video_label.grid(column=0, row=0)
videoPath = tk.StringVar()
videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
videoPath_input.grid(column=1, row=0)
upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
upload_button_3.grid(column=2, row=0)
narratage_label = ttk.Label(audio_info, text="旁白脚本表格") narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
narratage_label.grid(column=0, row=1) narratage_label.grid(column=0, row=1)
...@@ -404,14 +413,6 @@ speedChosen['values'] = ( ...@@ -404,14 +413,6 @@ speedChosen['values'] = (
speedChosen.current(0) speedChosen.current(0)
speedChosen.grid(column=1, row=2, sticky="W") speedChosen.grid(column=1, row=2, sticky="W")
video_label = ttk.Label(audio_info, text="原视频")
video_label.grid(column=0, row=0)
videoPath = tk.StringVar()
videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
videoPath_input.grid(column=1, row=0)
upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
upload_button_3.grid(column=2, row=0)
""" """
语音合成步骤,包含以下内容: 语音合成步骤,包含以下内容:
- 输出音频存放于|路径文本框|打开文件夹 - 输出音频存放于|路径文本框|打开文件夹
...@@ -420,7 +421,7 @@ upload_button_3.grid(column=2, row=0) ...@@ -420,7 +421,7 @@ upload_button_3.grid(column=2, row=0)
- 停止合成按钮 - 停止合成按钮
""" """
synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ") synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
synthesis_command.place(relx=0.05, rely=0.55, relwidth=0.9, relheight=0.4) synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.5)
audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于") audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
audioDir_label.grid(column=0, row=0) audioDir_label.grid(column=0, row=0)
...@@ -430,7 +431,7 @@ audioDir_input.grid(column=1, row=0) ...@@ -430,7 +431,7 @@ audioDir_input.grid(column=1, row=0)
save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir) save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
save_button_2.grid(column=2, row=0) save_button_2.grid(column=2, row=0)
caption_label = ttk.Label(synthesis_command, text="输出字幕文件") caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
caption_label.grid(column=0, row=1) caption_label.grid(column=0, row=1)
captionPath = tk.StringVar() captionPath = tk.StringVar()
captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath) captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment