1. [add] 原音频中添加旁白音频，生成混合音频;

2. [modified] 将进度条状态调整为小数点后2位; 3. [modified] 检测字幕有无情况的检测范围修正为[实际开始, 实际结束]

1. [add] 原音频中添加旁白音频，生成混合音频;
fad7c317 · 翟艳秋（20软） · 7d7ed791 · fad7c317 · fad7c317 · fad7c317
Commit fad7c317 authored Jan 25, 2022 by 翟艳秋（20软）
6 changed files
--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -222,7 +222,6 @@ def write_excel_xlsx(path, sheet_name, value):
    for i in range(0, index):
        for j in range(0, len(value[i])):
            sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j]))
-            print(value[i][j])
            if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译':
                sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
    workbook.save(path)
@@ -242,7 +241,7 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):

    # 输出旁白位置推荐信息到表格
    write_excel_xlsx(book_name_xlsx, sheet_name_xlsx, table_content)
-    state[0] = 1
+    state[0] = 1.00


 if __name__ == '__main__':

--- a/judge_subtitle.py
+++ b/judge_subtitle.py
@@ -54,7 +54,7 @@ def detect_subtitle(frame):
    return False


-def detect_movie(video_path, start, interval):
+def detect_movie(video_path, start, end, interval):
    """
    使用整部视频进行测试，确定视频是否提供字幕
    :param video_path: 视频的地址
@@ -64,9 +64,8 @@ def detect_movie(video_path, start, interval):
    """
    video = cv2.VideoCapture(video_path)
    fps = np.ceil(video.get(cv2.CAP_PROP_FPS))
-    end_time = video.get(cv2.CAP_PROP_FRAME_COUNT) / fps
-    if start + interval * 3 > end_time:
-        interval = int((end_time - start) / 3)
+    if start + interval * 3 > end:
+        interval = int((end - start) / 3)
    start = start * fps
    interval = interval * fps
    random_number = 50

--- a/narratage_detection.py
+++ b/narratage_detection.py
@@ -30,7 +30,7 @@ def detect(video_path, start_time, end_time, book_path, state, subtitle=None):

    # 根据用户的选择来确定电影是否有字幕，如果“未知”，则自动检测
    if subtitle == 0:
-        has_subtitle = detect_movie(video_path, start_time, 180)
+        has_subtitle = detect_movie(video_path, start_time, end_time, 180)
    elif subtitle == 1:
        has_subtitle = True
    else:

--- a/speech_synthesis.py
+++ b/speech_synthesis.py
@@ -8,6 +8,7 @@ from azure.cognitiveservices.speech.audio import AudioOutputConfig
 import openpyxl

 tmp_file = 'tmp.wav'
+adjusted_wav_path = "adjusted.wav"

 normal_speed = 4
 normal_interval = 0.1
@@ -88,21 +89,31 @@ def get_narratage_text(sheet_content, speed):
    start_time = sheet_content['起始时间']
    end_time = sheet_content['终止时间']
    narratage_start_time = []
+    narratage_end_time = []
    narratage_text = []
    for i, text in enumerate(narratage):
+        print(i, text)
        if text is not None:
            if text == '翻译':
                narratage_text.append(subtitle[i])
                narratage_start_time.append(float(start_time[i]))
+                narratage_end_time.append(float(end_time[i]))
            else:
                # 如果旁白中有换行符，即分为n段，则按照换行符进行分割，并间隔0.5s
                text_split = text.split('\n')
-                cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
+                if subtitle[i] is None:
+                    cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
+                    cur_end = float(start_time[i + 1])
+                else:
+                    cur_start = float(start_time[i])
+                    cur_end = float(end_time[i])
                for x in text_split:
+                    cur_end = max(cur_end, cur_start + (len(x) / normal_speed + normal_interval) / speed)
                    narratage_text.append(x)
                    narratage_start_time.append(cur_start)
+                    narratage_end_time.append(cur_end)
                    cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed
-    return narratage_text, narratage_start_time
+    return narratage_text, narratage_start_time, narratage_end_time


 def second_to_str(seconds):
@@ -135,13 +146,44 @@ def export_caption(sheet_content, caption_file):
                f.write(x + "\n\n")


-def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
+def adjust_volume(origin, start_timestamp, end_timestamp):
+    """
+    Lower the volume of the original audio wherever a narration will be mixed in.
+
+    Each [start, end] interval is attenuated to 30% with ffmpeg's ``volume``
+    filter and the result is written next to ``origin``. The module-level
+    ``adjusted_wav_path`` is updated to point at the written file.
+    :param origin: path of the audio extracted from the original video
+    :param start_timestamp: start time (in seconds) of every narration
+    :param end_timestamp: end time (in seconds) of every narration, paired with start_timestamp
+    :return: path of the volume-adjusted audio file
+    """
+    import subprocess
+
+    global adjusted_wav_path
+    # Join with the bare file name only: the global is overwritten below, so
+    # re-joining its previous value would nest directories on a second call.
+    adjusted_wav_path = os.path.join(os.path.dirname(origin), os.path.basename(adjusted_wav_path))
+    filters = ["volume=enable='between(t,{},{})':volume=0.3".format(start, end)
+               for start, end in zip(start_timestamp, end_timestamp)]
+    # An argument list (no shell) keeps paths that contain spaces intact.
+    command = ["ffmpeg", "-i", origin]
+    if filters:
+        # ffmpeg rejects an empty filtergraph, so -af is only passed when there is something to attenuate.
+        command += ["-af", ",".join(filters)]
+    command += ["-y", adjusted_wav_path]
+    # Fail loudly here instead of letting later steps trip over a missing file.
+    subprocess.run(command, check=True)
+    return adjusted_wav_path
+
+
+def mix_speech(origin, narratage_paths, start_timestamps):
+    """
+    Mix every narration clip into the original audio at its start time.
+
+    Each narration input is delayed by its start time with ffmpeg's ``adelay``
+    filter and then combined with the original track by ``amix``. The result
+    is written as ``composed.wav`` in the same directory as ``origin``.
+    :param origin: path of the (volume-adjusted) original audio
+    :param narratage_paths: paths of the synthesized narration clips
+    :param start_timestamps: start time (in seconds) of each clip, paired with narratage_paths
+    :return: path of the mixed audio file
+    """
+    import subprocess
+
+    composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
+    # An argument list (no shell) keeps paths that contain spaces intact.
+    command = ["ffmpeg", "-i", origin]
+    for narratage_path in narratage_paths:
+        command += ["-i", narratage_path]
+    # Input 0 is the original audio; narration inputs are numbered from 1. adelay expects milliseconds.
+    delays = ["[{}]adelay=delays={}:all=1[aud{}];".format(i + 1, int(start_timestamp * 1000), i + 1)
+              for i, start_timestamp in enumerate(start_timestamps)]
+    delayed_labels = ["[aud{}]".format(i + 1) for i in range(len(start_timestamps))]
+    filter_graph = "{}[0]{}amix=inputs={}".format("".join(delays), "".join(delayed_labels),
+                                                  len(start_timestamps) + 1)
+    command += ["-filter_complex", filter_graph, "-vsync", "2", "-y", composed_wav_path]
+    print(" ".join(command))
+    # Fail loudly if ffmpeg cannot produce the mixed file.
+    subprocess.run(command, check=True)
+    return composed_wav_path
+
+
+def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
    """
    生成语音并导出字幕
+    :param video_path: 原视频的位置
    :param sheet_path: 校对过的旁白脚本表格文件
-    :param output_dir: 存放音频文件的
-    :param speed:
-    :param caption_file:
+    :param output_dir: 存放音频文件的文件夹
+    :param speed: 旁白语速
+    :param caption_file: 输出的字幕文件存放位置
+    :param state: 用于与界面中的进度条状态进行通讯
    :return:
    """

@@ -156,34 +198,58 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):

    # 读取表格，并获取旁白及对应插入位置
    sheet_content = read_sheet(book_path)
-    narratages, start_timepoint = get_narratage_text(sheet_content, speed)
+    narratages, start_timestamp, end_timestamp = get_narratage_text(sheet_content, speed)
    export_caption(sheet_content, caption_file)
-    print("已导出旁白文件")
+    print("已导出字幕文件")

+    narratage_paths = []
    # 生成旁白解说语音
    for i, text in enumerate(narratages):
-        wav_path = os.path.join(root_path, '%.2f.wav' % start_timepoint[i])
+        wav_path = os.path.join(root_path, '%.2f.wav' % start_timestamp[i])
+        narratage_paths.append(wav_path)
        speech_synthesis(text, wav_path, speed)
        time.sleep(1)
        print("目前正在处理{}".format(wav_path))
-        state[0] = float((i + 1) / len(narratages))
+        if state is not None:
+            state[0] = float((i + 1) / len(narratages)) * 0.97

    # 合成总音频，并入原视频音频中
-
-    # 删除临时语音文件
+    # 提取原音频
+    from split_wav import extract_audio
+    origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
+    # 调整原音频中旁白对应位置的音量
+    adjust_volume(origin_wav_path, start_timestamp, end_timestamp)
+    # 将旁白混入原音频
+    mix_speech(adjusted_wav_path, narratage_paths, start_timestamp)
+    if state is not None:
+        state[0] = 1.00
+
+    # 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
    if os.path.exists(tmp_file):
        time.sleep(1)
        os.remove(tmp_file)
+    os.remove(origin_wav_path)
+    os.remove(adjusted_wav_path)


 if __name__ == '__main__':
    # 定义参数
-    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
-    parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
-    parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
-    parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
-    parser.add_argument("--speed", type=float, default=1.0, help="设置语速，默认为1.0")
-    args = parser.parse_args()
+    # All paths come from the command line so the script runs on any machine,
+    # not only where a developer's local test files happen to exist.
+    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
+    parser.add_argument("--video_path", required=True, type=str, help="原视频位置")
+    parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
+    parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
+    parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
+    parser.add_argument("--speed", type=float, default=1.0, help="设置语速，默认为1.0")
+    args = parser.parse_args()

    # 主函数执行
-    ss_and_export(args.output_dir, args.sheet_path, args.speed, args.caption_file)
+    # Keyword arguments keep each path bound to the right parameter regardless of positional order.
+    ss_and_export(video_path=args.video_path, sheet_path=args.sheet_path, output_dir=args.output_dir,
+                  speed=args.speed, caption_file=args.caption_file)
--- a/split_wav.py
+++ b/split_wav.py
@@ -52,7 +52,7 @@ def split_audio():


 # 从音频中提取人声
-def extrac_speech():
+def extract_speech():
    from spleeter.audio.adapter import AudioAdapter
    from spleeter.separator import Separator
    separator = Separator('spleeter:2stems', multiprocess=False)

--- a/try_with_gui.py
+++ b/try_with_gui.py
@@ -84,12 +84,12 @@ def start_process(p, p_label, state, intervals=100):
    while True:
        # 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时，对进度条状态进行更新
        if state[0] and state[0] != lastState and state[0] * 100 > p['value']:
-            p['value'] = int(state[0] * 100)
+            p['value'] = round(state[0] * 100, 2)
            lastState = state[0]
-        p_label['text'] = str(int(p['value'])) + "%"
-        if p['value'] == 100:
+        p_label['text'] = str(round(p['value'], 2)) + "%"
+        if p['value'] == 100.0:
            p.stop()
-            p['value'] = 100
+            p['value'] = 100.0
            break
    print("进度条停止")

@@ -184,9 +184,9 @@ def set_caption_file():
    设置字幕文件存储路径（使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名）
    :return:
    """
-    defaultName = os.path.basename(narratagePath.get()).split('.')[0] + ".srt"
+    defaultName = os.path.basename(videoPath.get()).split('.')[0] + ".srt"
    defaultDir = audioDir.get()
-    caption_path = filedialog.asksaveasfilename(title=u'保存文件至',
+    caption_path = filedialog.asksaveasfilename(title=u'保存字幕文件至',
                                                initialdir=defaultDir,
                                                initialfile=defaultName,
                                                filetype=[('字幕文件', ".srt")])
@@ -206,6 +206,7 @@ def start_synthesis():
    开始合成语音
    :return:
    """
+    video_path = videoPath.get()
    audio_dir = audioDir.get()
    sheet_path = narratagePath.get()
    speed = float(audio_speed.get().split('(')[0])
@@ -236,7 +237,7 @@ def start_synthesis():
    threads = [
        threading.Thread(target=start_process, args=(progressbar_2, progress_2, state, 100000), name="startProgress2"),
        threading.Thread(target=ss_and_export,
-                         args=(sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
+                         args=(video_path, sheet_path, audio_dir, speed, caption_path, state), name="ssAndExport")]
    for t in threads:
        t.start()
    for t in threads:
@@ -380,12 +381,20 @@ stopDetection.config(state=tk.DISABLED)
 """
 """
    语音相关设置，包含以下内容：
-    - 旁白脚本表格|表格路径|上传文件按钮
    - 原视频|视频路径|上传文件按钮
+    - 旁白脚本表格|表格路径|上传文件按钮
    - 旁白语速选择
 """
 audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
-audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.4)
+audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3)
+
+video_label = ttk.Label(audio_info, text="原视频")
+video_label.grid(column=0, row=0)
+videoPath = tk.StringVar()
+videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
+videoPath_input.grid(column=1, row=0)
+upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
+upload_button_3.grid(column=2, row=0)

 narratage_label = ttk.Label(audio_info, text="旁白脚本表格")
 narratage_label.grid(column=0, row=1)
@@ -404,14 +413,6 @@ speedChosen['values'] = (
 speedChosen.current(0)
 speedChosen.grid(column=1, row=2, sticky="W")

-video_label = ttk.Label(audio_info, text="原视频")
-video_label.grid(column=0, row=0)
-videoPath = tk.StringVar()
-videoPath_input = ttk.Entry(audio_info, width=30, textvariable=videoPath)
-videoPath_input.grid(column=1, row=0)
-upload_button_3 = ttk.Button(audio_info, text="上传文件", command=confirm_video_path)
-upload_button_3.grid(column=2, row=0)
-
 """
    语音合成步骤，包含以下内容：
    - 输出音频存放于|路径文本框|打开文件夹
@@ -420,7 +421,7 @@ upload_button_3.grid(column=2, row=0)
    - 停止合成按钮
 """
 synthesis_command = ttk.LabelFrame(tab2, text=" 语音合成步骤 ")
-synthesis_command.place(relx=0.05, rely=0.55, relwidth=0.9, relheight=0.4)
+synthesis_command.place(relx=0.05, rely=0.45, relwidth=0.9, relheight=0.5)

 audioDir_label = ttk.Label(synthesis_command, text="输出音频存放于")
 audioDir_label.grid(column=0, row=0)
@@ -430,7 +431,7 @@ audioDir_input.grid(column=1, row=0)
 save_button_2 = ttk.Button(synthesis_command, text="打开文件夹", command=find_save_dir)
 save_button_2.grid(column=2, row=0)

-caption_label = ttk.Label(synthesis_command, text="输出字幕文件")
+caption_label = ttk.Label(synthesis_command, text="输出字幕文件于")
 caption_label.grid(column=0, row=1)
 captionPath = tk.StringVar()
 captionPath_input = ttk.Entry(synthesis_command, width=30, textvariable=captionPath)