add ffmpeg util for pb audio

d8721003 · cxy · cb9afe6d · d8721003 · d8721003 · d8721003
Commit d8721003 authored Aug 12, 2023 by cxy
7 changed files
--- a/deploy.bat
+++ b/deploy.bat
@@ -16,6 +16,7 @@ GOTO END

 :END

+c:
 cd C:\LavFilters\
 call install_audio.bat
 call install_splitter.bat

--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -397,7 +397,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
    aside = ""
    i = len(mainWindow.projectContext.all_elements)
    if element_type == "字幕":
-        mainWindow.projectContext.subtitle_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed))
+        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed)
+        new_element.print_self()
+        if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
+            mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
+        mainWindow.projectContext.subtitle_list.append(new_element)
        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
    else:
        if i == 0:
@@ -407,9 +411,14 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):

        # 因为暂时没有用到ed_time_sec，所以直接赋值空吧
        ed_time_sec = ""
-
-        mainWindow.projectContext.aside_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed))
+        print(">>>>>>>>>>>>>>>>>append aside")
+        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed)
+        new_element.print_self()
+        if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
+            mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
+        mainWindow.projectContext.aside_list.append(new_element)
        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
+        mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1


 def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None):

--- a/ffmpeg_util.py
+++ b/ffmpeg_util.py
+import subprocess
+import os
+
+import subprocess
+import math
+import time
+
+def calculate_audio_volume(file_path, ffmpeg_path='res/ffmpeg-4.3.1/bin/ffmpeg'):
+    """Measure the mean volume of a media file with FFmpeg's ``volumedetect`` filter.
+
+    Args:
+        file_path: Path of the audio (or video) file to analyse.
+        ffmpeg_path: FFmpeg executable to run; defaults to the bundled binary.
+
+    Returns:
+        The ``mean_volume`` value reported by FFmpeg (in dB) as a float, or
+        ``None`` when the output contains no parsable ``mean_volume:`` entry,
+        for example because FFmpeg could not read the file.
+    """
+    command = [
+        ffmpeg_path, '-i', file_path,
+        '-af', 'volumedetect', '-f', 'null', '-'
+    ]
+    print(command)
+    # volumedetect reports on stderr. FFmpeg's log output is not guaranteed to
+    # match the locale encoding (non-ASCII file names are a common trigger), so
+    # decode leniently instead of letting a strict locale decode raise.
+    result = subprocess.run(
+        command,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.PIPE,
+        encoding='utf-8',
+        errors='replace',
+    )
+    # Match on the key including its colon so that unrelated lines which merely
+    # mention "mean_volume" (e.g. the echoed input file name) are ignored.
+    marker = 'mean_volume:'
+    for line in result.stderr.splitlines():
+        if marker not in line:
+            continue
+        print(line)
+        # Expected shape: "[Parsed_volumedetect_0 @ ...] mean_volume: -23.4 dB"
+        fields = line.split(marker, 1)[1].split()
+        if not fields:
+            continue
+        try:
+            mean_volume = float(fields[0])
+        except ValueError:
+            continue
+        print(mean_volume)
+        return mean_volume
+
+    return None
+
+# def adjust_audio_volume(input_file, output_file, multiplier):
+#     # 使用FFmpeg将音频音量设置为给定倍数
+#     command = [
+#         'ffmpeg', '-i', input_file,
+#         '-af', f'volume={multiplier}*', output_file
+#     ]
+#     subprocess.run(command)
+
+def adjust_audio_volume(input_file, volume_adjustment, ffmpeg_path="res/ffmpeg-4.3.1/bin/ffmpeg"):
+    """Scale the volume of ``input_file`` in place using FFmpeg's ``volume`` filter.
+
+    FFmpeg writes to a temporary WAV file next to ``input_file``; the
+    temporary file replaces the input only after FFmpeg has exited
+    successfully, so a failed run leaves the original audio untouched.
+
+    Args:
+        input_file: Path of the audio file to rewrite.
+        volume_adjustment: Value handed to the ``volume`` filter, e.g. a
+            linear multiplier such as ``1.335``.
+        ffmpeg_path: FFmpeg executable to run; defaults to the bundled binary.
+
+    Raises:
+        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
+        OSError: If the temporary file cannot be moved over ``input_file``.
+    """
+    # Keep the temporary file beside the input (not in the working directory):
+    # the final swap then never has to cross drives, and two different inputs
+    # never share one temporary name. The ".wav" suffix keeps the output format
+    # the same as before.
+    temp_path = input_file + ".volume_tmp.wav"
+    command = [
+        ffmpeg_path,
+        "-i", input_file,
+        "-af", f"volume={volume_adjustment}",
+        "-y",  # overwrite a stale temporary file left by an earlier run
+        temp_path,
+    ]
+    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>cg audio" + input_file + str(volume_adjustment))
+    try:
+        # subprocess.run blocks until FFmpeg has exited, so no extra sleep is
+        # needed; check=True stops us from replacing the input after a failure.
+        subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, check=True)
+        os.replace(temp_path, input_file)
+    finally:
+        # After a successful replace the temporary file no longer exists; on
+        # any failure remove the partial output so it is not picked up later.
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+
+
+def cg_wav(first_file_path, second_file_path, rate):
+    """Rescale the second audio file relative to the first file's mean volume.
+
+    Both files are measured with ``calculate_audio_volume``. The dB difference
+    (first minus second) is converted to a linear factor, multiplied by
+    ``rate`` and applied to the second file via ``adjust_audio_volume``.
+
+    This is best-effort: a missing measurement or any exception is printed
+    and the function returns without raising.
+
+    Args:
+        first_file_path: Reference audio file (left unmodified).
+        second_file_path: Audio file whose volume is rewritten in place.
+        rate: Extra linear factor applied on top of the level-matching gain.
+    """
+    try:
+        print(">>>>>>>>>>>>>>>cg_wav")
+        print(first_file_path, second_file_path, str(rate))
+
+        reference_db = calculate_audio_volume(first_file_path)
+        if reference_db is None:
+            print("无法计算第一个文件的音量")
+            return
+
+        target_db = calculate_audio_volume(second_file_path)
+        if target_db is None:
+            print("无法计算第二个文件的音量")
+            return
+
+        # dB difference -> linear amplitude ratio, then scale by the requested rate.
+        linear_gain = 10 ** ((reference_db - target_db) / 20)
+        adjust_audio_volume(second_file_path, linear_gain * rate)
+    except Exception as err:
+        print(">>>>>>>>>>>>>>cg_wav err")
+        print(err)
+
+
+# cg_wav("C:/Users/Administrator/Desktop/无障碍电影/fuiii0000/output/0.01.wav","C:/Users/Administrator/Desktop/无障碍电影/fuiii0000\output\0.01.wav",0.26)
+# calculate_audio_volume("./0.01.wav")
+# adjust_audio_volume("./0.01.wav",1.335)
+# calculate_audio_volume("./0.01.wav")
+
+# def test(projectContext):
+#     from split_wav import extract_audio
+#     video_path = projectContext.video_path
+#     output_dir = os.path.join(projectContext.project_base_dir, "output")
+#     extract_audio(video_path, output_dir, 0, -1)
+
+# self.projectContext.video_path
+# output_dir = os.path.join(self.projectContext.project_base_dir, "output")
+# files = os.listdir(output_dir):
+#     for i, f in enumerate(files):
+#         fname = '.'.join(f.split('.')[:-1])
+#         try:
+#             st_time = float(fname)
+#             cur_audio, _ = soundfile.read(os.path.join(output_dir, f))
+#             # print(len(cur_audio))
+#             st_index = int(st_time * freq)
+#             audio_len = len(cur_audio)
+#             blank_audio[st_index: st_index + audio_len] = cur_audio
+#             origin_wav[st_index: st_index + audio_len] *= origin_audio_rate
+#             state[0] = float((i + 1) / len(files)) * 0.6 + 0.2
+#         except:
+#             continue
\ No newline at end of file
--- a/main_window.py
+++ b/main_window.py
@@ -40,6 +40,7 @@ import re
 from ding_utils import notify

 from excel_utils import read_xls,read_xlsx
+from ffmpeg_util import adjust_audio_volume


 class MainWindow(QMainWindow, Ui_MainWindow):
@@ -48,6 +49,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):

    def __init__(self, project_path):
        super(MainWindow, self).__init__()
+        self.last_aside_index = None
        self.setupUi(self)
        self.statusbar.showMessage("hello", 5000)
        self.projectContext = ProjectContext()
@@ -190,6 +192,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.sld_video.sliderMoved.connect(self.moveSlider)   # 进度条拖拽跳转
        self.sld_video.ClickedValue.connect(self.clickedSlider)  # 进度条点击跳转
        self.sld_audio.valueChanged.connect(self.volumeChange)  # 控制声音播放
+        self.pb_sld_audio.valueChanged.connect(self.volumeChangePb)  # 控制声音播放
        self.kd_slider.valueChanged.connect(self.scale_change_slot)

        """旁白音频预览
@@ -299,7 +302,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.projectContext.Init(project_path)
        self.update_ui()

-        self.rate = 0
+        self.rate = None
+        self.rate_bottom = None
+        self.origin_rate = 100.00
+        self.pb_rate = 100.00
+        self.pb_cg_rate = 1.00

        # 打印到log文件中
        t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
@@ -376,9 +383,19 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        """弹出旁白区间检测相关信息填写窗口

        """
+        if self.rate == None:
+            self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
+            return
+        if self.rate_bottom == None:
+            self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
+            return
        self.detect_dialog.init_self()
        self.detect_dialog.show()
    
+    def start_detect_direct(self):  # Initialise the detect dialog and start detection directly, without calling show().
+        self.detect_dialog.init_self()
+        self.detect_dialog.start_detect()
+
    def show_assemble_dialog(self):
        """弹出旁白音频合成相关信息填写窗口

@@ -457,6 +474,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.action_operate.setEnabled(True)
        self.action_insert_aside_from_now.setEnabled(True)
        self.insert_aside_from_now_btn.setEnabled(True)
+        # print(">>>>>>>>>>>>>>>start detect")
+        # self.start_detect_direct()

    def up_ocr(self):
        h = self.widget.up(3)
@@ -827,8 +846,27 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        volume = round(position/self.sld_audio.maximum()*100)
        print("vlume %f" % volume)
        self.player.setVolume(volume)
+        self.origin_rate = volume if volume >0 else 1
+        self.pb_cg_rate = self.pb_rate / self.origin_rate
+        print(">>>>>>>>>>cg rate :" + str(self.pb_cg_rate))
        self.lab_audio.setText("音量:" + str(volume) + "%")

+    def volumeChangePb(self, position: int):
+        """Update the narration volume label and ratio from the narration slider.
+
+        Args:
+            position (int): Current value of the narration volume slider.
+        """        
+        volume = round(position/self.pb_sld_audio.maximum()*100)
+        print("vlume %f" % volume)
+        # NOTE: the player volume is not changed here (the setVolume call is disabled).
+        self.pb_audio.setText("旁白音量:" + str(volume) + "%")
+        self.pb_rate = volume if volume >0 else 1  # a 0% slider value is stored as 1
+        self.pb_cg_rate = self.pb_rate / self.origin_rate  # narration / original ratio, passed to export_slot
+        print(">>>>>>>>>>cg rate :" + str(self.pb_cg_rate))
+        print("vlume %f" % self.pb_rate)
+
+
    # postion 取值[0,self.sld_video.maximum()]
    def clickedSlider(self, position: int):
        """点击时间轴时，对应切换视频播放进度
@@ -1695,7 +1733,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        """
        output_dir = os.path.join(self.projectContext.project_base_dir, "output")
        if os.path.exists(output_dir) and len(os.listdir(output_dir)) > 0:
-            self.export.export_slot(self.projectContext.video_path, output_dir)
+            self.export.export_slot(self.projectContext.video_path, output_dir, self.pb_cg_rate)
        else:
            self.prompt_dialog.show_with_msg("暂时无合成音频，请至少生成一条\n 旁白音频后再尝试导出")

@@ -1802,7 +1840,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            speed (str): 旁白语速
        """
        # 注意，这里需要用同一对象，不能生成多个Element
+        if end_time is None or end_time == "":
+            print(">>>>>>>>>>>>add line")
+            end_time = self.projectContext.all_elements[int(row)].st_time_sec
        new_element = Element(start_time, end_time, subtitle, suggest, aside, speed)
+        new_element.print_self()
        self.projectContext.all_elements.insert(int(row), new_element)
        self.all_tableWidget_idx = int(row)
        if suggest is not None and "插入旁白，推荐字数为" in suggest:

--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -178,8 +178,8 @@ class Ui_MainWindow(object):
 "\n"
 "  QSlider::handle:horizontal {\n"
 "    image: url(res/images/slider.svg);\n"
-"    width: 12px;\n"
-"    height: 12px;\n"
+"    width: 18px;\n"
+"    height: 18px;\n"
 "    margin: -24px -12px;\n"
 "  }\n"
 "\n"
@@ -202,9 +202,70 @@ class Ui_MainWindow(object):
        self.sld_audio.setOrientation(QtCore.Qt.Horizontal)
        self.sld_audio.setObjectName("sld_audio")
        self.horizontalLayout_5.addWidget(self.sld_audio)
+
+        self.pb_sld_audio = QtWidgets.QSlider(self.centralwidget)
+        self.pb_sld_audio.setMinimumSize(QtCore.QSize(0, 24))
+        self.pb_sld_audio.setMaximumSize(QtCore.QSize(80, 24))
+        self.pb_sld_audio.setProperty("value", 100)
+        self.pb_sld_audio.setStyleSheet("QSlider:horizontal {\n"
+"    min-height: 24px;\n"
+"    max-height: 24px;\n"
+"  }\n"
+"\n"
+"  QSlider:vertical {\n"
+"    min-width: 24px;\n"
+"    max-width: 24px;\n"
+"  }\n"
+"\n"
+"  QSlider::groove:horizontal {\n"
+"    height: 4px;\n"
+"    background: #393939;\n"
+"    margin: 0 12px;\n"
+"  }\n"
+"\n"
+"  QSlider::groove:vertical {\n"
+"    width: 4px;\n"
+"    background: #393939;\n"
+"    margin: 12px 0;\n"
+"    border-radius: 24px;\n"
+"  }\n"
+"\n"
+"  QSlider::handle:horizontal {\n"
+"    image: url(res/images/slider.svg);\n"
+"    width: 18px;\n"
+"    height: 18px;\n"
+"    margin: -24px -12px;\n"
+"  }\n"
+"\n"
+"  QSlider::handle:vertical {\n"
+"    image: url(res/images/slider.svg);\n"
+"    border-radius: 24px;\n"
+"    width: 12px;\n"
+"    height: 12px;\n"
+"    margin: -12px -24px;\n"
+"  }\n"
+"\n"
+"  QSlider::add-page {\n"
+"  background: #232629;\n"
+"  }\n"
+"\n"
+"  QSlider::sub-page {\n"
+"  background: #ffd740;\n"
+"  }")
+        self.pb_sld_audio.setProperty("value", 99)
+        self.pb_sld_audio.setOrientation(QtCore.Qt.Horizontal)
+        self.pb_sld_audio.setObjectName("sld_audio")
+        
+
        self.lab_audio = QtWidgets.QLabel(self.centralwidget)
        self.lab_audio.setObjectName("lab_audio")
        self.horizontalLayout_5.addWidget(self.lab_audio)
+        self.horizontalLayout_5.addWidget(self.sld_audio)
+
+        self.horizontalLayout_5.addWidget(self.pb_sld_audio)
+        self.pb_audio = QtWidgets.QLabel(self.centralwidget)
+        self.pb_audio.setObjectName("pb_audio")
+        self.horizontalLayout_5.addWidget(self.pb_audio)
        self.horizontalLayout_5.setStretch(0, 2)
        self.horizontalLayout_5.setStretch(1, 1)
        self.horizontalLayout_5.setStretch(3, 1)
@@ -302,6 +363,15 @@ class Ui_MainWindow(object):
        self.sld_video.setTickPosition(QtWidgets.QSlider.TicksAbove)
        self.sld_video.setTickInterval(1)
        self.sld_video.setObjectName("sld_video")
+        self.sld_video_pb = myVideoSlider(self.scrollAreaWidgetContents)
+        self.sld_video_pb.setGeometry(QtCore.QRect(10, 20, 780, 30))
+        self.sld_video_pb.setMinimumSize(QtCore.QSize(770, 0))
+        self.sld_video_pb.setMaximumSize(QtCore.QSize(16777215, 20))
+        self.sld_video_pb.setMaximum(100)
+        self.sld_video_pb.setOrientation(QtCore.Qt.Horizontal)
+        self.sld_video_pb.setTickPosition(QtWidgets.QSlider.TicksAbove)
+        self.sld_video_pb.setTickInterval(1)
+        self.sld_video_pb.setObjectName("sld_video_pb")
        self.scrollArea.setWidget(self.scrollAreaWidgetContents)
        self.scrollArea.setGeometry(QtCore.QRect(0, 0, 820, 42))
        self.zm_slider_layout.addWidget(self.scrollArea)
@@ -460,6 +530,7 @@ class Ui_MainWindow(object):
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.label_2.setText(_translate("MainWindow", "00:00/00:00"))
        self.lab_audio.setText(_translate("MainWindow", "音量：100%"))
+        self.pb_audio.setText(_translate("MainWindow", "旁白音量：100%"))
        self.insert_aside_from_now_btn.setText(_translate("MainWindow", "当前位置插入旁白"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))

--- a/render.py
+++ b/render.py
@@ -14,6 +14,7 @@ from PyQt5.QtWidgets import *;

 from management import RunThread
 from speech_synthesis import ffmpeg_path
+from ffmpeg_util import cg_wav

 class ExportProcessor(QWidget):
    show_warning_signal = pyqtSignal(str)
@@ -23,9 +24,9 @@ class ExportProcessor(QWidget):
        self.state = [0]
        self.threads = []

-    def export_slot(self, video_path, output_dir):
+    def export_slot(self, video_path, output_dir, pb_cg_rate):
        t = RunThread(funcName=self.start_export,
-                       args=(video_path, output_dir),
+                       args=(video_path, output_dir, pb_cg_rate),
                       name="export")
        t.setDaemon(True)
        self.threads.append(t)
@@ -37,13 +38,13 @@ class ExportProcessor(QWidget):

        self.export_callback_signal.emit(self.threads, self.state)
    
-    def start_export(self, video_path, output_dir):
-        mixed_audio_path = aggrevate_audios(video_path, output_dir, self.state)
+    def start_export(self, video_path, output_dir, pb_cg_rate):
+        mixed_audio_path = aggrevate_audios(video_path, output_dir, self.state, pb_cg_rate)
        export_video(video_path, mixed_audio_path, output_dir, self.state)


    # 生成一条无声的音频，然后把旁白音频逐个按照时间位置放进去，得到仅含旁白的音频和旁白+原声的音频
-def aggrevate_audios(video_path: str, output_dir: str, state=None):
+def aggrevate_audios(video_path: str, output_dir: str, state=None,pb_cg_rate = 1.00):
    # 这个模块最多只有80%的进度
    if state is None:
        state = [0]
@@ -56,6 +57,17 @@ def aggrevate_audios(video_path: str, output_dir: str, state=None):

    # 将生成的旁白音频放入空白音频中，并将原音频的对应位置音量降低为原来的30%
    files = os.listdir(output_dir)
+
+    audio_path = os.path.join(output_dir, os.path.basename(video_path).split('.')[0] + ".wav")
+    for i, f in enumerate(files):
+        fname = '.'.join(f.split('.')[:-1])
+        try:
+            if fname.find(".") != -1:
+                cg_wav(audio_path, os.path.join(output_dir, f), pb_cg_rate)
+        except Exception as e:
+            print(e)
+            continue
+    
    for i, f in enumerate(files):
        fname = '.'.join(f.split('.')[:-1])
        try:

--- a/res/conf.ini
+++ b/res/conf.ini
-{"video_path": null, "excel_path": null, "detection_info": {"detected": false, "nd_process": 0.0, "last_time": 0.0, "caption_boundings": [], "has_subtitle": true}, "speaker_info": {"speaker_type": "\u6d59\u5927\u5185\u90e8tts", "speaker_id": "test\uff0c\u5973\uff0c\u5e74\u8f7b\u4eba", "speaker_speed": "1.00(4\u5b57/\u79d2)"}}
\ No newline at end of file
+{"video_path": null, "excel_path": null, "detection_info": {"detected": false, "nd_process": 0.0, "last_time": 0.0, "caption_boundings": [], "has_subtitle": true}, "speaker_info": {"speaker_type": "\u79d1\u5927\u8baf\u98de", "speaker_id": "\u6653\u8fb0\uff0c\u5973\uff0c\u5e74\u8f7b\u4eba", "speaker_speed": "1.00(4\u5b57/\u79d2)"}}
\ No newline at end of file