Commit d8721003 authored by cxy

add ffmpeg util for pb audio

parent cb9afe6d
......@@ -16,6 +16,7 @@ GOTO END
:END
c:
cd C:\LavFilters\
call install_audio.bat
call install_splitter.bat
......
......@@ -397,7 +397,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
aside = ""
i = len(mainWindow.projectContext.all_elements)
if element_type == "字幕":
mainWindow.projectContext.subtitle_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed))
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed)
new_element.print_self()
if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
mainWindow.projectContext.subtitle_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
else:
if i == 0:
......@@ -407,9 +411,14 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
# 因为暂时没有用到ed_time_sec,所以直接赋值空吧
ed_time_sec = ""
mainWindow.projectContext.aside_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed))
print(">>>>>>>>>>>>>>>>>append aside")
new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed)
new_element.print_self()
if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
mainWindow.projectContext.aside_list.append(new_element)
mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None):
......
import subprocess
import os
import subprocess
import math
import time
def calculate_audio_volume(file_path):
    """Return the mean volume of a media file in dB, as reported by FFmpeg.

    Runs the bundled FFmpeg binary with the ``volumedetect`` audio filter and
    parses the ``mean_volume`` value from what FFmpeg writes to stderr.

    Args:
        file_path: Path of the file to analyse.

    Returns:
        The mean volume in dB as a float, or None when FFmpeg's output
        contains no parsable ``mean_volume:`` value.

    Raises:
        OSError: If the FFmpeg executable cannot be launched.
    """
    command = [
        'res/ffmpeg-4.3.1/bin/ffmpeg', '-i', file_path,
        '-af', 'volumedetect', '-f', 'null', '-'
    ]
    print(command)
    # Decode explicitly and tolerate undecodable bytes: FFmpeg echoes the
    # input path on stderr, and decoding with the locale's default codec can
    # raise UnicodeDecodeError for non-ASCII paths.
    result = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding='utf-8',
        errors='replace',
    )
    marker = 'mean_volume:'
    for line in result.stderr.splitlines():
        _, found, tail = line.partition(marker)
        if not found:
            continue
        print(line)
        # The value is the first whitespace-separated token after the
        # marker, e.g. "mean_volume: -20.5 dB" -> "-20.5".
        fields = tail.split()
        try:
            mean_volume = float(fields[0])
        except (IndexError, ValueError):
            # Malformed line: keep scanning instead of aborting the caller.
            continue
        print(mean_volume)
        return mean_volume
    return None
# def adjust_audio_volume(input_file, output_file, multiplier):
# # 使用FFmpeg将音频音量设置为给定倍数
# command = [
# 'ffmpeg', '-i', input_file,
# '-af', f'volume={multiplier}*', output_file
# ]
# subprocess.run(command)
def adjust_audio_volume(input_file, volume_adjustment):
    """Scale the volume of an audio file in place using FFmpeg.

    FFmpeg writes the adjusted audio to a temporary WAV file, which then
    replaces ``input_file``. The original file is only replaced after FFmpeg
    has succeeded, so a failed run never destroys it.

    Args:
        input_file: Path of the audio file to modify.
        volume_adjustment: Value passed to FFmpeg's ``volume`` filter
            (a linear multiplier such as ``1.335``).

    Raises:
        RuntimeError: If FFmpeg exits with an error or produces no output
            file; ``input_file`` is left untouched in that case.
        OSError: If FFmpeg cannot be launched or the file cannot be replaced.
    """
    # Keep the temporary file beside the input so the final replace never
    # crosses drives/filesystems; the ".wav" suffix keeps the output format
    # a WAV file, as before.
    temp_path = f"{input_file}.tmp.wav"
    command = [
        "res/ffmpeg-4.3.1/bin/ffmpeg",
        "-i", input_file,
        "-af", f"volume={volume_adjustment}",
        "-y",  # overwrite the output file if it already exists
        temp_path  # temporary output; swapped in for the input afterwards
    ]
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>cg audio" + input_file + str(volume_adjustment))
    # subprocess.run blocks until FFmpeg has exited, so no extra sleep is
    # needed before touching the files.
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0 or not os.path.exists(temp_path):
        # Do not leave a partial temp file behind, and do not touch the input.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        detail = result.stderr.decode("utf-8", errors="replace")[-500:]
        raise RuntimeError(
            f"ffmpeg failed to adjust volume of {input_file} "
            f"(exit code {result.returncode}): {detail}"
        )
    # os.replace overwrites the destination in one step, so there is no
    # window in which the input has been removed but not yet replaced.
    os.replace(temp_path, input_file)
def cg_wav(first_file_path, second_file_path, rate):
    """Rescale the second file's volume relative to the first file's.

    Measures the mean volume (dB) of both files, converts the difference to a
    linear gain, multiplies it by ``rate`` and applies the result to the
    second file in place. Any exception is caught and printed, so this
    function never raises.

    Args:
        first_file_path: Path of the reference audio file.
        second_file_path: Path of the audio file whose volume is changed.
        rate: Extra multiplier applied on top of the level-matching gain.
    """
    try:
        print(">>>>>>>>>>>>>>>cg_wav")
        print(first_file_path, second_file_path, str(rate))

        reference_db = calculate_audio_volume(first_file_path)
        if reference_db is None:
            print("无法计算第一个文件的音量")
            return

        target_db = calculate_audio_volume(second_file_path)
        if target_db is None:
            print("无法计算第二个文件的音量")
            return

        # A dB difference maps to a linear amplitude factor of 10^(dB / 20).
        level_matching_gain = 10 ** ((reference_db - target_db) / 20)
        adjust_audio_volume(second_file_path, level_matching_gain * rate)
    except Exception as e:
        print(">>>>>>>>>>>>>>cg_wav err")
        print(e)
# cg_wav("C:/Users/Administrator/Desktop/无障碍电影/fuiii0000/output/0.01.wav","C:/Users/Administrator/Desktop/无障碍电影/fuiii0000\output\0.01.wav",0.26)
# calculate_audio_volume("./0.01.wav")
# adjust_audio_volume("./0.01.wav",1.335)
# calculate_audio_volume("./0.01.wav")
# def test(projectContext):
# from split_wav import extract_audio
# video_path = projectContext.video_path
# output_dir = os.path.join(projectContext.project_base_dir, "output")
# extract_audio(video_path, output_dir, 0, -1)
# self.projectContext.video_path
# output_dir = os.path.join(self.projectContext.project_base_dir, "output")
# files = os.listdir(output_dir):
# for i, f in enumerate(files):
# fname = '.'.join(f.split('.')[:-1])
# try:
# st_time = float(fname)
# cur_audio, _ = soundfile.read(os.path.join(output_dir, f))
# # print(len(cur_audio))
# st_index = int(st_time * freq)
# audio_len = len(cur_audio)
# blank_audio[st_index: st_index + audio_len] = cur_audio
# origin_wav[st_index: st_index + audio_len] *= origin_audio_rate
# state[0] = float((i + 1) / len(files)) * 0.6 + 0.2
# except:
# continue
\ No newline at end of file
......@@ -40,6 +40,7 @@ import re
from ding_utils import notify
from excel_utils import read_xls,read_xlsx
from ffmpeg_util import adjust_audio_volume
class MainWindow(QMainWindow, Ui_MainWindow):
......@@ -48,6 +49,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
def __init__(self, project_path):
super(MainWindow, self).__init__()
self.last_aside_index = None
self.setupUi(self)
self.statusbar.showMessage("hello", 5000)
self.projectContext = ProjectContext()
......@@ -190,6 +192,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.sld_video.sliderMoved.connect(self.moveSlider) # 进度条拖拽跳转
self.sld_video.ClickedValue.connect(self.clickedSlider) # 进度条点击跳转
self.sld_audio.valueChanged.connect(self.volumeChange) # 控制声音播放
self.pb_sld_audio.valueChanged.connect(self.volumeChangePb) # 控制声音播放
self.kd_slider.valueChanged.connect(self.scale_change_slot)
"""旁白音频预览
......@@ -299,7 +302,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.projectContext.Init(project_path)
self.update_ui()
self.rate = 0
self.rate = None
self.rate_bottom = None
self.origin_rate = 100.00
self.pb_rate = 100.00
self.pb_cg_rate = 1.00
# 打印到log文件中
t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
......@@ -376,9 +383,19 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""弹出旁白区间检测相关信息填写窗口
"""
if self.rate == None:
self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
return
if self.rate_bottom == None:
self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
return
self.detect_dialog.init_self()
self.detect_dialog.show()
def start_detect_direct(self):
# Re-initialise the detection dialog, then call its start_detect() directly; show() is not called here.
self.detect_dialog.init_self()
self.detect_dialog.start_detect()
def show_assemble_dialog(self):
"""弹出旁白音频合成相关信息填写窗口
......@@ -457,6 +474,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_operate.setEnabled(True)
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
# print(">>>>>>>>>>>>>>>start detect")
# self.start_detect_direct()
def up_ocr(self):
h = self.widget.up(3)
......@@ -827,8 +846,27 @@ class MainWindow(QMainWindow, Ui_MainWindow):
volume = round(position/self.sld_audio.maximum()*100)
print("vlume %f" % volume)
self.player.setVolume(volume)
self.origin_rate = volume if volume >0 else 1
self.pb_cg_rate = self.pb_rate / self.origin_rate
print(">>>>>>>>>>cg rate :" + str(self.pb_cg_rate))
self.lab_audio.setText("音量:" + str(volume) + "%")
def volumeChangePb(self, position: int):
    """Update the narration volume from the narration volume slider.

    Converts the slider position to a percentage of the slider's maximum,
    shows it on the ``pb_audio`` label, stores it in ``self.pb_rate``
    (1 is stored instead of any value that is not positive) and recomputes
    ``self.pb_cg_rate`` as ``pb_rate / origin_rate``.

    Args:
        position (int): Current value of the narration volume slider.
    """
    slider_max = self.pb_sld_audio.maximum()
    percent = round(position / slider_max * 100)
    print("vlume %f" % percent)
    self.pb_audio.setText("旁白音量:" + str(percent) + "%")

    if percent > 0:
        self.pb_rate = percent
    else:
        self.pb_rate = 1

    self.pb_cg_rate = self.pb_rate / self.origin_rate
    print(">>>>>>>>>>cg rate :" + str(self.pb_cg_rate))
    print("vlume %f" % self.pb_rate)
# postion 取值[0,self.sld_video.maximum()]
def clickedSlider(self, position: int):
"""点击时间轴时,对应切换视频播放进度
......@@ -1695,7 +1733,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
output_dir = os.path.join(self.projectContext.project_base_dir, "output")
if os.path.exists(output_dir) and len(os.listdir(output_dir)) > 0:
self.export.export_slot(self.projectContext.video_path, output_dir)
self.export.export_slot(self.projectContext.video_path, output_dir, self.pb_cg_rate)
else:
self.prompt_dialog.show_with_msg("暂时无合成音频,请至少生成一条\n 旁白音频后再尝试导出")
......@@ -1802,7 +1840,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
speed (str): 旁白语速
"""
# 注意,这里需要用同一对象,不能生成多个Element
if end_time is None or end_time == "":
print(">>>>>>>>>>>>add line")
end_time = self.projectContext.all_elements[int(row)].st_time_sec
new_element = Element(start_time, end_time, subtitle, suggest, aside, speed)
new_element.print_self()
self.projectContext.all_elements.insert(int(row), new_element)
self.all_tableWidget_idx = int(row)
if suggest is not None and "插入旁白,推荐字数为" in suggest:
......
......@@ -178,8 +178,8 @@ class Ui_MainWindow(object):
"\n"
" QSlider::handle:horizontal {\n"
" image: url(res/images/slider.svg);\n"
" width: 12px;\n"
" height: 12px;\n"
" width: 18px;\n"
" height: 18px;\n"
" margin: -24px -12px;\n"
" }\n"
"\n"
......@@ -202,9 +202,70 @@ class Ui_MainWindow(object):
self.sld_audio.setOrientation(QtCore.Qt.Horizontal)
self.sld_audio.setObjectName("sld_audio")
self.horizontalLayout_5.addWidget(self.sld_audio)
self.pb_sld_audio = QtWidgets.QSlider(self.centralwidget)
self.pb_sld_audio.setMinimumSize(QtCore.QSize(0, 24))
self.pb_sld_audio.setMaximumSize(QtCore.QSize(80, 24))
self.pb_sld_audio.setProperty("value", 100)
self.pb_sld_audio.setStyleSheet("QSlider:horizontal {\n"
" min-height: 24px;\n"
" max-height: 24px;\n"
" }\n"
"\n"
" QSlider:vertical {\n"
" min-width: 24px;\n"
" max-width: 24px;\n"
" }\n"
"\n"
" QSlider::groove:horizontal {\n"
" height: 4px;\n"
" background: #393939;\n"
" margin: 0 12px;\n"
" }\n"
"\n"
" QSlider::groove:vertical {\n"
" width: 4px;\n"
" background: #393939;\n"
" margin: 12px 0;\n"
" border-radius: 24px;\n"
" }\n"
"\n"
" QSlider::handle:horizontal {\n"
" image: url(res/images/slider.svg);\n"
" width: 18px;\n"
" height: 18px;\n"
" margin: -24px -12px;\n"
" }\n"
"\n"
" QSlider::handle:vertical {\n"
" image: url(res/images/slider.svg);\n"
" border-radius: 24px;\n"
" width: 12px;\n"
" height: 12px;\n"
" margin: -12px -24px;\n"
" }\n"
"\n"
" QSlider::add-page {\n"
" background: #232629;\n"
" }\n"
"\n"
" QSlider::sub-page {\n"
" background: #ffd740;\n"
" }")
self.pb_sld_audio.setProperty("value", 99)
self.pb_sld_audio.setOrientation(QtCore.Qt.Horizontal)
self.pb_sld_audio.setObjectName("sld_audio")
self.lab_audio = QtWidgets.QLabel(self.centralwidget)
self.lab_audio.setObjectName("lab_audio")
self.horizontalLayout_5.addWidget(self.lab_audio)
self.horizontalLayout_5.addWidget(self.sld_audio)
self.horizontalLayout_5.addWidget(self.pb_sld_audio)
self.pb_audio = QtWidgets.QLabel(self.centralwidget)
self.pb_audio.setObjectName("pb_audio")
self.horizontalLayout_5.addWidget(self.pb_audio)
self.horizontalLayout_5.setStretch(0, 2)
self.horizontalLayout_5.setStretch(1, 1)
self.horizontalLayout_5.setStretch(3, 1)
......@@ -302,6 +363,15 @@ class Ui_MainWindow(object):
self.sld_video.setTickPosition(QtWidgets.QSlider.TicksAbove)
self.sld_video.setTickInterval(1)
self.sld_video.setObjectName("sld_video")
self.sld_video_pb = myVideoSlider(self.scrollAreaWidgetContents)
self.sld_video_pb.setGeometry(QtCore.QRect(10, 20, 780, 30))
self.sld_video_pb.setMinimumSize(QtCore.QSize(770, 0))
self.sld_video_pb.setMaximumSize(QtCore.QSize(16777215, 20))
self.sld_video_pb.setMaximum(100)
self.sld_video_pb.setOrientation(QtCore.Qt.Horizontal)
self.sld_video_pb.setTickPosition(QtWidgets.QSlider.TicksAbove)
self.sld_video_pb.setTickInterval(1)
self.sld_video_pb.setObjectName("sld_video_pb")
self.scrollArea.setWidget(self.scrollAreaWidgetContents)
self.scrollArea.setGeometry(QtCore.QRect(0, 0, 820, 42))
self.zm_slider_layout.addWidget(self.scrollArea)
......@@ -460,6 +530,7 @@ class Ui_MainWindow(object):
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.label_2.setText(_translate("MainWindow", "00:00/00:00"))
self.lab_audio.setText(_translate("MainWindow", "音量:100%"))
self.pb_audio.setText(_translate("MainWindow", "旁白音量:100%"))
self.insert_aside_from_now_btn.setText(_translate("MainWindow", "当前位置插入旁白"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
......
......@@ -14,6 +14,7 @@ from PyQt5.QtWidgets import *;
from management import RunThread
from speech_synthesis import ffmpeg_path
from ffmpeg_util import cg_wav
class ExportProcessor(QWidget):
show_warning_signal = pyqtSignal(str)
......@@ -23,9 +24,9 @@ class ExportProcessor(QWidget):
self.state = [0]
self.threads = []
def export_slot(self, video_path, output_dir):
def export_slot(self, video_path, output_dir, pb_cg_rate):
t = RunThread(funcName=self.start_export,
args=(video_path, output_dir),
args=(video_path, output_dir, pb_cg_rate),
name="export")
t.setDaemon(True)
self.threads.append(t)
......@@ -37,13 +38,13 @@ class ExportProcessor(QWidget):
self.export_callback_signal.emit(self.threads, self.state)
def start_export(self, video_path, output_dir):
mixed_audio_path = aggrevate_audios(video_path, output_dir, self.state)
def start_export(self, video_path, output_dir, pb_cg_rate):
mixed_audio_path = aggrevate_audios(video_path, output_dir, self.state, pb_cg_rate)
export_video(video_path, mixed_audio_path, output_dir, self.state)
# 生成一条无声的音频,然后把旁白音频逐个按照时间位置放进去,得到仅含旁白的音频和旁白+原声的音频
def aggrevate_audios(video_path: str, output_dir: str, state=None):
def aggrevate_audios(video_path: str, output_dir: str, state=None,pb_cg_rate = 1.00):
# 这个模块最多只有80%的进度
if state is None:
state = [0]
......@@ -56,6 +57,17 @@ def aggrevate_audios(video_path: str, output_dir: str, state=None):
# 将生成的旁白音频放入空白音频中,并将原音频的对应位置音量降低为原来的30%
files = os.listdir(output_dir)
audio_path = os.path.join(output_dir, os.path.basename(video_path).split('.')[0] + ".wav")
for i, f in enumerate(files):
fname = '.'.join(f.split('.')[:-1])
try:
if fname.find(".") != -1:
cg_wav(audio_path, os.path.join(output_dir, f), pb_cg_rate)
except Exception as e:
print(e)
continue
for i, f in enumerate(files):
fname = '.'.join(f.split('.')[:-1])
try:
......
{"video_path": null, "excel_path": null, "detection_info": {"detected": false, "nd_process": 0.0, "last_time": 0.0, "caption_boundings": [], "has_subtitle": true}, "speaker_info": {"speaker_type": "\u6d59\u5927\u5185\u90e8tts", "speaker_id": "test\uff0c\u5973\uff0c\u5e74\u8f7b\u4eba", "speaker_speed": "1.00(4\u5b57/\u79d2)"}}
\ No newline at end of file
{"video_path": null, "excel_path": null, "detection_info": {"detected": false, "nd_process": 0.0, "last_time": 0.0, "caption_boundings": [], "has_subtitle": true}, "speaker_info": {"speaker_type": "\u79d1\u5927\u8baf\u98de", "speaker_id": "\u6653\u8fb0\uff0c\u5973\uff0c\u5e74\u8f7b\u4eba", "speaker_speed": "1.00(4\u5b57/\u79d2)"}}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment