Commit 6cd70d8a authored by 翟艳秋(20软)'s avatar 翟艳秋(20软)

1. [add] 添加docstring,做好代码规范化;

2. [add] 添加main_gui.spec(打包参数文件); 3. [modified] 修改旁白检测表格最后一行旁白推荐字数的计算方式; 4. [modified] 修改任务执行后的进度条显示状态。
parent efcd6148
/dist /dist
/build /build
/missing_packages /missing_packages
/aborted_icons
/无障碍电影制作工具(无黑窗口).zip
/无障碍电影制作工具(有黑窗口).zip
...@@ -13,36 +13,17 @@ import os ...@@ -13,36 +13,17 @@ import os
normal_speed = 4 normal_speed = 4
# from data_utils.audio_process import AudioInferProcess
# from utils.predict import Predictor
# from utils.audio_vad import crop_audio_vad
# from utils.utility import add_arguments, print_arguments
# parser = argparse.ArgumentParser(description=__doc__)
# add_arg = functools.partial(add_arguments, argparser=parser)
# add_arg('wav_path', str, './dataset/test.wav', "预测音频的路径")
# add_arg('is_long_audio', bool, False, "是否为长语音")
# add_arg('use_gpu', bool, False, "是否使用GPU预测")
# add_arg('enable_mkldnn', bool, False, "是否使用mkldnn加速")
# add_arg('to_an', bool, True, "是否转为阿拉伯数字")
# add_arg('beam_size', int, 300, "集束搜索解码相关参数,搜索的大小,范围:[5, 500]")
# add_arg('alpha', float, 1.2, "集束搜索解码相关参数,LM系数")
# add_arg('beta', float, 0.35, "集束搜索解码相关参数,WC系数")
# add_arg('cutoff_prob', float, 0.99, "集束搜索解码相关参数,剪枝的概率")
# add_arg('cutoff_top_n', int, 40, "集束搜索解码相关参数,剪枝的最大值")
# add_arg('mean_std_path', str, './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz', "数据集的均值和标准值的npy文件路径")
# add_arg('vocab_path', str, './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt', "数据集的词汇表文件路径")
# add_arg('model_dir', str, './PaddlePaddle_DeepSpeech2/models/infer/', "导出的预测模型文件夹路径")
# add_arg('lang_model_path', str, './PaddlePaddle_DeepSpeech2/lm/zh_giga.no_cna_cmn.prune01244.klm',
# "集束搜索解码相关参数,语言模型文件路径")
# add_arg('decoding_method', str, 'ctc_greedy', "结果解码方法,有集束搜索(ctc_beam_search)、贪婪策略(ctc_greedy)",
# choices=['ctc_beam_search', 'ctc_greedy'])
# args = parser.parse_args()
# print_arguments(args)
# 使用paddle deepspeech进行语音识别 # 使用paddle deepspeech进行语音识别
def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, state): def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, state):
"""使用paddle deepspeech进行语音识别
:param wav_path: 音频路径
:param pre_time: 音频前置时间(即视频开始位置)
:param book_name: 旁白存储表格路径
:param sheet_name: excel表格中的表名
:param state: 用于通信的状态关键字
:return:
"""
# 获取数据生成器,处理数据和获取字典需要 # 获取数据生成器,处理数据和获取字典需要
vocab_path = './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt' vocab_path = './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt'
mean_std_path = './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz' mean_std_path = './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz'
...@@ -106,51 +87,6 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st ...@@ -106,51 +87,6 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
if os.path.exists(save_path): if os.path.exists(save_path):
shutil.rmtree(save_path) shutil.rmtree(save_path)
return narratages
# # 使用网上已有的模型进行识别(效果差)
# def predict_audio_with_paddle():
# start = time.time()
# text = asr_executor(
# model='conformer_wenetspeech',
# lang='zh',
# sample_rate=16000,
# config=None, # Set `config` and `ckpt_path` to None to use pretrained model.
# ckpt_path=None,
# audio_file=args.wav_path,
# force_yes=False,
# device=paddle.get_device()
# )
# print("消耗时间:%dms, 识别结果: %s" % (round((time.time() - start) * 1000), text))
#
#
# def predict_long_audio():
# start = time.time()
# # 分割长音频
# audios_path = crop_audio_vad(args.wav_path)
# texts = ''
# scores = []
# # 执行识别
# for i, audio_path in enumerate(audios_path):
# score, text = predictor.predict(audio_path=audio_path, to_an=args.to_an)
# texts = texts + ',' + text
# scores.append(score)
# print("第%d个分割音频, 得分: %d, 识别结果: %s" % (i, score, text))
# print("最终结果,消耗时间:%d, 得分: %d, 识别结果: %s" % (round((time.time() - start) * 1000), sum(scores) / len(scores), texts))
#
#
# def predict_audio():
# start = time.time()
# score, text = predictor.predict(audio_path=args.wav_path, to_an=args.to_an)
# print("消耗时间:%dms, 识别结果: %s, 得分: %d" % (round((time.time() - start) * 1000), text, score))
if __name__ == "__main__": if __name__ == "__main__":
# if args.is_long_audio:
# # predict_long_audio()
# predict_long_audio_with_paddle()
# else:
# # predict_audio()
# predict_audio_with_paddle()
pass pass
...@@ -9,11 +9,11 @@ from split_wav import * ...@@ -9,11 +9,11 @@ from split_wav import *
def create_sheet(path, sheet_name, value): def create_sheet(path, sheet_name, value):
""" """根据给定的表头,初始化表格
根据给定的表头,初始化表格,
:param path: str, 表格(book)的存储位置 :param path: [str], 表格(book)的存储位置
:param sheet_name: str, 表(sheet)的名字 :param sheet_name: [str], 表(sheet)的名字
:param value: list, 表头内容为['起始时间','终止时间','字幕','建议','旁边解说脚本'] :param value: [list], 表头内容为['起始时间','终止时间','字幕','建议','旁边解说脚本']
:return: None :return: None
""" """
index = len(value) index = len(value)
...@@ -30,11 +30,11 @@ def create_sheet(path, sheet_name, value): ...@@ -30,11 +30,11 @@ def create_sheet(path, sheet_name, value):
def write_to_sheet(path, sheet_name, value): def write_to_sheet(path, sheet_name, value):
""" """向已存在的表格中写入数据
向已存在的表格中写入数据
:param path: :param path: 表格存储位置
:param sheet_name: :param sheet_name: excel表内的表名
:param value: :param value: 插入数据
:return: :return:
""" """
index = len(value) index = len(value)
...@@ -50,32 +50,16 @@ def write_to_sheet(path, sheet_name, value): ...@@ -50,32 +50,16 @@ def write_to_sheet(path, sheet_name, value):
workbook.save(path) workbook.save(path)
def trans_to_mono(wav_path):
    """Convert an audio file to a single (mono) channel with ffmpeg.

    :param wav_path: str, path of the audio file to convert
    :return: str, path of the new mono audio file ("<stem>_1.wav")
    """
    # splitext instead of a fixed [:-4] slice so extensions of any length
    # (".flac", ".aac", ...) are stripped correctly, not just 3-char ones.
    base, _ = os.path.splitext(wav_path)
    new_wav_path = base + "_1.wav"
    # Quote both paths so filenames containing spaces survive the shell.
    command = 'ffmpeg -i "{}" -ac 1 -y "{}"'.format(wav_path, new_wav_path)
    os.system(command)
    return new_wav_path
def concat_wav(root):
    """Concatenate the per-segment vocal tracks under ``root`` into one wav.

    Each subdirectory of ``root`` is expected to hold a ``vocal.wav``
    produced by the vocal-extraction step; their paths are written to a
    concat list file which is then fed to ffmpeg's concat demuxer.

    :param root: str, directory containing the per-segment subdirectories
    :return: str, path of the concatenated output file (``total.wav``)
    """
    txt_path = os.path.join(root, 'list.txt')
    with open(txt_path, 'w', encoding='utf-8') as f:
        # Sort for a deterministic segment order; os.listdir order is
        # filesystem-dependent. NOTE(review): purely numeric segment names
        # would need a natural sort — confirm the naming scheme upstream.
        for file_name in sorted(os.listdir(root)):
            if os.path.isdir(os.path.join(root, file_name)):
                # os.path.join for the whole path instead of "+ '/vocal.wav'"
                # so separators are correct on every platform.
                wav_path = os.path.join(root, file_name, 'vocal.wav')
                f.write("file \'" + wav_path + "\'\n")
    output_file = os.path.join(root, 'total.wav')
    # Quote paths so directories containing spaces survive the shell.
    command = 'ffmpeg -f concat -safe 0 -i "{}" -y "{}"'.format(txt_path, output_file)
    os.system(command)
    return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None): def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
"""使用ASR检测视频中的字幕并推荐旁白
:param video_path: 待处理视频地址
:param book_path: 旁白表格输出地址
:param start_time: 视频实际开始时间
:param end_time: 视频实际结束时间
:param state: 用于通信的状态关键字
:return:
"""
# 临时存储各种中间产物的文件夹 # 临时存储各种中间产物的文件夹
tmp_root = os.path.join(os.path.dirname(video_path), 'tmp') tmp_root = os.path.join(os.path.dirname(video_path), 'tmp')
print(tmp_root) print(tmp_root)
...@@ -87,12 +71,6 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None ...@@ -87,12 +71,6 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None
return return
# 提取出视频中的音频,分割后提取出其中的人声部分并存储 # 提取出视频中的音频,分割后提取出其中的人声部分并存储
audio_path = extract_audio(video_path, tmp_root, start_time, end_time) audio_path = extract_audio(video_path, tmp_root, start_time, end_time)
# root = split_audio()
# extract_speech()
#
# # 将提取出的人声拼接,并将音频的channel调整为1
# total_wav_path = concat_wav(root)
# audio_path = trans_to_mono(total_wav_path)
# xlsx中的表格名为“旁白插入位置建议” # xlsx中的表格名为“旁白插入位置建议”
if os.path.exists(book_path): if os.path.exists(book_path):
......
...@@ -21,10 +21,11 @@ normal_speed = 4 ...@@ -21,10 +21,11 @@ normal_speed = 4
def get_position(video_path, start_time): def get_position(video_path, start_time):
""" """根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param start_time: 视频实际开始时间
:param video_path: 视频存储路径 :param video_path: 视频存储路径
:return: 字幕在整个画面中的实际上下边界位置 :return: [float,float], 字幕在整个画面中的实际上下边界位置
""" """
video = cv2.VideoCapture(video_path) video = cv2.VideoCapture(video_path)
subtitle_position = {} subtitle_position = {}
...@@ -90,17 +91,22 @@ def get_position(video_path, start_time): ...@@ -90,17 +91,22 @@ def get_position(video_path, start_time):
def erasePunc(txt):
    """Strip every non-Chinese character from a string.

    Used before similarity comparison so that punctuation and Latin
    characters do not affect the score.

    :param txt: string to clean
    :return: [str], the input with only CJK unified ideographs kept
    """
    non_chinese = re.compile(r'[^\u4e00-\u9fa5]')
    return non_chinese.sub('', txt)
def string_similar(s1, s2): def string_similar(s1, s2):
""" """比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
:param s1: :param s1: 第一个字符串
:param s2: :param s2: 第二个字符串
:return: 字符串间的相似度 :return: [float], 字符串间的相似度
""" """
# 去除非中文字符后,再比较相似度 # 去除非中文字符后,再比较相似度
s1 = erasePunc(s1) s1 = erasePunc(s1)
...@@ -109,8 +115,10 @@ def string_similar(s1, s2): ...@@ -109,8 +115,10 @@ def string_similar(s1, s2):
def normalize(text): def normalize(text):
""" """规范化处理文本中的一些标点符号
用于规范化处理文本中的一些标点符号
:param text: 待处理字符串
:return: 处理后的字符串
""" """
# 将英文标点转换为中文标点 # 将英文标点转换为中文标点
E_pun = u',.!?()[]:;' E_pun = u',.!?()[]:;'
...@@ -127,10 +135,10 @@ def normalize(text): ...@@ -127,10 +135,10 @@ def normalize(text):
def detect_subtitle(img): def detect_subtitle(img):
""" """ 检测当前画面得到字幕信息
检测当前画面得到字幕信息
:param img: 当前画面 :param img: 当前画面
:return: 字幕信息 :return: [str|None], 字幕信息
""" """
subTitle = '' subTitle = ''
img = img[int(up_b) - 30:int(down_b) + 30] img = img[int(up_b) - 30:int(down_b) + 30]
...@@ -164,8 +172,8 @@ def detect_subtitle(img): ...@@ -164,8 +172,8 @@ def detect_subtitle(img):
def process_video(video_path, begin, end, book_path, sheet_name, state): def process_video(video_path, begin, end, book_path, sheet_name, state):
""" """ 处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径 :param video_path: 待处理视频的路径
:param begin: 电影的实际开始位置(秒) :param begin: 电影的实际开始位置(秒)
:param end: 电影除演职表外的实际结束位置(秒) :param end: 电影除演职表外的实际结束位置(秒)
...@@ -179,8 +187,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state): ...@@ -179,8 +187,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
lastSubTitle = None lastSubTitle = None
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析 # res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res = [] res = []
# narratage_recommand是旁白推荐信息,用于输出为表格
narratage_recommend = []
cnt = 0 cnt = 0
start_time = 0 start_time = 0
end_time = 0 end_time = 0
...@@ -236,38 +242,27 @@ def process_video(video_path, begin, end, book_path, sheet_name, state): ...@@ -236,38 +242,27 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end: if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1: if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
print('--------------------------------------------------') print('--------------------------------------------------')
recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int( # 还没有字幕被分析出来
(res[-1][0] - res[-2][1]) * normal_speed) # if len(res) == 0:
recommend_lens = int((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time) * normal_speed)
# else:
# recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
# (res[-1][0] - res[-2][1]) * normal_speed)
# narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) # narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
break break
return narratage_recommend
# def write_excel_xlsx(path, sheet_name, value):
# """
# 将旁白推荐信息输出表格
# :param path: 输出表格的存储路径
# :param sheet_name:表格中的表名
# :param value:输出到表格中的信息
# :return:
# """
# index = len(value)
# workbook = Workbook()
# sheet = workbook.active
# sheet.title = sheet_name
# # 将字幕对应的那一列扩宽一些
# sheet.column_dimensions['C'].width = 50
# sheet.column_dimensions['D'].width = 30
# for i in range(0, index):
# for j in range(0, len(value[i])):
# sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j])).alignment = Alignment(wrapText=True)
# if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译':
# sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
# workbook.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time, state): def detect_with_ocr(video_path, book_path, start_time, end_time, state):
""" 使用ocr检测视频获取字幕并输出旁白推荐
:param video_path: 待处理视频地址
:param book_path: 表格存储位置
:param start_time: 视频实际开始时间
:param end_time: 视频实际结束时间
:param state: 用于通信的状态关键字
:return:
"""
if os.path.exists(book_path): if os.path.exists(book_path):
os.remove(book_path) os.remove(book_path)
book_name_xlsx = book_path book_name_xlsx = book_path
...@@ -285,6 +280,3 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state): ...@@ -285,6 +280,3 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
if __name__ == '__main__': if __name__ == '__main__':
pass pass
# video_path = "D:/heelo/hysxm_1.mp4"
# book_path = '何以笙箫默.xlsx'
# detect_with_ocr(video_path, book_path, 0, 300, [None])
...@@ -10,8 +10,8 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False) ...@@ -10,8 +10,8 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def random_int_list(start, stop, length): def random_int_list(start, stop, length):
""" """在某一段区间内取n个随机数
在某一段区间内取n个随机数
:param start: 随机数区间的最小值 :param start: 随机数区间的最小值
:param stop: 随机数区间的最大值 :param stop: 随机数区间的最大值
:param length: 随机数个数 :param length: 随机数个数
...@@ -30,8 +30,8 @@ def random_int_list(start, stop, length): ...@@ -30,8 +30,8 @@ def random_int_list(start, stop, length):
def detect_subtitle(frame): def detect_subtitle(frame):
""" """判断画面中是否含字幕
判断画面中是否含字幕
:param frame: 视频的某一帧画面 :param frame: 视频的某一帧画面
:return: True or False :return: True or False
""" """
...@@ -55,10 +55,11 @@ def detect_subtitle(frame): ...@@ -55,10 +55,11 @@ def detect_subtitle(frame):
def detect_movie(video_path, start, end, interval): def detect_movie(video_path, start, end, interval):
""" """使用整部视频进行测试,确定视频是否提供字幕
使用整部视频进行测试,确定视频是否提供字幕
:param video_path: 视频的地址 :param video_path: 视频的地址
:param start: 取随机帧的时间区间的开始时间 :param start: 取随机帧的时间区间的开始时间
:param end: 视频结束时间
:param interval: 取随机帧的每段区间时长,单位为秒 :param interval: 取随机帧的每段区间时长,单位为秒
:return: True or False(视频是否含字幕) :return: True or False(视频是否含字幕)
""" """
...@@ -94,9 +95,3 @@ def detect_movie(video_path, start, end, interval): ...@@ -94,9 +95,3 @@ def detect_movie(video_path, start, end, interval):
if __name__ == '__main__': if __name__ == '__main__':
pass pass
# video_path = r'D:\heelo\hysxm.mp4'
# start_time = time.time()
# start = 90
# interval = 120
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
This diff is collapsed.
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller spec file for the accessible-movie production tool GUI
# (main_gui.py). Build with: pyinstaller main_gui.spec
# NOTE: Analysis/PYZ/EXE/COLLECT are names injected by PyInstaller when it
# executes this file; the spec is run by PyInstaller, never imported.
block_cipher = None
# Static-analysis pass over main_gui.py: hiddenimports lists packages that
# PyInstaller's import scan misses; excludes drops packages not needed at
# runtime to shrink the bundle.
a = Analysis(['main_gui.py'],
pathex=[],
binaries=[],
datas=[],
hiddenimports=['astor','distutils','imgaug','lmdb','pyclipper','pywt','scipy','setuptools','shapely','skimage'],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=['PyQt5','google.api_core','google.cloud','google.cloud.storage','googleapiclient'],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False)
# Bundle the collected pure-Python modules into a PYZ archive.
pyz = PYZ(a.pure, a.zipped_data,
cipher=block_cipher)
# Build the executable; console=True keeps a console window attached.
exe = EXE(pyz,
a.scripts,
[],
exclude_binaries=True,
name='无障碍电影制作工具',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=True,
disable_windowed_traceback=False,
target_arch=None,
codesign_identity=None,
# NOTE(review): machine-specific absolute icon path — this breaks builds
# on any other machine; consider a path relative to the spec file.
entitlements_file=None , icon='D:\\AddCaption\\accessibility_movie\\eagle_2.ico')
# Collect the exe together with binaries and data files into dist/main_gui.
coll = COLLECT(exe,
a.binaries,
a.zipfiles,
a.datas,
strip=False,
upx=True,
upx_exclude=['vcruntime140.dll'],
name='main_gui')
...@@ -5,7 +5,19 @@ import time ...@@ -5,7 +5,19 @@ import time
def detect(video_path, start_time, end_time, book_path, state, subtitle=None): def detect(video_path, start_time, end_time, book_path, state, subtitle=None):
"""字幕及旁白区间检测
:param video_path: 待检测视频
:param start_time: 视频开始时间
:param end_time: 视频结束时间
:param book_path: 存放旁白的表格存储位置
:param state: 任务进行状态
:param subtitle: 视频是否有字幕
:return:
"""
print("开始检测") print("开始检测")
print("start_time", start_time)
print("end_time", end_time)
if book_path is None: if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx" book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else: else:
......
...@@ -16,8 +16,8 @@ ffmpeg_path = r'.\ffmpeg-4.3.1\bin\ffmpeg' ...@@ -16,8 +16,8 @@ ffmpeg_path = r'.\ffmpeg-4.3.1\bin\ffmpeg'
def speech_synthesis(text, output_file, speed): def speech_synthesis(text, output_file, speed):
""" """用于合成讲解音频并输出
用于合成讲解音频并输出
:param text: 解说文本 :param text: 解说文本
:param output_file: 输出文件路径 :param output_file: 输出文件路径
:param speed: 指定的音频语速,默认为1.0 :param speed: 指定的音频语速,默认为1.0
...@@ -54,8 +54,8 @@ def speech_synthesis(text, output_file, speed): ...@@ -54,8 +54,8 @@ def speech_synthesis(text, output_file, speed):
def change_speed(wav_path, speed=1.0): def change_speed(wav_path, speed=1.0):
""" """调整语速
调整语速
:param wav_path: 原音频路径 :param wav_path: 原音频路径
:param speed: 转换后的语速 :param speed: 转换后的语速
:return: :return:
...@@ -65,7 +65,8 @@ def change_speed(wav_path, speed=1.0): ...@@ -65,7 +65,8 @@ def change_speed(wav_path, speed=1.0):
def read_sheet(book_path, sheet_name=None): def read_sheet(book_path, sheet_name=None):
""" """读表
从表格中读出所有的内容,用dict保存(表格的格式固定,第一行为表头(起始时间|终止时间|字幕|建议|解说脚本)) 从表格中读出所有的内容,用dict保存(表格的格式固定,第一行为表头(起始时间|终止时间|字幕|建议|解说脚本))
:param book_path: 表格的存储路径 :param book_path: 表格的存储路径
:param sheet_name: 想要读取的表在excel表格中的名字(可选项) :param sheet_name: 想要读取的表在excel表格中的名字(可选项)
...@@ -87,12 +88,13 @@ def read_sheet(book_path, sheet_name=None): ...@@ -87,12 +88,13 @@ def read_sheet(book_path, sheet_name=None):
def get_narratage_text(sheet_content, speed): def get_narratage_text(sheet_content, speed):
""" """获取旁白解说文本及起止时间
根据从表格中获取到的内容,分析得到解说文本+对应开始时间
:param sheet_content: dict,keys=["起始时间","终止时间","字幕","建议","解说脚本"] :param sheet_content: [dict],keys=["起始时间","终止时间","字幕","建议","解说脚本"]
:param speed: float, 旁白语速 :param speed: [float], 旁白语速
:return: narratage_text: list, 旁白文本, :return: narratage_text: [list], 旁白文本,
narratage_start_time: list, 旁白对应开始时间 narratage_start_time: [list], 旁白对应开始时间
narratage_end_time: [list], 旁白对应结束时间
""" """
narratage = sheet_content['解说脚本'] narratage = sheet_content['解说脚本']
subtitle = sheet_content['字幕'] subtitle = sheet_content['字幕']
...@@ -134,6 +136,11 @@ def get_narratage_text(sheet_content, speed): ...@@ -134,6 +136,11 @@ def get_narratage_text(sheet_content, speed):
def second_to_str(seconds): def second_to_str(seconds):
"""秒数转字符串
:param seconds:秒数
:return: [str], ’时:分:秒‘格式的时间字符串
"""
seconds = float(seconds) seconds = float(seconds)
hour = int(seconds / 3600) hour = int(seconds / 3600)
minute = int((seconds - hour * 3600) / 60) minute = int((seconds - hour * 3600) / 60)
...@@ -144,8 +151,8 @@ def second_to_str(seconds): ...@@ -144,8 +151,8 @@ def second_to_str(seconds):
def export_caption(sheet_content, caption_file): def export_caption(sheet_content, caption_file):
""" """将用户校正后的字幕输出为字幕文件(srt格式)
将用户校正后的字幕输出为字幕文件(srt格式)
:param sheet_content: 用户校正后的表格内容 :param sheet_content: 用户校正后的表格内容
:return: :return:
""" """
...@@ -164,6 +171,13 @@ def export_caption(sheet_content, caption_file): ...@@ -164,6 +171,13 @@ def export_caption(sheet_content, caption_file):
def adjust_volume(origin, start_timestamp, end_timestamp): def adjust_volume(origin, start_timestamp, end_timestamp):
"""调整原音频中待插入旁白位置的音量
:param origin: 原音频存储位置
:param start_timestamp: 旁白开始时间
:param end_timestamp: 旁白结束时间
:return:
"""
global adjusted_wav_path global adjusted_wav_path
adjusted_wav_path = os.path.join(os.path.dirname(origin), adjusted_wav_path) adjusted_wav_path = os.path.join(os.path.dirname(origin), adjusted_wav_path)
n = len(start_timestamp) n = len(start_timestamp)
...@@ -177,6 +191,13 @@ def adjust_volume(origin, start_timestamp, end_timestamp): ...@@ -177,6 +191,13 @@ def adjust_volume(origin, start_timestamp, end_timestamp):
def mix_speech(origin, narratage_paths, start_timestamps): def mix_speech(origin, narratage_paths, start_timestamps):
"""将合成音频与原音频混合
:param origin: 原音频存储位置
:param narratage_paths: 旁白音频的存储位置
:param start_timestamps: 旁白音频的开始时间
:return:
"""
composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav") composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
command_line = '{} -i {}'.format(ffmpeg_path, origin) command_line = '{} -i {}'.format(ffmpeg_path, origin)
for i, narratage_path in enumerate(narratage_paths): for i, narratage_path in enumerate(narratage_paths):
...@@ -191,8 +212,8 @@ def mix_speech(origin, narratage_paths, start_timestamps): ...@@ -191,8 +212,8 @@ def mix_speech(origin, narratage_paths, start_timestamps):
def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None): def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
""" """生成语音并导出字幕
生成语音并导出字幕
:param video_path: 原视频的位置 :param video_path: 原视频的位置
:param sheet_path: 校对过的旁白脚本表格文件 :param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的文件夹 :param output_dir: 存放音频文件的文件夹
...@@ -250,24 +271,4 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state ...@@ -250,24 +271,4 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
if __name__ == '__main__': if __name__ == '__main__':
# 定义参数
# parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
# parser.add_argument("--video_path", required=True, type=str, help="原视频位置")
# parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
# parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
# parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
# parser.add_argument("--speed", type=float, default=1.0, help="设置语速,默认为1.0")
# args = parser.parse_args()
# video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
# args.sheet_path, args.output_dir, args.speed, args.caption_file
# video_path = 'D:/heelo/hysxm_3.mp4'
# sheet_path = 'D:/heelo/hysxm_3.xlsx'
# output_dir = 'D:/AddCaption/hysxm_3'
# speed = 1.25
# caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
#
# # 主函数执行
# ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
# caption_file=caption_file)
pass pass
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment