Commit 6cd70d8a authored by 翟艳秋(20软)'s avatar 翟艳秋(20软)

1. [add] 添加docstring,做好代码规范化;

2. [add] 添加main_gui.spec(打包参数文件); 3. [modified] 修改旁白检测表格最后一行旁白推荐字数的计算方式; 4. [modified] 修改任务执行后的进度条显示状态。
parent efcd6148
/dist /dist
/build /build
/missing_packages /missing_packages
/aborted_icons
/无障碍电影制作工具(无黑窗口).zip
/无障碍电影制作工具(有黑窗口).zip
...@@ -13,36 +13,17 @@ import os ...@@ -13,36 +13,17 @@ import os
normal_speed = 4 normal_speed = 4
# from data_utils.audio_process import AudioInferProcess
# from utils.predict import Predictor
# from utils.audio_vad import crop_audio_vad
# from utils.utility import add_arguments, print_arguments
# parser = argparse.ArgumentParser(description=__doc__)
# add_arg = functools.partial(add_arguments, argparser=parser)
# add_arg('wav_path', str, './dataset/test.wav', "预测音频的路径")
# add_arg('is_long_audio', bool, False, "是否为长语音")
# add_arg('use_gpu', bool, False, "是否使用GPU预测")
# add_arg('enable_mkldnn', bool, False, "是否使用mkldnn加速")
# add_arg('to_an', bool, True, "是否转为阿拉伯数字")
# add_arg('beam_size', int, 300, "集束搜索解码相关参数,搜索的大小,范围:[5, 500]")
# add_arg('alpha', float, 1.2, "集束搜索解码相关参数,LM系数")
# add_arg('beta', float, 0.35, "集束搜索解码相关参数,WC系数")
# add_arg('cutoff_prob', float, 0.99, "集束搜索解码相关参数,剪枝的概率")
# add_arg('cutoff_top_n', int, 40, "集束搜索解码相关参数,剪枝的最大值")
# add_arg('mean_std_path', str, './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz', "数据集的均值和标准值的npy文件路径")
# add_arg('vocab_path', str, './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt', "数据集的词汇表文件路径")
# add_arg('model_dir', str, './PaddlePaddle_DeepSpeech2/models/infer/', "导出的预测模型文件夹路径")
# add_arg('lang_model_path', str, './PaddlePaddle_DeepSpeech2/lm/zh_giga.no_cna_cmn.prune01244.klm',
# "集束搜索解码相关参数,语言模型文件路径")
# add_arg('decoding_method', str, 'ctc_greedy', "结果解码方法,有集束搜索(ctc_beam_search)、贪婪策略(ctc_greedy)",
# choices=['ctc_beam_search', 'ctc_greedy'])
# args = parser.parse_args()
# print_arguments(args)
# 使用paddle deepspeech进行语音识别 # 使用paddle deepspeech进行语音识别
def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, state): def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, state):
"""使用paddle deepspeech进行语音识别
:param wav_path: 音频路径
:param pre_time: 音频前置时间(即视频开始位置)
:param book_name: 旁白存储表格路径
:param sheet_name: excel表格中的表名
:param state: 用于通信的状态关键字
:return:
"""
# 获取数据生成器,处理数据和获取字典需要 # 获取数据生成器,处理数据和获取字典需要
vocab_path = './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt' vocab_path = './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt'
mean_std_path = './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz' mean_std_path = './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz'
...@@ -106,51 +87,6 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st ...@@ -106,51 +87,6 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
if os.path.exists(save_path): if os.path.exists(save_path):
shutil.rmtree(save_path) shutil.rmtree(save_path)
return narratages
# # 使用网上已有的模型进行识别(效果差)
# def predict_audio_with_paddle():
# start = time.time()
# text = asr_executor(
# model='conformer_wenetspeech',
# lang='zh',
# sample_rate=16000,
# config=None, # Set `config` and `ckpt_path` to None to use pretrained model.
# ckpt_path=None,
# audio_file=args.wav_path,
# force_yes=False,
# device=paddle.get_device()
# )
# print("消耗时间:%dms, 识别结果: %s" % (round((time.time() - start) * 1000), text))
#
#
# def predict_long_audio():
# start = time.time()
# # 分割长音频
# audios_path = crop_audio_vad(args.wav_path)
# texts = ''
# scores = []
# # 执行识别
# for i, audio_path in enumerate(audios_path):
# score, text = predictor.predict(audio_path=audio_path, to_an=args.to_an)
# texts = texts + ',' + text
# scores.append(score)
# print("第%d个分割音频, 得分: %d, 识别结果: %s" % (i, score, text))
# print("最终结果,消耗时间:%d, 得分: %d, 识别结果: %s" % (round((time.time() - start) * 1000), sum(scores) / len(scores), texts))
#
#
# def predict_audio():
# start = time.time()
# score, text = predictor.predict(audio_path=args.wav_path, to_an=args.to_an)
# print("消耗时间:%dms, 识别结果: %s, 得分: %d" % (round((time.time() - start) * 1000), text, score))
if __name__ == "__main__": if __name__ == "__main__":
# if args.is_long_audio:
# # predict_long_audio()
# predict_long_audio_with_paddle()
# else:
# # predict_audio()
# predict_audio_with_paddle()
pass pass
...@@ -9,11 +9,11 @@ from split_wav import * ...@@ -9,11 +9,11 @@ from split_wav import *
def create_sheet(path, sheet_name, value): def create_sheet(path, sheet_name, value):
""" """根据给定的表头,初始化表格
根据给定的表头,初始化表格,
:param path: str, 表格(book)的存储位置 :param path: [str], 表格(book)的存储位置
:param sheet_name: str, 表(sheet)的名字 :param sheet_name: [str], 表(sheet)的名字
:param value: list, 表头内容为['起始时间','终止时间','字幕','建议','旁边解说脚本'] :param value: [list], 表头内容为['起始时间','终止时间','字幕','建议','旁边解说脚本']
:return: None :return: None
""" """
index = len(value) index = len(value)
...@@ -30,11 +30,11 @@ def create_sheet(path, sheet_name, value): ...@@ -30,11 +30,11 @@ def create_sheet(path, sheet_name, value):
def write_to_sheet(path, sheet_name, value): def write_to_sheet(path, sheet_name, value):
""" """向已存在的表格中写入数据
向已存在的表格中写入数据
:param path: :param path: 表格存储位置
:param sheet_name: :param sheet_name: excel表内的表名
:param value: :param value: 插入数据
:return: :return:
""" """
index = len(value) index = len(value)
...@@ -50,32 +50,16 @@ def write_to_sheet(path, sheet_name, value): ...@@ -50,32 +50,16 @@ def write_to_sheet(path, sheet_name, value):
workbook.save(path) workbook.save(path)
def trans_to_mono(wav_path):
"""
将音频的通道数channel转换为1
:param wav_path: str, 需要转换的音频地址
:return: new_wav_path: str, 转换后得到的新音频地址
"""
new_wav_path = wav_path[:-4] + "_1.wav"
command = 'ffmpeg -i {} -ac 1 -y {}'.format(wav_path, new_wav_path)
os.system(command)
return new_wav_path
def concat_wav(root):
txt_path = os.path.join(root, 'list.txt')
with open(txt_path, 'w', encoding='utf-8') as f:
for file_name in os.listdir(root):
if os.path.isdir(os.path.join(root, file_name)):
wav_path = os.path.join(root, file_name) + "/vocal.wav"
f.write("file \'" + wav_path + "\'\n")
output_file = os.path.join(root, 'total.wav')
command = 'ffmpeg -f concat -safe 0 -i {} -y {}'.format(txt_path, output_file)
os.system(command)
return output_file
def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None): def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None):
"""使用ASR检测视频中的字幕并推荐旁白
:param video_path: 待处理视频地址
:param book_path: 旁白表格输出地址
:param start_time: 视频实际开始时间
:param end_time: 视频实际结束时间
:param state: 用于通信的状态关键字
:return:
"""
# 临时存储各种中间产物的文件夹 # 临时存储各种中间产物的文件夹
tmp_root = os.path.join(os.path.dirname(video_path), 'tmp') tmp_root = os.path.join(os.path.dirname(video_path), 'tmp')
print(tmp_root) print(tmp_root)
...@@ -87,12 +71,6 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None ...@@ -87,12 +71,6 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None
return return
# 提取出视频中的音频,分割后提取出其中的人声部分并存储 # 提取出视频中的音频,分割后提取出其中的人声部分并存储
audio_path = extract_audio(video_path, tmp_root, start_time, end_time) audio_path = extract_audio(video_path, tmp_root, start_time, end_time)
# root = split_audio()
# extract_speech()
#
# # 将提取出的人声拼接,并将音频的channel调整为1
# total_wav_path = concat_wav(root)
# audio_path = trans_to_mono(total_wav_path)
# xlsx中的表格名为“旁白插入位置建议” # xlsx中的表格名为“旁白插入位置建议”
if os.path.exists(book_path): if os.path.exists(book_path):
......
...@@ -21,10 +21,11 @@ normal_speed = 4 ...@@ -21,10 +21,11 @@ normal_speed = 4
def get_position(video_path, start_time): def get_position(video_path, start_time):
""" """根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param start_time: 视频实际开始时间
:param video_path: 视频存储路径 :param video_path: 视频存储路径
:return: 字幕在整个画面中的实际上下边界位置 :return: [float,float], 字幕在整个画面中的实际上下边界位置
""" """
video = cv2.VideoCapture(video_path) video = cv2.VideoCapture(video_path)
subtitle_position = {} subtitle_position = {}
...@@ -90,17 +91,22 @@ def get_position(video_path, start_time): ...@@ -90,17 +91,22 @@ def get_position(video_path, start_time):
def erasePunc(txt): def erasePunc(txt):
"""去除字符串中的非中文字符
:param txt: 待处理字符串
:return: [str], 处理后的字符串
"""
pattern = re.compile(r'[^\u4e00-\u9fa5]') pattern = re.compile(r'[^\u4e00-\u9fa5]')
txt = re.sub(pattern, '', txt) txt = re.sub(pattern, '', txt)
return txt return txt
def string_similar(s1, s2): def string_similar(s1, s2):
""" """比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
:param s1: :param s1: 第一个字符串
:param s2: :param s2: 第二个字符串
:return: 字符串间的相似度 :return: [float], 字符串间的相似度
""" """
# 去除非中文字符后,再比较相似度 # 去除非中文字符后,再比较相似度
s1 = erasePunc(s1) s1 = erasePunc(s1)
...@@ -109,8 +115,10 @@ def string_similar(s1, s2): ...@@ -109,8 +115,10 @@ def string_similar(s1, s2):
def normalize(text): def normalize(text):
""" """规范化处理文本中的一些标点符号
用于规范化处理文本中的一些标点符号
:param text: 待处理字符串
:return: 处理后的字符串
""" """
# 将英文标点转换为中文标点 # 将英文标点转换为中文标点
E_pun = u',.!?()[]:;' E_pun = u',.!?()[]:;'
...@@ -127,10 +135,10 @@ def normalize(text): ...@@ -127,10 +135,10 @@ def normalize(text):
def detect_subtitle(img): def detect_subtitle(img):
""" """ 检测当前画面得到字幕信息
检测当前画面得到字幕信息
:param img: 当前画面 :param img: 当前画面
:return: 字幕信息 :return: [str|None], 字幕信息
""" """
subTitle = '' subTitle = ''
img = img[int(up_b) - 30:int(down_b) + 30] img = img[int(up_b) - 30:int(down_b) + 30]
...@@ -164,8 +172,8 @@ def detect_subtitle(img): ...@@ -164,8 +172,8 @@ def detect_subtitle(img):
def process_video(video_path, begin, end, book_path, sheet_name, state): def process_video(video_path, begin, end, book_path, sheet_name, state):
""" """ 处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径 :param video_path: 待处理视频的路径
:param begin: 电影的实际开始位置(秒) :param begin: 电影的实际开始位置(秒)
:param end: 电影除演职表外的实际结束位置(秒) :param end: 电影除演职表外的实际结束位置(秒)
...@@ -179,8 +187,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state): ...@@ -179,8 +187,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
lastSubTitle = None lastSubTitle = None
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析 # res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res = [] res = []
# narratage_recommand是旁白推荐信息,用于输出为表格
narratage_recommend = []
cnt = 0 cnt = 0
start_time = 0 start_time = 0
end_time = 0 end_time = 0
...@@ -236,38 +242,27 @@ def process_video(video_path, begin, end, book_path, sheet_name, state): ...@@ -236,38 +242,27 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end: if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1: if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
print('--------------------------------------------------') print('--------------------------------------------------')
recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int( # 还没有字幕被分析出来
(res[-1][0] - res[-2][1]) * normal_speed) # if len(res) == 0:
recommend_lens = int((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time) * normal_speed)
# else:
# recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
# (res[-1][0] - res[-2][1]) * normal_speed)
# narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) # narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens]) write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
break break
return narratage_recommend
# def write_excel_xlsx(path, sheet_name, value):
# """
# 将旁白推荐信息输出表格
# :param path: 输出表格的存储路径
# :param sheet_name:表格中的表名
# :param value:输出到表格中的信息
# :return:
# """
# index = len(value)
# workbook = Workbook()
# sheet = workbook.active
# sheet.title = sheet_name
# # 将字幕对应的那一列扩宽一些
# sheet.column_dimensions['C'].width = 50
# sheet.column_dimensions['D'].width = 30
# for i in range(0, index):
# for j in range(0, len(value[i])):
# sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j])).alignment = Alignment(wrapText=True)
# if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译':
# sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
# workbook.save(path)
def detect_with_ocr(video_path, book_path, start_time, end_time, state): def detect_with_ocr(video_path, book_path, start_time, end_time, state):
""" 使用ocr检测视频获取字幕并输出旁白推荐
:param video_path: 待处理视频地址
:param book_path: 表格存储位置
:param start_time: 视频实际开始时间
:param end_time: 视频实际结束时间
:param state: 用于通信的状态关键字
:return:
"""
if os.path.exists(book_path): if os.path.exists(book_path):
os.remove(book_path) os.remove(book_path)
book_name_xlsx = book_path book_name_xlsx = book_path
...@@ -285,6 +280,3 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state): ...@@ -285,6 +280,3 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
if __name__ == '__main__': if __name__ == '__main__':
pass pass
# video_path = "D:/heelo/hysxm_1.mp4"
# book_path = '何以笙箫默.xlsx'
# detect_with_ocr(video_path, book_path, 0, 300, [None])
...@@ -10,8 +10,8 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False) ...@@ -10,8 +10,8 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def random_int_list(start, stop, length): def random_int_list(start, stop, length):
""" """在某一段区间内取n个随机数
在某一段区间内取n个随机数
:param start: 随机数区间的最小值 :param start: 随机数区间的最小值
:param stop: 随机数区间的最大值 :param stop: 随机数区间的最大值
:param length: 随机数个数 :param length: 随机数个数
...@@ -30,8 +30,8 @@ def random_int_list(start, stop, length): ...@@ -30,8 +30,8 @@ def random_int_list(start, stop, length):
def detect_subtitle(frame): def detect_subtitle(frame):
""" """判断画面中是否含字幕
判断画面中是否含字幕
:param frame: 视频的某一帧画面 :param frame: 视频的某一帧画面
:return: True or False :return: True or False
""" """
...@@ -55,10 +55,11 @@ def detect_subtitle(frame): ...@@ -55,10 +55,11 @@ def detect_subtitle(frame):
def detect_movie(video_path, start, end, interval): def detect_movie(video_path, start, end, interval):
""" """使用整部视频进行测试,确定视频是否提供字幕
使用整部视频进行测试,确定视频是否提供字幕
:param video_path: 视频的地址 :param video_path: 视频的地址
:param start: 取随机帧的时间区间的开始时间 :param start: 取随机帧的时间区间的开始时间
:param end: 视频结束时间
:param interval: 取随机帧的每段区间时长,单位为秒 :param interval: 取随机帧的每段区间时长,单位为秒
:return: True or False(视频是否含字幕) :return: True or False(视频是否含字幕)
""" """
...@@ -94,9 +95,3 @@ def detect_movie(video_path, start, end, interval): ...@@ -94,9 +95,3 @@ def detect_movie(video_path, start, end, interval):
if __name__ == '__main__': if __name__ == '__main__':
pass pass
# video_path = r'D:\heelo\hysxm.mp4'
# start_time = time.time()
# start = 90
# interval = 120
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
...@@ -6,7 +6,6 @@ import traceback ...@@ -6,7 +6,6 @@ import traceback
from mttkinter import mtTkinter as tk from mttkinter import mtTkinter as tk
from tkinter import filedialog, ttk, messagebox from tkinter import filedialog, ttk, messagebox
import sys import sys
import io
import os import os
import datetime import datetime
from speech_synthesis import ss_and_export from speech_synthesis import ss_and_export
...@@ -25,34 +24,39 @@ window.iconbitmap("eagle_2.ico") ...@@ -25,34 +24,39 @@ window.iconbitmap("eagle_2.ico")
video_duration = "" video_duration = ""
def create_detail_day(): def create_detail_day() -> str:
"""生成当天日期
:return: [str], 当天日期
"""
daytime = datetime.datetime.now().strftime('day' + '%Y_%m_%d') daytime = datetime.datetime.now().strftime('day' + '%Y_%m_%d')
return daytime return daytime
def make_print_to_file(path='./'): def make_print_to_file(path='./'):
class Logger(object): """将print的内容输出到log文件夹中
def __init__(self, filename="detect_with_ocr.log", path='./'):
:param path:设置的log文件夹路径
:return:
"""
if not os.path.exists(path): if not os.path.exists(path):
os.mkdir(path) os.mkdir(path)
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') filename = create_detail_day() + '.log'
self.terminal = sys.stdout f = open(os.path.join(path, filename), 'a', encoding='utf-8')
self.log = open(os.path.join(path, filename), "a", encoding='utf8') sys.stdout = f
print(path)
def write(self, message):
self.terminal.write(message)
self.log.write(message)
def flush(self):
pass
sys.stdout = Logger(create_detail_day() + '.log', path=path)
class RunThread(threading.Thread):
"""复写线程类,用于解决主线程无法捕捉子线程中异常的问题
"""
# 复写线程,用于解决主线程无法步骤子线程中异常的问题
class RunThread(threading.Thread): # The timer class is derived from the class threading.Thread
def __init__(self, funcName, name, args=()): def __init__(self, funcName, name, args=()):
"""初始化类中的各项数据
:param funcName: 线程调用的函数名
:param name: 线程名
:param args: 传入函数的各项参数
"""
threading.Thread.__init__(self) threading.Thread.__init__(self)
self._args = args self._args = args
self._funcName = funcName self._funcName = funcName
...@@ -62,6 +66,10 @@ class RunThread(threading.Thread): # The timer class is derived from the class ...@@ -62,6 +66,10 @@ class RunThread(threading.Thread): # The timer class is derived from the class
self.exc_traceback = '' self.exc_traceback = ''
def run(self): # Overwrite run() method, put what you want the thread do here def run(self): # Overwrite run() method, put what you want the thread do here
"""运行线程,捕捉错误并更新参数
:return:
"""
try: try:
self._run() self._run()
except Exception as e: except Exception as e:
...@@ -71,12 +79,19 @@ class RunThread(threading.Thread): # The timer class is derived from the class ...@@ -71,12 +79,19 @@ class RunThread(threading.Thread): # The timer class is derived from the class
self.exc_traceback = ''.join(traceback.format_exception(*sys.exc_info())) # 在改成员变量中记录异常信息 self.exc_traceback = ''.join(traceback.format_exception(*sys.exc_info())) # 在改成员变量中记录异常信息
def _run(self): def _run(self):
"""运行函数,并合理抛出异常
:return:
"""
try:
self._funcName(*self._args) self._funcName(*self._args)
except Exception as e:
raise e
def open_video_file(): def open_video_file():
""" """在旁白推荐tab中打开视频文件
打开文件
:return: :return:
""" """
video_path = filedialog.askopenfilename(title=u'选择文件', video_path = filedialog.askopenfilename(title=u'选择文件',
...@@ -99,8 +114,8 @@ def open_video_file(): ...@@ -99,8 +114,8 @@ def open_video_file():
def find_save_file(): def find_save_file():
""" """在旁白推荐tab中找到保存表格的地址
找到保存表格的地址
:return: :return:
""" """
video_path = inputFilePath.get() video_path = inputFilePath.get()
...@@ -118,6 +133,11 @@ def find_save_file(): ...@@ -118,6 +133,11 @@ def find_save_file():
def trans_to_seconds(timePoint): def trans_to_seconds(timePoint):
"""将用户输入的时间字符串转换为秒数
:param timePoint: 时间字符串
:return: [float], 秒数
"""
time_in_seconds = 0 time_in_seconds = 0
timePoints = timePoint.split(':') timePoints = timePoint.split(':')
units = 1 units = 1
...@@ -127,8 +147,12 @@ def trans_to_seconds(timePoint): ...@@ -127,8 +147,12 @@ def trans_to_seconds(timePoint):
return time_in_seconds return time_in_seconds
# 检查时间格式是否正确 def check_timePoint(timePoint) -> bool:
def check_timePoint(timePoint): """检查时间字符串格式是否正确
:param timePoint: 时间字符串
:return: [bool], True or False
"""
timePoints = timePoint.split(":") timePoints = timePoint.split(":")
hms = [0, 0, 0] hms = [0, 0, 0]
# 必须有三个隔断,分别代表小时、分钟和秒 # 必须有三个隔断,分别代表小时、分钟和秒
...@@ -156,8 +180,8 @@ def check_timePoint(timePoint): ...@@ -156,8 +180,8 @@ def check_timePoint(timePoint):
def start_process(p, p_label, state, intervals=100): def start_process(p, p_label, state, intervals=100):
""" """启动进度条
启动进度条
:param p: 进度条组件 :param p: 进度条组件
:param p_label: 进度条对应百分比文本 :param p_label: 进度条对应百分比文本
:param state: 进度条与任务用于通信对齐的变量,代表任务的实际进度 :param state: 进度条与任务用于通信对齐的变量,代表任务的实际进度
...@@ -181,8 +205,8 @@ def start_process(p, p_label, state, intervals=100): ...@@ -181,8 +205,8 @@ def start_process(p, p_label, state, intervals=100):
def start_detect(): def start_detect():
""" """检测旁白
开始检测旁白
:return: :return:
""" """
# 检测各种输入的合理性 # 检测各种输入的合理性
...@@ -229,7 +253,7 @@ def start_detect(): ...@@ -229,7 +253,7 @@ def start_detect():
# 显示进度条及开始检测 # 显示进度条及开始检测
progressbar_1.grid(column=2, row=1, sticky="W") progressbar_1.grid(column=2, row=1, sticky="W")
progress_1.grid(column=3, row=1, sticky="W") progress_1.grid(column=3, row=1, sticky="W")
processState.set("开始检测") processState.set("检测中……")
# 多线程同步进行检测和进度条更新 # 多线程同步进行检测和进度条更新
state = [None] state = [None]
threads = [] threads = []
...@@ -269,14 +293,18 @@ def start_detect(): ...@@ -269,14 +293,18 @@ def start_detect():
# 若不是意外中断,则将进度条的进度拉满到100%,并给出“任务已完成”的提示 # 若不是意外中断,则将进度条的进度拉满到100%,并给出“任务已完成”的提示
processState.set("任务已完成") processState.set("任务已完成")
progressbar_1.stop() progressbar_1.stop()
progressbar_1['value'] = 100 progressbar_1['value'] = 100.0
progress_1['text'] = "100.00%" progress_1['text'] = "100.0%"
# 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态 # 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态
stopDetection.config(state=tk.DISABLED) stopDetection.config(state=tk.DISABLED)
startDetection.config(state=tk.ACTIVE) startDetection.config(state=tk.ACTIVE)
def stop_detect(): def stop_detect():
"""停止旁白区间检测
:return:
"""
for x in threading.enumerate(): for x in threading.enumerate():
if x.getName() in ["startDetect", "startProgress1", "detect"]: if x.getName() in ["startDetect", "startProgress1", "detect"]:
_async_raise(x.ident, SystemExit) _async_raise(x.ident, SystemExit)
...@@ -291,9 +319,21 @@ def stop_detect(): ...@@ -291,9 +319,21 @@ def stop_detect():
progress_1.grid_forget() progress_1.grid_forget()
def open_sheet_file(): def confirm_video_path():
"""在旁白与字幕导出tab中输入视频路径
:return:
""" """
选择导入的旁白解说脚本表格所在位置 # 仅能打开mp4\rmvb\avi\mkv格式的文件
video_path = filedialog.askopenfilename(title=u'选择文件',
filetypes=[("视频文件", ".avi"), ("视频文件", ".mp4"), ("视频文件", ".rmvb"),
("视频文件", ".mkv")])
videoPath.set(video_path)
def open_sheet_file():
"""选择导入的旁白解说脚本表格所在位置
:return: :return:
""" """
sheet_path = filedialog.askopenfilename(title=u'选择文件', sheet_path = filedialog.askopenfilename(title=u'选择文件',
...@@ -304,8 +344,8 @@ def open_sheet_file(): ...@@ -304,8 +344,8 @@ def open_sheet_file():
def find_save_dir(): def find_save_dir():
""" """寻找存储音频的文件夹
寻找存储音频的文件夹
:return: :return:
""" """
audio_dir = filedialog.askdirectory(title=u'保存文件至') audio_dir = filedialog.askdirectory(title=u'保存文件至')
...@@ -314,8 +354,9 @@ def find_save_dir(): ...@@ -314,8 +354,9 @@ def find_save_dir():
def set_caption_file(): def set_caption_file():
""" """设置字幕文件存储路径
设置字幕文件存储路径(使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名)
使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名
:return: :return:
""" """
defaultName = os.path.basename(videoPath.get()).split('.')[0] + ".srt" defaultName = os.path.basename(videoPath.get()).split('.')[0] + ".srt"
...@@ -327,15 +368,12 @@ def set_caption_file(): ...@@ -327,15 +368,12 @@ def set_caption_file():
captionPath.set(caption_path) captionPath.set(caption_path)
def confirm_video_path():
# 仅能打开mp4\rmvb\avi\mkv格式的文件
video_path = filedialog.askopenfilename(title=u'选择文件',
filetypes=[("视频文件", ".avi"), ("视频文件", ".mp4"), ("视频文件", ".rmvb"),
("视频文件", ".mkv")])
videoPath.set(video_path)
def get_sheetHead(book_path): def get_sheetHead(book_path):
"""获取表头
:param book_path: 表格存储路径
:return: [list], 表头信息
"""
workbook = openpyxl.load_workbook(book_path) workbook = openpyxl.load_workbook(book_path)
sheet = workbook.active sheet = workbook.active
rows = sheet.max_row rows = sheet.max_row
...@@ -347,8 +385,13 @@ def get_sheetHead(book_path): ...@@ -347,8 +385,13 @@ def get_sheetHead(book_path):
return sheet_head return sheet_head
def check_sheet_content(sheet_path): def check_sheet_content(book_path):
sheet_heads = get_sheetHead(sheet_path) """检测表头是否符合要求
:param book_path: 表格存储路径
:return:
"""
sheet_heads = get_sheetHead(book_path)
need_heads = ['起始时间', '终止时间', '字幕', '建议', '解说脚本'] need_heads = ['起始时间', '终止时间', '字幕', '建议', '解说脚本']
if len(sheet_heads) == 0: if len(sheet_heads) == 0:
...@@ -360,8 +403,8 @@ def check_sheet_content(sheet_path): ...@@ -360,8 +403,8 @@ def check_sheet_content(sheet_path):
def start_synthesis(): def start_synthesis():
""" """开始合成语音
开始合成语音
:return: :return:
""" """
video_path = videoPath.get() video_path = videoPath.get()
...@@ -433,18 +476,27 @@ def start_synthesis(): ...@@ -433,18 +476,27 @@ def start_synthesis():
if t.exitcode != 0: if t.exitcode != 0:
print("Exception in", t.getName()) print("Exception in", t.getName())
messagebox.showerror("错误", "运行出错,请联系开发者处理") messagebox.showerror("错误", "运行出错,请联系开发者处理")
processState.set("任务中断") processState_2.set("任务中断")
progress_state = progressbar_2['value'] progress_state = progressbar_2['value']
progressbar_2.stop() progressbar_2.stop()
progressbar_2['value'] = progress_state progressbar_2['value'] = progress_state
startSynthesis.config(state=tk.ACTIVE) startSynthesis.config(state=tk.ACTIVE)
stopSynthesis.config(state=tk.DISABLED) stopSynthesis.config(state=tk.DISABLED)
return return
processState_2.set("任务完成")
progressbar_2.stop()
progressbar_2['value'] = 100.0
progress_2['text'] = "100.00%"
startSynthesis.config(state=tk.ACTIVE) startSynthesis.config(state=tk.ACTIVE)
stopSynthesis.config(state=tk.DISABLED) stopSynthesis.config(state=tk.DISABLED)
def stop_synthesis(): def stop_synthesis():
"""停止合成
:return:
"""
for x in threading.enumerate(): for x in threading.enumerate():
if x.getName() in ["startSynthesis", "startProgress2", "ssAndExport"]: if x.getName() in ["startSynthesis", "startProgress2", "ssAndExport"]:
_async_raise(x.ident, SystemExit) _async_raise(x.ident, SystemExit)
...@@ -460,6 +512,13 @@ def stop_synthesis(): ...@@ -460,6 +512,13 @@ def stop_synthesis():
def thread_it(func, *args, name): def thread_it(func, *args, name):
"""创建守护线程
:param func: 待执行的函数名
:param args: 函数所需参数
:param name: 线程名
:return:
"""
# 创建线程 # 创建线程
t = threading.Thread(target=func, args=args, name=name) t = threading.Thread(target=func, args=args, name=name)
# 守护 # 守护
...@@ -469,8 +528,8 @@ def thread_it(func, *args, name): ...@@ -469,8 +528,8 @@ def thread_it(func, *args, name):
def _async_raise(tid, exctype): def _async_raise(tid, exctype):
""" """终结线程
终结线程
:param tid: 线程id :param tid: 线程id
:param exctype: 关闭方式 :param exctype: 关闭方式
:return: :return:
...@@ -486,12 +545,6 @@ def _async_raise(tid, exctype): ...@@ -486,12 +545,6 @@ def _async_raise(tid, exctype):
raise SystemError("PyThreadState_SetAsyncExc failed") raise SystemError("PyThreadState_SetAsyncExc failed")
def _quit():
window.quit()
window.destroy()
exit()
# 创建tab栏 # 创建tab栏
tabControl = ttk.Notebook(window) tabControl = ttk.Notebook(window)
...@@ -589,7 +642,7 @@ stopDetection.config(state=tk.DISABLED) ...@@ -589,7 +642,7 @@ stopDetection.config(state=tk.DISABLED)
语音相关设置,包含以下内容: 语音相关设置,包含以下内容:
- 原视频|视频路径|上传文件按钮 - 原视频|视频路径|上传文件按钮
- 旁白脚本表格|表格路径|上传文件按钮 - 旁白脚本表格|表格路径|上传文件按钮
- 旁白语速选择 - 旁白语速选择|语速选项
""" """
audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ") audio_info = ttk.LabelFrame(tab2, text=" 语音相关设置 ")
audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3) audio_info.place(relx=0.05, rely=0.05, relwidth=0.9, relheight=0.3)
...@@ -669,8 +722,11 @@ stopSynthesis.grid(column=0, row=3) ...@@ -669,8 +722,11 @@ stopSynthesis.grid(column=0, row=3)
stopSynthesis.config(state=tk.DISABLED) stopSynthesis.config(state=tk.DISABLED)
# 用户点击关闭时进行询问
def on_closing(): def on_closing():
"""弹窗询问是否确认关闭
:return:
"""
if messagebox.askokcancel("提示", "您确定想要退出该程序吗?"): if messagebox.askokcancel("提示", "您确定想要退出该程序吗?"):
window.destroy() window.destroy()
......
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller spec file (packaging parameters) for the GUI entry script
# main_gui.py. The names Analysis, PYZ, EXE and COLLECT are not imported here;
# they are expected to be provided by PyInstaller when it executes this spec.
# No bytecode encryption is configured (cipher is None).
block_cipher = None
# Analysis step: starts from main_gui.py. `hiddenimports` lists modules that
# are force-included (presumably because they are imported dynamically by the
# OCR/ASR dependencies and missed by static analysis -- TODO confirm), and
# `excludes` lists packages that are deliberately left out of the bundle.
a = Analysis(['main_gui.py'],
pathex=[],
binaries=[],
datas=[],
hiddenimports=['astor','distutils','imgaug','lmdb','pyclipper','pywt','scipy','setuptools','shapely','skimage'],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
excludes=['PyQt5','google.api_core','google.cloud','google.cloud.storage','googleapiclient'],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
noarchive=False)
# Archive of the pure-Python modules collected by the analysis step.
pyz = PYZ(a.pure, a.zipped_data,
cipher=block_cipher)
# Executable definition. exclude_binaries=True keeps binaries out of the
# executable itself; they are gathered by the COLLECT step below instead.
# console=True builds the variant that shows a console window.
# NOTE(review): `icon` is an absolute path on the author's D: drive, so this
# spec will presumably only build on a machine with that exact layout --
# consider a path relative to the spec file.
exe = EXE(pyz,
a.scripts,
[],
exclude_binaries=True,
name='无障碍电影制作工具',
debug=False,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=True,
disable_windowed_traceback=False,
target_arch=None,
codesign_identity=None,
entitlements_file=None , icon='D:\\AddCaption\\accessibility_movie\\eagle_2.ico')
# Collect the executable together with its binaries, zip files and data files
# into the output folder named 'main_gui'. UPX compression is enabled, with
# vcruntime140.dll explicitly excluded from compression.
coll = COLLECT(exe,
a.binaries,
a.zipfiles,
a.datas,
strip=False,
upx=True,
upx_exclude=['vcruntime140.dll'],
name='main_gui')
...@@ -5,7 +5,19 @@ import time ...@@ -5,7 +5,19 @@ import time
def detect(video_path, start_time, end_time, book_path, state, subtitle=None): def detect(video_path, start_time, end_time, book_path, state, subtitle=None):
"""字幕及旁白区间检测
:param video_path: 待检测视频
:param start_time: 视频开始时间
:param end_time: 视频结束时间
:param book_path: 存放旁白的表格存储位置
:param state: 任务进行状态
:param subtitle: 视频是否有字幕
:return:
"""
print("开始检测") print("开始检测")
print("start_time", start_time)
print("end_time", end_time)
if book_path is None: if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx" book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else: else:
......
...@@ -16,8 +16,8 @@ ffmpeg_path = r'.\ffmpeg-4.3.1\bin\ffmpeg' ...@@ -16,8 +16,8 @@ ffmpeg_path = r'.\ffmpeg-4.3.1\bin\ffmpeg'
def speech_synthesis(text, output_file, speed): def speech_synthesis(text, output_file, speed):
""" """用于合成讲解音频并输出
用于合成讲解音频并输出
:param text: 解说文本 :param text: 解说文本
:param output_file: 输出文件路径 :param output_file: 输出文件路径
:param speed: 指定的音频语速,默认为1.0 :param speed: 指定的音频语速,默认为1.0
...@@ -54,8 +54,8 @@ def speech_synthesis(text, output_file, speed): ...@@ -54,8 +54,8 @@ def speech_synthesis(text, output_file, speed):
def change_speed(wav_path, speed=1.0): def change_speed(wav_path, speed=1.0):
""" """调整语速
调整语速
:param wav_path: 原音频路径 :param wav_path: 原音频路径
:param speed: 转换后的语速 :param speed: 转换后的语速
:return: :return:
...@@ -65,7 +65,8 @@ def change_speed(wav_path, speed=1.0): ...@@ -65,7 +65,8 @@ def change_speed(wav_path, speed=1.0):
def read_sheet(book_path, sheet_name=None): def read_sheet(book_path, sheet_name=None):
""" """读表
从表格中读出所有的内容,用dict保存(表格的格式固定,第一行为表头(起始时间|终止时间|字幕|建议|解说脚本)) 从表格中读出所有的内容,用dict保存(表格的格式固定,第一行为表头(起始时间|终止时间|字幕|建议|解说脚本))
:param book_path: 表格的存储路径 :param book_path: 表格的存储路径
:param sheet_name: 想要读取的表在excel表格中的名字(可选项) :param sheet_name: 想要读取的表在excel表格中的名字(可选项)
...@@ -87,12 +88,13 @@ def read_sheet(book_path, sheet_name=None): ...@@ -87,12 +88,13 @@ def read_sheet(book_path, sheet_name=None):
def get_narratage_text(sheet_content, speed): def get_narratage_text(sheet_content, speed):
""" """获取旁白解说文本及起止时间
根据从表格中获取到的内容,分析得到解说文本+对应开始时间
:param sheet_content: dict,keys=["起始时间","终止时间","字幕","建议","解说脚本"] :param sheet_content: [dict],keys=["起始时间","终止时间","字幕","建议","解说脚本"]
:param speed: float, 旁白语速 :param speed: [float], 旁白语速
:return: narratage_text: list, 旁白文本, :return: narratage_text: [list], 旁白文本,
narratage_start_time: list, 旁白对应开始时间 narratage_start_time: [list], 旁白对应开始时间
narratage_end_time: [list], 旁白对应结束时间
""" """
narratage = sheet_content['解说脚本'] narratage = sheet_content['解说脚本']
subtitle = sheet_content['字幕'] subtitle = sheet_content['字幕']
...@@ -134,6 +136,11 @@ def get_narratage_text(sheet_content, speed): ...@@ -134,6 +136,11 @@ def get_narratage_text(sheet_content, speed):
def second_to_str(seconds): def second_to_str(seconds):
"""秒数转字符串
:param seconds:秒数
:return: [str], ’时:分:秒‘格式的时间字符串
"""
seconds = float(seconds) seconds = float(seconds)
hour = int(seconds / 3600) hour = int(seconds / 3600)
minute = int((seconds - hour * 3600) / 60) minute = int((seconds - hour * 3600) / 60)
...@@ -144,8 +151,8 @@ def second_to_str(seconds): ...@@ -144,8 +151,8 @@ def second_to_str(seconds):
def export_caption(sheet_content, caption_file): def export_caption(sheet_content, caption_file):
""" """将用户校正后的字幕输出为字幕文件(srt格式)
将用户校正后的字幕输出为字幕文件(srt格式)
:param sheet_content: 用户校正后的表格内容 :param sheet_content: 用户校正后的表格内容
:return: :return:
""" """
...@@ -164,6 +171,13 @@ def export_caption(sheet_content, caption_file): ...@@ -164,6 +171,13 @@ def export_caption(sheet_content, caption_file):
def adjust_volume(origin, start_timestamp, end_timestamp): def adjust_volume(origin, start_timestamp, end_timestamp):
"""调整原音频中待插入旁白位置的音量
:param origin: 原音频存储位置
:param start_timestamp: 旁白开始时间
:param end_timestamp: 旁白结束时间
:return:
"""
global adjusted_wav_path global adjusted_wav_path
adjusted_wav_path = os.path.join(os.path.dirname(origin), adjusted_wav_path) adjusted_wav_path = os.path.join(os.path.dirname(origin), adjusted_wav_path)
n = len(start_timestamp) n = len(start_timestamp)
...@@ -177,6 +191,13 @@ def adjust_volume(origin, start_timestamp, end_timestamp): ...@@ -177,6 +191,13 @@ def adjust_volume(origin, start_timestamp, end_timestamp):
def mix_speech(origin, narratage_paths, start_timestamps): def mix_speech(origin, narratage_paths, start_timestamps):
"""将合成音频与原音频混合
:param origin: 原音频存储位置
:param narratage_paths: 旁白音频的存储位置
:param start_timestamps: 旁白音频的开始时间
:return:
"""
composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav") composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
command_line = '{} -i {}'.format(ffmpeg_path, origin) command_line = '{} -i {}'.format(ffmpeg_path, origin)
for i, narratage_path in enumerate(narratage_paths): for i, narratage_path in enumerate(narratage_paths):
...@@ -191,8 +212,8 @@ def mix_speech(origin, narratage_paths, start_timestamps): ...@@ -191,8 +212,8 @@ def mix_speech(origin, narratage_paths, start_timestamps):
def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None): def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
""" """生成语音并导出字幕
生成语音并导出字幕
:param video_path: 原视频的位置 :param video_path: 原视频的位置
:param sheet_path: 校对过的旁白脚本表格文件 :param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的文件夹 :param output_dir: 存放音频文件的文件夹
...@@ -250,24 +271,4 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state ...@@ -250,24 +271,4 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
if __name__ == '__main__': if __name__ == '__main__':
# 定义参数
# parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
# parser.add_argument("--video_path", required=True, type=str, help="原视频位置")
# parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
# parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
# parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
# parser.add_argument("--speed", type=float, default=1.0, help="设置语速,默认为1.0")
# args = parser.parse_args()
# video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
# args.sheet_path, args.output_dir, args.speed, args.caption_file
# video_path = 'D:/heelo/hysxm_3.mp4'
# sheet_path = 'D:/heelo/hysxm_3.xlsx'
# output_dir = 'D:/AddCaption/hysxm_3'
# speed = 1.25
# caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
#
# # 主函数执行
# ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
# caption_file=caption_file)
pass pass
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment