Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
6cd70d8a
Commit
6cd70d8a
authored
Mar 07, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1. [add] 添加docstring,做好代码规范化;
2. [add] 添加main_gui.spec(打包参数文件); 3. [modified] 修改旁白检测表格最后一行旁白推荐字数的计算方式; 4. [modified] 修改任务执行后的进度条显示状态。
parent
efcd6148
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
286 additions
and
269 deletions
+286
-269
.gitignore
.gitignore
+3
-0
infer_path.py
PaddlePaddle_DeepSpeech2/infer_path.py
+9
-73
detect_with_asr.py
detect_with_asr.py
+19
-41
detect_with_ocr.py
detect_with_ocr.py
+38
-46
judge_subtitle.py
judge_subtitle.py
+7
-12
main_gui.py
main_gui.py
+118
-62
main_gui.spec
main_gui.spec
+44
-0
narratage_detection.py
narratage_detection.py
+12
-0
speech_synthesis.py
speech_synthesis.py
+36
-35
No files found.
.gitignore
View file @
6cd70d8a
/dist
/build
/missing_packages
/aborted_icons
/无障碍电影制作工具(无黑窗口).zip
/无障碍电影制作工具(有黑窗口).zip
PaddlePaddle_DeepSpeech2/infer_path.py
View file @
6cd70d8a
...
...
@@ -13,36 +13,17 @@ import os
normal_speed
=
4
# from data_utils.audio_process import AudioInferProcess
# from utils.predict import Predictor
# from utils.audio_vad import crop_audio_vad
# from utils.utility import add_arguments, print_arguments
# parser = argparse.ArgumentParser(description=__doc__)
# add_arg = functools.partial(add_arguments, argparser=parser)
# add_arg('wav_path', str, './dataset/test.wav', "预测音频的路径")
# add_arg('is_long_audio', bool, False, "是否为长语音")
# add_arg('use_gpu', bool, False, "是否使用GPU预测")
# add_arg('enable_mkldnn', bool, False, "是否使用mkldnn加速")
# add_arg('to_an', bool, True, "是否转为阿拉伯数字")
# add_arg('beam_size', int, 300, "集束搜索解码相关参数,搜索的大小,范围:[5, 500]")
# add_arg('alpha', float, 1.2, "集束搜索解码相关参数,LM系数")
# add_arg('beta', float, 0.35, "集束搜索解码相关参数,WC系数")
# add_arg('cutoff_prob', float, 0.99, "集束搜索解码相关参数,剪枝的概率")
# add_arg('cutoff_top_n', int, 40, "集束搜索解码相关参数,剪枝的最大值")
# add_arg('mean_std_path', str, './PaddlePaddle_DeepSpeech2/dataset/mean_std.npz', "数据集的均值和标准值的npy文件路径")
# add_arg('vocab_path', str, './PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt', "数据集的词汇表文件路径")
# add_arg('model_dir', str, './PaddlePaddle_DeepSpeech2/models/infer/', "导出的预测模型文件夹路径")
# add_arg('lang_model_path', str, './PaddlePaddle_DeepSpeech2/lm/zh_giga.no_cna_cmn.prune01244.klm',
# "集束搜索解码相关参数,语言模型文件路径")
# add_arg('decoding_method', str, 'ctc_greedy', "结果解码方法,有集束搜索(ctc_beam_search)、贪婪策略(ctc_greedy)",
# choices=['ctc_beam_search', 'ctc_greedy'])
# args = parser.parse_args()
# print_arguments(args)
# 使用paddle deepspeech进行语音识别
def
predict_long_audio_with_paddle
(
wav_path
,
pre_time
,
book_name
,
sheet_name
,
state
):
"""使用paddle deepspeech进行语音识别
:param wav_path: 音频路径
:param pre_time: 音频前置时间(即视频开始位置)
:param book_name: 旁白存储表格路径
:param sheet_name: excel表格中的表名
:param state: 用于通信的状态关键字
:return:
"""
# 获取数据生成器,处理数据和获取字典需要
vocab_path
=
'./PaddlePaddle_DeepSpeech2/dataset/zh_vocab.txt'
mean_std_path
=
'./PaddlePaddle_DeepSpeech2/dataset/mean_std.npz'
...
...
@@ -106,51 +87,6 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
if
os
.
path
.
exists
(
save_path
):
shutil
.
rmtree
(
save_path
)
return
narratages
# # 使用网上已有的模型进行识别(效果差)
# def predict_audio_with_paddle():
# start = time.time()
# text = asr_executor(
# model='conformer_wenetspeech',
# lang='zh',
# sample_rate=16000,
# config=None, # Set `config` and `ckpt_path` to None to use pretrained model.
# ckpt_path=None,
# audio_file=args.wav_path,
# force_yes=False,
# device=paddle.get_device()
# )
# print("消耗时间:%dms, 识别结果: %s" % (round((time.time() - start) * 1000), text))
#
#
# def predict_long_audio():
# start = time.time()
# # 分割长音频
# audios_path = crop_audio_vad(args.wav_path)
# texts = ''
# scores = []
# # 执行识别
# for i, audio_path in enumerate(audios_path):
# score, text = predictor.predict(audio_path=audio_path, to_an=args.to_an)
# texts = texts + ',' + text
# scores.append(score)
# print("第%d个分割音频, 得分: %d, 识别结果: %s" % (i, score, text))
# print("最终结果,消耗时间:%d, 得分: %d, 识别结果: %s" % (round((time.time() - start) * 1000), sum(scores) / len(scores), texts))
#
#
# def predict_audio():
# start = time.time()
# score, text = predictor.predict(audio_path=args.wav_path, to_an=args.to_an)
# print("消耗时间:%dms, 识别结果: %s, 得分: %d" % (round((time.time() - start) * 1000), text, score))
if
__name__
==
"__main__"
:
# if args.is_long_audio:
# # predict_long_audio()
# predict_long_audio_with_paddle()
# else:
# # predict_audio()
# predict_audio_with_paddle()
pass
detect_with_asr.py
View file @
6cd70d8a
...
...
@@ -9,11 +9,11 @@ from split_wav import *
def
create_sheet
(
path
,
sheet_name
,
value
):
"""
根据给定的表头,初始化表格,
:param path:
str
, 表格(book)的存储位置
:param sheet_name:
str
, 表(sheet)的名字
:param value:
list
, 表头内容为['起始时间','终止时间','字幕','建议','旁边解说脚本']
"""
根据给定的表头,初始化表格
:param path:
[str]
, 表格(book)的存储位置
:param sheet_name:
[str]
, 表(sheet)的名字
:param value:
[list]
, 表头内容为['起始时间','终止时间','字幕','建议','解说脚本']
:return: None
"""
index
=
len
(
value
)
...
...
@@ -30,11 +30,11 @@ def create_sheet(path, sheet_name, value):
def
write_to_sheet
(
path
,
sheet_name
,
value
):
"""
向已存在的表格中写入数据
:param path:
:param sheet_name:
:param value:
"""
向已存在的表格中写入数据
:param path:
表格存储位置
:param sheet_name:
excel表内的表名
:param value:
插入数据
:return:
"""
index
=
len
(
value
)
...
...
@@ -50,32 +50,16 @@ def write_to_sheet(path, sheet_name, value):
workbook
.
save
(
path
)
def trans_to_mono(wav_path):
    """Convert an audio file to a single (mono) channel with ffmpeg.

    The converted file is written next to the input; an existing output
    file is overwritten (``-y``). ffmpeg's exit status is not inspected.

    :param wav_path: [str], path of the audio file to convert
    :return: [str], path of the converted file, i.e. the input path with
        its extension replaced by ``_1.wav``
    """
    import subprocess

    # splitext instead of slicing off the last four characters, so inputs
    # whose extension is not exactly three letters keep their full stem.
    stem, _ = os.path.splitext(wav_path)
    new_wav_path = stem + "_1.wav"

    # An argument list (no shell) passes paths containing spaces or shell
    # metacharacters to ffmpeg unchanged.
    command = ['ffmpeg', '-i', wav_path, '-ac', '1', '-y', new_wav_path]
    try:
        subprocess.run(command, check=False)
    except OSError as err:
        # Stay best-effort like the previous os.system call, which never
        # raised, but report why ffmpeg could not be launched.
        print('ffmpeg could not be started: {}'.format(err))
    return new_wav_path
def concat_wav(root):
    """Concatenate the ``vocal.wav`` of every sub-directory of ``root``.

    A concat list (``list.txt``) is written into ``root`` with one
    ``file '<path>'`` entry per sub-directory, then ffmpeg's concat
    demuxer is asked to join the entries into ``total.wav`` inside
    ``root``. ffmpeg's exit status is not inspected.

    :param root: [str], directory whose sub-directories each hold a
        ``vocal.wav``
    :return: [str], path of the concatenated file (``<root>/total.wav``)
    """
    import subprocess

    txt_path = os.path.join(root, 'list.txt')
    with open(txt_path, 'w', encoding='utf-8') as f:
        # os.listdir returns entries in arbitrary order; sort them so the
        # segments are joined in a deterministic order.
        for file_name in sorted(os.listdir(root)):
            segment_dir = os.path.join(root, file_name)
            if not os.path.isdir(segment_dir):
                continue
            # Absolute path: relative entries in a concat list are resolved
            # against the list file's own directory, not the working directory.
            wav_path = os.path.join(os.path.abspath(segment_dir), 'vocal.wav')
            # A single quote inside a quoted entry must be written as '\''.
            escaped = wav_path.replace("'", "'\\''")
            f.write("file '" + escaped + "'\n")

    output_file = os.path.join(root, 'total.wav')
    # An argument list (no shell) keeps paths with spaces intact.
    command = ['ffmpeg', '-f', 'concat', '-safe', '0',
               '-i', txt_path, '-y', output_file]
    try:
        subprocess.run(command, check=False)
    except OSError as err:
        # Stay best-effort like the previous os.system call, which never
        # raised, but report why ffmpeg could not be launched.
        print('ffmpeg could not be started: {}'.format(err))
    return output_file
def
detect_with_asr
(
video_path
,
book_path
,
start_time
=
0
,
end_time
=-
1
,
state
=
None
):
"""使用ASR检测视频中的字幕并推荐旁白
:param video_path: 待处理视频地址
:param book_path: 旁白表格输出地址
:param start_time: 视频实际开始时间
:param end_time: 视频实际结束时间
:param state: 用于通信的状态关键字
:return:
"""
# 临时存储各种中间产物的文件夹
tmp_root
=
os
.
path
.
join
(
os
.
path
.
dirname
(
video_path
),
'tmp'
)
print
(
tmp_root
)
...
...
@@ -87,12 +71,6 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1, state=None
return
# 提取出视频中的音频,分割后提取出其中的人声部分并存储
audio_path
=
extract_audio
(
video_path
,
tmp_root
,
start_time
,
end_time
)
# root = split_audio()
# extract_speech()
#
# # 将提取出的人声拼接,并将音频的channel调整为1
# total_wav_path = concat_wav(root)
# audio_path = trans_to_mono(total_wav_path)
# xlsx中的表格名为“旁白插入位置建议”
if
os
.
path
.
exists
(
book_path
):
...
...
detect_with_ocr.py
View file @
6cd70d8a
...
...
@@ -21,10 +21,11 @@ normal_speed = 4
def
get_position
(
video_path
,
start_time
):
"""
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param start_time: 视频实际开始时间
:param video_path: 视频存储路径
:return: 字幕在整个画面中的实际上下边界位置
:return:
[float,float],
字幕在整个画面中的实际上下边界位置
"""
video
=
cv2
.
VideoCapture
(
video_path
)
subtitle_position
=
{}
...
...
@@ -90,17 +91,22 @@ def get_position(video_path, start_time):
def erasePunc(txt):
    """Remove every character outside the range U+4E00..U+9FA5.

    Punctuation, digits, Latin letters and whitespace are all dropped, so
    only the Chinese characters of the input remain.

    :param txt: [str], string to clean
    :return: [str], the input with all non-matching characters removed
    """
    non_chinese = re.compile(r'[^\u4e00-\u9fa5]')
    return non_chinese.sub('', txt)
def
string_similar
(
s1
,
s2
):
"""
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
:param s1:
:param s2:
:return: 字符串间的相似度
"""
比较字符串s1和s2的相似度,主要用于减少输出文件中相似字幕的重复
:param s1:
第一个字符串
:param s2:
第二个字符串
:return:
[float],
字符串间的相似度
"""
# 去除非中文字符后,再比较相似度
s1
=
erasePunc
(
s1
)
...
...
@@ -109,8 +115,10 @@ def string_similar(s1, s2):
def
normalize
(
text
):
"""
用于规范化处理文本中的一些标点符号
"""规范化处理文本中的一些标点符号
:param text: 待处理字符串
:return: 处理后的字符串
"""
# 将英文标点转换为中文标点
E_pun
=
u',.!?()[]:;'
...
...
@@ -127,10 +135,10 @@ def normalize(text):
def
detect_subtitle
(
img
):
"""
检测当前画面得到字幕信息
"""
检测当前画面得到字幕信息
:param img: 当前画面
:return: 字幕信息
:return:
[str|None],
字幕信息
"""
subTitle
=
''
img
=
img
[
int
(
up_b
)
-
30
:
int
(
down_b
)
+
30
]
...
...
@@ -164,8 +172,8 @@ def detect_subtitle(img):
def
process_video
(
video_path
,
begin
,
end
,
book_path
,
sheet_name
,
state
):
"""
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
"""
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径
:param begin: 电影的实际开始位置(秒)
:param end: 电影除演职表外的实际结束位置(秒)
...
...
@@ -179,8 +187,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
lastSubTitle
=
None
# res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
res
=
[]
# narratage_recommend是旁白推荐信息,用于输出为表格
narratage_recommend
=
[]
cnt
=
0
start_time
=
0
end_time
=
0
...
...
@@ -236,38 +242,27 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
if
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
>
end
:
if
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
end_time
>
1
:
print
(
'--------------------------------------------------'
)
recommend_lens
=
int
(
res
[
-
1
][
0
]
*
normal_speed
)
if
len
(
res
)
==
1
else
int
(
(
res
[
-
1
][
0
]
-
res
[
-
2
][
1
])
*
normal_speed
)
# 还没有字幕被分析出来
# if len(res) == 0:
recommend_lens
=
int
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
end_time
)
*
normal_speed
)
# else:
# recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
# (res[-1][0] - res[-2][1]) * normal_speed)
# narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
write_to_sheet
(
book_path
,
sheet_name
,
[
''
,
''
,
''
,
'插入旁白,推荐字数为
%
d'
%
recommend_lens
])
break
return
narratage_recommend
# def write_excel_xlsx(path, sheet_name, value):
# """
# 将旁白推荐信息输出表格
# :param path: 输出表格的存储路径
# :param sheet_name:表格中的表名
# :param value:输出到表格中的信息
# :return:
# """
# index = len(value)
# workbook = Workbook()
# sheet = workbook.active
# sheet.title = sheet_name
# # 将字幕对应的那一列扩宽一些
# sheet.column_dimensions['C'].width = 50
# sheet.column_dimensions['D'].width = 30
# for i in range(0, index):
# for j in range(0, len(value[i])):
# sheet.cell(row=i + 1, column=j + 1, value=str(value[i][j])).alignment = Alignment(wrapText=True)
# if value[i][j] == '' or '插入旁白' in str(value[i][j]) or value[i][j] == '翻译':
# sheet.cell(row=i + 1, column=j + 1).fill = PatternFill(fill_type='solid', fgColor='ffff00')
# workbook.save(path)
def
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
):
""" 使用ocr检测视频获取字幕并输出旁白推荐
:param video_path: 待处理视频地址
:param book_path: 表格存储位置
:param start_time: 视频实际开始时间
:param end_time: 视频实际结束时间
:param state: 用于通信的状态关键字
:return:
"""
if
os
.
path
.
exists
(
book_path
):
os
.
remove
(
book_path
)
book_name_xlsx
=
book_path
...
...
@@ -285,6 +280,3 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
if
__name__
==
'__main__'
:
pass
# video_path = "D:/heelo/hysxm_1.mp4"
# book_path = '何以笙箫默.xlsx'
# detect_with_ocr(video_path, book_path, 0, 300, [None])
judge_subtitle.py
View file @
6cd70d8a
...
...
@@ -10,8 +10,8 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)
def
random_int_list
(
start
,
stop
,
length
):
"""
在某一段区间内取n个随机数
"""
在某一段区间内取n个随机数
:param start: 随机数区间的最小值
:param stop: 随机数区间的最大值
:param length: 随机数个数
...
...
@@ -30,8 +30,8 @@ def random_int_list(start, stop, length):
def
detect_subtitle
(
frame
):
"""
判断画面中是否含字幕
"""
判断画面中是否含字幕
:param frame: 视频的某一帧画面
:return: True or False
"""
...
...
@@ -55,10 +55,11 @@ def detect_subtitle(frame):
def
detect_movie
(
video_path
,
start
,
end
,
interval
):
"""
使用整部视频进行测试,确定视频是否提供字幕
"""
使用整部视频进行测试,确定视频是否提供字幕
:param video_path: 视频的地址
:param start: 取随机帧的时间区间的开始时间
:param end: 视频结束时间
:param interval: 取随机帧的每段区间时长,单位为秒
:return: True or False(视频是否含字幕)
"""
...
...
@@ -94,9 +95,3 @@ def detect_movie(video_path, start, end, interval):
if
__name__
==
'__main__'
:
pass
# video_path = r'D:\heelo\hysxm.mp4'
# start_time = time.time()
# start = 90
# interval = 120
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
main_gui.py
View file @
6cd70d8a
...
...
@@ -6,7 +6,6 @@ import traceback
from
mttkinter
import
mtTkinter
as
tk
from
tkinter
import
filedialog
,
ttk
,
messagebox
import
sys
import
io
import
os
import
datetime
from
speech_synthesis
import
ss_and_export
...
...
@@ -25,34 +24,39 @@ window.iconbitmap("eagle_2.ico")
video_duration
=
""
def
create_detail_day
():
def create_detail_day() -> str:
    """Build a date tag for the current local day.

    :return: [str], the literal prefix ``day`` followed by the current
        date formatted as ``%Y_%m_%d`` (for example ``day2022_03_07``)
    """
    prefix = 'day'
    date_pattern = '%Y_%m_%d'
    today = datetime.datetime.now()
    return today.strftime(prefix + date_pattern)
def
make_print_to_file
(
path
=
'./'
):
class
Logger
(
object
):
def
__init__
(
self
,
filename
=
"detect_with_ocr.log"
,
path
=
'./'
):
if
not
os
.
path
.
exists
(
path
):
os
.
mkdir
(
path
)
sys
.
stdout
=
io
.
TextIOWrapper
(
sys
.
stdout
.
buffer
,
encoding
=
'utf-8'
)
self
.
terminal
=
sys
.
stdout
self
.
log
=
open
(
os
.
path
.
join
(
path
,
filename
),
"a"
,
encoding
=
'utf8'
)
print
(
path
)
def
write
(
self
,
message
):
self
.
terminal
.
write
(
message
)
self
.
log
.
write
(
message
)
"""将print的内容输出到log文件夹中
def
flush
(
self
):
pass
:param path:设置的log文件夹路径
:return:
"""
if
not
os
.
path
.
exists
(
path
):
os
.
mkdir
(
path
)
filename
=
create_detail_day
()
+
'.log'
f
=
open
(
os
.
path
.
join
(
path
,
filename
),
'a'
,
encoding
=
'utf-8'
)
sys
.
stdout
=
f
sys
.
stdout
=
Logger
(
create_detail_day
()
+
'.log'
,
path
=
path
)
class
RunThread
(
threading
.
Thread
):
"""复写线程类,用于解决主线程无法捕捉子线程中异常的问题
"""
# 复写线程,用于解决主线程无法捕捉子线程中异常的问题
class
RunThread
(
threading
.
Thread
):
# The timer class is derived from the class threading.Thread
def
__init__
(
self
,
funcName
,
name
,
args
=
()):
"""初始化类中的各项数据
:param funcName: 线程调用的函数名
:param name: 线程名
:param args: 传入函数的各项参数
"""
threading
.
Thread
.
__init__
(
self
)
self
.
_args
=
args
self
.
_funcName
=
funcName
...
...
@@ -62,6 +66,10 @@ class RunThread(threading.Thread): # The timer class is derived from the class
self
.
exc_traceback
=
''
def
run
(
self
):
# Overwrite run() method, put what you want the thread do here
"""运行线程,捕捉错误并更新参数
:return:
"""
try
:
self
.
_run
()
except
Exception
as
e
:
...
...
@@ -71,12 +79,19 @@ class RunThread(threading.Thread): # The timer class is derived from the class
self
.
exc_traceback
=
''
.
join
(
traceback
.
format_exception
(
*
sys
.
exc_info
()))
# 在该成员变量中记录异常信息
def
_run
(
self
):
self
.
_funcName
(
*
self
.
_args
)
"""运行函数,并合理抛出异常
:return:
"""
try
:
self
.
_funcName
(
*
self
.
_args
)
except
Exception
as
e
:
raise
e
def
open_video_file
():
"""
打开文件
"""
在旁白推荐tab中打开视频文件
:return:
"""
video_path
=
filedialog
.
askopenfilename
(
title
=
u'选择文件'
,
...
...
@@ -99,8 +114,8 @@ def open_video_file():
def
find_save_file
():
"""
找到保存表格的地址
"""
在旁白推荐tab中找到保存表格的地址
:return:
"""
video_path
=
inputFilePath
.
get
()
...
...
@@ -118,6 +133,11 @@ def find_save_file():
def
trans_to_seconds
(
timePoint
):
"""将用户输入的时间字符串转换为秒数
:param timePoint: 时间字符串
:return: [float], 秒数
"""
time_in_seconds
=
0
timePoints
=
timePoint
.
split
(
':'
)
units
=
1
...
...
@@ -127,8 +147,12 @@ def trans_to_seconds(timePoint):
return
time_in_seconds
# 检查时间格式是否正确
def
check_timePoint
(
timePoint
):
def
check_timePoint
(
timePoint
)
->
bool
:
"""检查时间字符串格式是否正确
:param timePoint: 时间字符串
:return: [bool], True or False
"""
timePoints
=
timePoint
.
split
(
":"
)
hms
=
[
0
,
0
,
0
]
# 必须有三个隔断,分别代表小时、分钟和秒
...
...
@@ -156,8 +180,8 @@ def check_timePoint(timePoint):
def
start_process
(
p
,
p_label
,
state
,
intervals
=
100
):
"""
启动进度条
"""
启动进度条
:param p: 进度条组件
:param p_label: 进度条对应百分比文本
:param state: 进度条与任务用于通信对齐的变量,代表任务的实际进度
...
...
@@ -181,8 +205,8 @@ def start_process(p, p_label, state, intervals=100):
def
start_detect
():
"""
开始检测旁白
"""
检测旁白
:return:
"""
# 检测各种输入的合理性
...
...
@@ -229,7 +253,7 @@ def start_detect():
# 显示进度条及开始检测
progressbar_1
.
grid
(
column
=
2
,
row
=
1
,
sticky
=
"W"
)
progress_1
.
grid
(
column
=
3
,
row
=
1
,
sticky
=
"W"
)
processState
.
set
(
"
开始检测
"
)
processState
.
set
(
"
检测中……
"
)
# 多线程同步进行检测和进度条更新
state
=
[
None
]
threads
=
[]
...
...
@@ -269,14 +293,18 @@ def start_detect():
# 若不是意外中断,则将进度条的进度拉满到100%,并给出“任务已完成”的提示
processState
.
set
(
"任务已完成"
)
progressbar_1
.
stop
()
progressbar_1
[
'value'
]
=
100
progress_1
[
'text'
]
=
"100.0
0
%
"
progressbar_1
[
'value'
]
=
100
.0
progress_1
[
'text'
]
=
"100.0
%
"
# 检测完成后,将“停止检测”按钮设置为不可点击状态,“开始检测”按钮设置为可点击状态
stopDetection
.
config
(
state
=
tk
.
DISABLED
)
startDetection
.
config
(
state
=
tk
.
ACTIVE
)
def
stop_detect
():
"""停止旁白区间检测
:return:
"""
for
x
in
threading
.
enumerate
():
if
x
.
getName
()
in
[
"startDetect"
,
"startProgress1"
,
"detect"
]:
_async_raise
(
x
.
ident
,
SystemExit
)
...
...
@@ -291,9 +319,21 @@ def stop_detect():
progress_1
.
grid_forget
()
def
open_sheet_file
():
def
confirm_video_path
():
"""在旁白与字幕导出tab中输入视频路径
:return:
"""
选择导入的旁白解说脚本表格所在位置
# 仅能打开mp4\rmvb\avi\mkv格式的文件
video_path
=
filedialog
.
askopenfilename
(
title
=
u'选择文件'
,
filetypes
=
[(
"视频文件"
,
".avi"
),
(
"视频文件"
,
".mp4"
),
(
"视频文件"
,
".rmvb"
),
(
"视频文件"
,
".mkv"
)])
videoPath
.
set
(
video_path
)
def
open_sheet_file
():
"""选择导入的旁白解说脚本表格所在位置
:return:
"""
sheet_path
=
filedialog
.
askopenfilename
(
title
=
u'选择文件'
,
...
...
@@ -304,8 +344,8 @@ def open_sheet_file():
def
find_save_dir
():
"""
寻找存储音频的文件夹
"""
寻找存储音频的文件夹
:return:
"""
audio_dir
=
filedialog
.
askdirectory
(
title
=
u'保存文件至'
)
...
...
@@ -314,8 +354,9 @@ def find_save_dir():
def
set_caption_file
():
"""
设置字幕文件存储路径(使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名)
"""设置字幕文件存储路径
使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名
:return:
"""
defaultName
=
os
.
path
.
basename
(
videoPath
.
get
())
.
split
(
'.'
)[
0
]
+
".srt"
...
...
@@ -327,15 +368,12 @@ def set_caption_file():
captionPath
.
set
(
caption_path
)
def confirm_video_path():
    """Let the user pick a video file and store the chosen path.

    Opens a file-selection dialog restricted to .avi, .mp4, .rmvb and
    .mkv files, then passes whatever path the dialog returns to
    ``videoPath.set`` (``videoPath`` is defined elsewhere in the module).

    :return: None
    """
    # Only these container formats may be opened.
    extensions = (".avi", ".mp4", ".rmvb", ".mkv")
    video_types = [("视频文件", ext) for ext in extensions]
    chosen_path = filedialog.askopenfilename(title=u'选择文件',
                                             filetypes=video_types)
    videoPath.set(chosen_path)
def
get_sheetHead
(
book_path
):
"""获取表头
:param book_path: 表格存储路径
:return: [list], 表头信息
"""
workbook
=
openpyxl
.
load_workbook
(
book_path
)
sheet
=
workbook
.
active
rows
=
sheet
.
max_row
...
...
@@ -347,8 +385,13 @@ def get_sheetHead(book_path):
return
sheet_head
def
check_sheet_content
(
sheet_path
):
sheet_heads
=
get_sheetHead
(
sheet_path
)
def
check_sheet_content
(
book_path
):
"""检测表头是否符合要求
:param book_path: 表格存储路径
:return:
"""
sheet_heads
=
get_sheetHead
(
book_path
)
need_heads
=
[
'起始时间'
,
'终止时间'
,
'字幕'
,
'建议'
,
'解说脚本'
]
if
len
(
sheet_heads
)
==
0
:
...
...
@@ -360,8 +403,8 @@ def check_sheet_content(sheet_path):
def
start_synthesis
():
"""
开始合成语音
"""
开始合成语音
:return:
"""
video_path
=
videoPath
.
get
()
...
...
@@ -433,18 +476,27 @@ def start_synthesis():
if
t
.
exitcode
!=
0
:
print
(
"Exception in"
,
t
.
getName
())
messagebox
.
showerror
(
"错误"
,
"运行出错,请联系开发者处理"
)
processState
.
set
(
"任务中断"
)
processState
_2
.
set
(
"任务中断"
)
progress_state
=
progressbar_2
[
'value'
]
progressbar_2
.
stop
()
progressbar_2
[
'value'
]
=
progress_state
startSynthesis
.
config
(
state
=
tk
.
ACTIVE
)
stopSynthesis
.
config
(
state
=
tk
.
DISABLED
)
return
processState_2
.
set
(
"任务完成"
)
progressbar_2
.
stop
()
progressbar_2
[
'value'
]
=
100.0
progress_2
[
'text'
]
=
"100.00
%
"
startSynthesis
.
config
(
state
=
tk
.
ACTIVE
)
stopSynthesis
.
config
(
state
=
tk
.
DISABLED
)
def
stop_synthesis
():
"""停止合成
:return:
"""
for
x
in
threading
.
enumerate
():
if
x
.
getName
()
in
[
"startSynthesis"
,
"startProgress2"
,
"ssAndExport"
]:
_async_raise
(
x
.
ident
,
SystemExit
)
...
...
@@ -460,6 +512,13 @@ def stop_synthesis():
def
thread_it
(
func
,
*
args
,
name
):
"""创建守护线程
:param func: 待执行的函数名
:param args: 函数所需参数
:param name: 线程名
:return:
"""
# 创建线程
t
=
threading
.
Thread
(
target
=
func
,
args
=
args
,
name
=
name
)
# 守护
...
...
@@ -469,8 +528,8 @@ def thread_it(func, *args, name):
def
_async_raise
(
tid
,
exctype
):
"""
终结线程
"""
终结线程
:param tid: 线程id
:param exctype: 关闭方式
:return:
...
...
@@ -486,12 +545,6 @@ def _async_raise(tid, exctype):
raise
SystemError
(
"PyThreadState_SetAsyncExc failed"
)
def _quit():
    """Shut the application down.

    Calls ``quit()`` and then ``destroy()`` on the module-level ``window``
    object (defined elsewhere; presumably the Tk root window - confirm),
    and finally terminates the interpreter with ``exit()``.

    :return: None
    """
    window.quit()
    window.destroy()
    exit()
# 创建tab栏
tabControl
=
ttk
.
Notebook
(
window
)
...
...
@@ -589,7 +642,7 @@ stopDetection.config(state=tk.DISABLED)
语音相关设置,包含以下内容:
- 原视频|视频路径|上传文件按钮
- 旁白脚本表格|表格路径|上传文件按钮
- 旁白语速选择
- 旁白语速选择
|语速选项
"""
audio_info
=
ttk
.
LabelFrame
(
tab2
,
text
=
" 语音相关设置 "
)
audio_info
.
place
(
relx
=
0.05
,
rely
=
0.05
,
relwidth
=
0.9
,
relheight
=
0.3
)
...
...
@@ -669,8 +722,11 @@ stopSynthesis.grid(column=0, row=3)
stopSynthesis
.
config
(
state
=
tk
.
DISABLED
)
# 用户点击关闭时进行询问
def
on_closing
():
"""弹窗询问是否确认关闭
:return:
"""
if
messagebox
.
askokcancel
(
"提示"
,
"您确定想要退出该程序吗?"
):
window
.
destroy
()
...
...
main_gui.spec
0 → 100644
View file @
6cd70d8a
# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build description for main_gui.py.
# Analysis, PYZ, EXE, COLLECT and SPECPATH are names PyInstaller injects
# when it executes this spec file.
import os

block_cipher = None

a = Analysis(
    ['main_gui.py'],
    pathex=[],
    binaries=[],
    datas=[],
    # Modules listed explicitly so that they are bundled.
    hiddenimports=['astor', 'distutils', 'imgaug', 'lmdb', 'pyclipper',
                   'pywt', 'scipy', 'setuptools', 'shapely', 'skimage'],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    # Packages kept out of the bundle.
    excludes=['PyQt5', 'google.api_core', 'google.cloud',
              'google.cloud.storage', 'googleapiclient'],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)

pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='无障碍电影制作工具',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    # console=True keeps the console window attached to the executable.
    console=True,
    disable_windowed_traceback=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    # Resolve the icon next to this spec file instead of a machine-specific
    # absolute path, so the build works from any checkout location.
    icon=os.path.join(SPECPATH, 'eagle_2.ico'),
)

coll = COLLECT(
    exe,
    a.binaries,
    a.zipfiles,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=['vcruntime140.dll'],
    name='main_gui',
)
narratage_detection.py
View file @
6cd70d8a
...
...
@@ -5,7 +5,19 @@ import time
def
detect
(
video_path
,
start_time
,
end_time
,
book_path
,
state
,
subtitle
=
None
):
"""字幕及旁白区间检测
:param video_path: 待检测视频
:param start_time: 视频开始时间
:param end_time: 视频结束时间
:param book_path: 存放旁白的表格存储位置
:param state: 任务进行状态
:param subtitle: 视频是否有字幕
:return:
"""
print
(
"开始检测"
)
print
(
"start_time"
,
start_time
)
print
(
"end_time"
,
end_time
)
if
book_path
is
None
:
book_path
=
os
.
path
.
basename
(
video_path
)
.
split
(
'.'
)[
0
]
+
".xlsx"
else
:
...
...
speech_synthesis.py
View file @
6cd70d8a
...
...
@@ -16,8 +16,8 @@ ffmpeg_path = r'.\ffmpeg-4.3.1\bin\ffmpeg'
def
speech_synthesis
(
text
,
output_file
,
speed
):
"""
用于合成讲解音频并输出
"""
用于合成讲解音频并输出
:param text: 解说文本
:param output_file: 输出文件路径
:param speed: 指定的音频语速,默认为1.0
...
...
@@ -54,8 +54,8 @@ def speech_synthesis(text, output_file, speed):
def
change_speed
(
wav_path
,
speed
=
1.0
):
"""
调整语速
"""
调整语速
:param wav_path: 原音频路径
:param speed: 转换后的语速
:return:
...
...
@@ -65,7 +65,8 @@ def change_speed(wav_path, speed=1.0):
def
read_sheet
(
book_path
,
sheet_name
=
None
):
"""
"""读表
从表格中读出所有的内容,用dict保存(表格的格式固定,第一行为表头(起始时间|终止时间|字幕|建议|解说脚本))
:param book_path: 表格的存储路径
:param sheet_name: 想要读取的表在excel表格中的名字(可选项)
...
...
@@ -87,12 +88,13 @@ def read_sheet(book_path, sheet_name=None):
def
get_narratage_text
(
sheet_content
,
speed
):
"""
根据从表格中获取到的内容,分析得到解说文本+对应开始时间
:param sheet_content: dict,keys=["起始时间","终止时间","字幕","建议","解说脚本"]
:param speed: float, 旁白语速
:return: narratage_text: list, 旁白文本,
narratage_start_time: list, 旁白对应开始时间
"""获取旁白解说文本及起止时间
:param sheet_content: [dict],keys=["起始时间","终止时间","字幕","建议","解说脚本"]
:param speed: [float], 旁白语速
:return: narratage_text: [list], 旁白文本,
narratage_start_time: [list], 旁白对应开始时间
narratage_end_time: [list], 旁白对应结束时间
"""
narratage
=
sheet_content
[
'解说脚本'
]
subtitle
=
sheet_content
[
'字幕'
]
...
...
@@ -134,6 +136,11 @@ def get_narratage_text(sheet_content, speed):
def
second_to_str
(
seconds
):
"""秒数转字符串
:param seconds:秒数
:return: [str], ‘时:分:秒’格式的时间字符串
"""
seconds
=
float
(
seconds
)
hour
=
int
(
seconds
/
3600
)
minute
=
int
((
seconds
-
hour
*
3600
)
/
60
)
...
...
@@ -144,8 +151,8 @@ def second_to_str(seconds):
def
export_caption
(
sheet_content
,
caption_file
):
"""
将用户校正后的字幕输出为字幕文件(srt格式)
"""
将用户校正后的字幕输出为字幕文件(srt格式)
:param sheet_content: 用户校正后的表格内容
:return:
"""
...
...
@@ -164,6 +171,13 @@ def export_caption(sheet_content, caption_file):
def
adjust_volume
(
origin
,
start_timestamp
,
end_timestamp
):
"""调整原音频中待插入旁白位置的音量
:param origin: 原音频存储位置
:param start_timestamp: 旁白开始时间
:param end_timestamp: 旁白结束时间
:return:
"""
global
adjusted_wav_path
adjusted_wav_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
origin
),
adjusted_wav_path
)
n
=
len
(
start_timestamp
)
...
...
@@ -177,6 +191,13 @@ def adjust_volume(origin, start_timestamp, end_timestamp):
def
mix_speech
(
origin
,
narratage_paths
,
start_timestamps
):
"""将合成音频与原音频混合
:param origin: 原音频存储位置
:param narratage_paths: 旁白音频的存储位置
:param start_timestamps: 旁白音频的开始时间
:return:
"""
composed_wav_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
origin
),
"composed.wav"
)
command_line
=
'{} -i {}'
.
format
(
ffmpeg_path
,
origin
)
for
i
,
narratage_path
in
enumerate
(
narratage_paths
):
...
...
@@ -191,8 +212,8 @@ def mix_speech(origin, narratage_paths, start_timestamps):
def
ss_and_export
(
video_path
,
sheet_path
,
output_dir
,
speed
,
caption_file
,
state
=
None
):
"""
生成语音并导出字幕
"""
生成语音并导出字幕
:param video_path: 原视频的位置
:param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的文件夹
...
...
@@ -250,24 +271,4 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
if
__name__
==
'__main__'
:
# 定义参数
# parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
# parser.add_argument("--video_path", required=True, type=str, help="原视频位置")
# parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
# parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
# parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
# parser.add_argument("--speed", type=float, default=1.0, help="设置语速,默认为1.0")
# args = parser.parse_args()
# video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
# args.sheet_path, args.output_dir, args.speed, args.caption_file
# video_path = 'D:/heelo/hysxm_3.mp4'
# sheet_path = 'D:/heelo/hysxm_3.xlsx'
# output_dir = 'D:/AddCaption/hysxm_3'
# speed = 1.25
# caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
#
# # 主函数执行
# ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
# caption_file=caption_file)
pass
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment