Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
dda3b840
Commit
dda3b840
authored
Jan 18, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1. 修改判定字幕的算法; 2.在界面中添加停止按钮,用于终止当前任务,重新开始新的任务; 3.将检测旁白的进度条优化为真进度条;
parent
742031dc
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
123 additions
and
41 deletions
+123
-41
detect_with_asr.py
detect_with_asr.py
+3
-3
detect_with_ocr.py
detect_with_ocr.py
+6
-4
judge_subtitle.py
judge_subtitle.py
+11
-7
narratage_detection.py
narratage_detection.py
+3
-3
speech_synthesis.py
speech_synthesis.py
+1
-0
try_with_gui.py
try_with_gui.py
+99
-24
No files found.
detect_with_asr.py
View file @
dda3b840
...
...
@@ -73,7 +73,7 @@ def concat_wav(root):
return
output_file
def
detect_with_asr
(
video_path
,
book_path
,
start_time
=
0
,
end_time
=-
1
):
def
detect_with_asr
(
video_path
,
book_path
,
start_time
=
0
,
end_time
=-
1
,
state
=
None
):
# 临时存储各种中间产物的文件夹
tmp_root
=
'./tmp'
if
not
os
.
path
.
exists
(
tmp_root
):
...
...
@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
sys
.
path
.
append
(
"./PaddlePaddle_DeepSpeech2"
)
from
infer_path
import
predict_long_audio_with_paddle
table_content
=
predict_long_audio_with_paddle
(
audio_path
,
book_name_xlsx
,
start_tim
e
)
table_content
=
predict_long_audio_with_paddle
(
audio_path
,
start_time
,
stat
e
)
write_to_sheet
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
state
[
0
]
=
1
# 删除中间文件
# shutil.rmtree(tmp_root)
...
...
detect_with_ocr.py
View file @
dda3b840
...
...
@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
)
def
get_position
(
video_path
,
start_time
):
def
get_position
(
video_path
,
start_time
):
"""
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param video_path: 视频存储路径
...
...
@@ -130,7 +130,7 @@ def detect_subtitle(img):
return
None
def
process_video
(
video_path
,
begin
,
end
):
def
process_video
(
video_path
,
begin
,
end
,
state
):
"""
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径
...
...
@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
cnt
+=
1
# 每秒取4帧画面左右
if
cnt
%
int
(
fps
/
4
)
==
0
:
state
[
0
]
=
float
(
cnt
/
video
.
get
(
cv2
.
CAP_PROP_FRAME_COUNT
))
if
state
[
0
]
<
0.99
else
0.99
subTitle
=
detect_subtitle
(
frame
)
# 第一次找到字幕
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
...
...
@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
workbook
.
save
(
path
)
def
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
):
def
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
):
book_name_xlsx
=
book_path
sheet_name_xlsx
=
"旁白插入位置建议"
...
...
@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
# 获取并构建输出信息
table_head
=
[[
"起始时间"
,
"终止时间"
,
"字幕"
,
'建议'
,
'解说脚本'
]]
table_content
=
table_head
+
process_video
(
video_path
,
start_time
,
end_time
)
table_content
=
table_head
+
process_video
(
video_path
,
start_time
,
end_time
,
state
)
# 输出旁白位置推荐信息到表格
write_excel_xlsx
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
state
[
0
]
=
1
if
__name__
==
'__main__'
:
...
...
judge_subtitle.py
View file @
dda3b840
...
...
@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
interval
=
interval
*
fps
random_number
=
50
ans
=
[
False
]
*
3
print
(
ans
)
for
i
in
range
(
3
):
random_list
=
random_int_list
(
start
,
start
+
interval
,
random_number
)
start
=
start
+
interval
...
...
@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
if
ans
[
i
]:
print
(
random_point
)
break
if
not
ans
[
i
]:
print
(
'{}-{}时间段内未检测到字幕'
.
format
(
start
,
start
+
interval
))
if
i
==
1
and
Counter
(
ans
)
.
most_common
(
1
)[
0
][
0
]
is
False
:
break
video
.
release
()
print
(
ans
)
return
Counter
(
ans
)
.
most_common
(
1
)[
0
][
0
]
if
__name__
==
'__main__'
:
video_path
=
r'D:\heelo\hysxm.mp4'
start_time
=
time
.
time
()
start
=
90
interval
=
120
print
(
detect_movie
(
video_path
,
start
,
interval
))
print
(
time
.
time
()
-
start_time
)
pass
# video_path = r'D:\heelo\hysxm.mp4'
# start_time = time.time()
# start = 90
# interval = 120
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
narratage_detection.py
View file @
dda3b840
...
...
@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return
time_in_seconds
def
detect
(
video_path
,
start_time
,
end_time
,
book_path
):
def
detect
(
video_path
,
start_time
,
end_time
,
book_path
,
state
):
print
(
"开始检测"
)
if
book_path
is
None
:
book_path
=
os
.
path
.
basename
(
video_path
)
.
split
(
'.'
)[
0
]
+
".xlsx"
...
...
@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
has_subtitle
=
detect_movie
(
video_path
,
start_time
,
60
)
if
has_subtitle
:
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
)
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
)
else
:
detect_with_asr
(
video_path
,
book_path
,
start_time
,
end_time
)
detect_with_asr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
)
if
__name__
==
'__main__'
:
...
...
speech_synthesis.py
View file @
dda3b840
...
...
@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
for
i
,
text
in
enumerate
(
narratages
):
wav_path
=
os
.
path
.
join
(
root_path
,
'
%.2
f.wav'
%
start_timepoint
[
i
])
speech_synthesis
(
text
,
wav_path
,
speed
)
time
.
sleep
(
1
)
print
(
"目前正在处理{}"
.
format
(
wav_path
))
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
...
...
try_with_gui.py
View file @
dda3b840
# -*- coding:utf-8 -*-
import
threading
from
mttkinter
import
mtTkinter
as
tk
from
tkinter
import
filedialog
,
ttk
,
messagebox
,
Frame
,
Canvas
from
tkinter
import
filedialog
,
ttk
,
messagebox
import
os
import
time
import
ffmpeg
from
speech_synthesis
import
ss_and_export
import
ctypes
import
inspect
window
=
tk
.
Tk
()
window
.
title
(
'无障碍电影辅助工具'
)
# 标题
...
...
@@ -24,6 +25,7 @@ def open_video_file():
# 获取视频的时长等信息,初始化开始结束时间
info
=
ffmpeg
.
probe
(
video_path
)
vs
=
next
(
c
for
c
in
info
[
'streams'
]
if
c
[
'codec_type'
]
==
'video'
)
print
(
vs
)
try
:
duration
=
int
(
float
(
vs
[
'duration'
]))
hours
=
int
(
duration
/
3600
)
...
...
@@ -31,7 +33,10 @@ def open_video_file():
seconds
=
int
(
duration
-
60
*
minutes
-
3600
*
hours
)
endTime
.
set
(
"
%02
d:
%02
d:
%02
d"
%
(
hours
,
minutes
,
seconds
))
except
:
endTime
.
set
(
vs
[
'tags'
][
'DURATION'
])
for
k
in
vs
[
'tags'
]
.
keys
():
k_l
=
str
.
lower
(
k
)
if
'duration'
in
k_l
:
endTime
.
set
(
vs
[
'tags'
][
k
])
if
len
(
video_path
)
!=
0
and
not
is_video
(
video_path
):
messagebox
.
showwarning
(
'警告'
,
"请选择正确的视频格式,能够处理的视频格式如下所示:
\n
'.mkv', '.rmvb', '.mp4', '.avi'"
)
...
...
@@ -89,15 +94,13 @@ def start_process(p, p_label, state, intervals=100):
"""
print
(
"进度条开始滚动"
)
p
.
start
(
interval
=
int
(
intervals
))
last
s
tate
=
state
[
0
]
last
S
tate
=
state
[
0
]
while
True
:
if
state
[
0
]
and
state
[
0
]
!=
laststate
:
# 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时,对进度条状态进行更新
if
state
[
0
]
and
state
[
0
]
!=
lastState
and
state
[
0
]
*
100
>
p
[
'value'
]:
p
[
'value'
]
=
int
(
state
[
0
]
*
100
)
lastState
=
state
[
0
]
p_label
[
'text'
]
=
str
(
int
(
p
[
'value'
]))
+
"
%
"
if
p
[
'value'
]
==
99
:
p
.
stop
()
p
[
'value'
]
=
99
break
if
p
[
'value'
]
==
100
:
p
.
stop
()
p
[
'value'
]
=
100
...
...
@@ -124,26 +127,50 @@ def start_detect():
messagebox
.
showwarning
(
"警告"
,
"请输入表格存放路径"
)
return
# 开始检测后,将“开始检测”按钮设置为不可点击状态,“停止检测”按钮设置为可点击状态
startDetection
.
config
(
state
=
tk
.
DISABLED
)
stopDetection
.
config
(
state
=
tk
.
ACTIVE
)
processState
.
set
(
"正在启动中……"
)
from
narratage_detection
import
detect
# 显示进度条及开始检测
progressbar_1
.
grid
(
column
=
2
,
row
=
1
)
progressbar_1
.
grid
(
column
=
2
,
row
=
1
,
sticky
=
"W"
)
progress_1
.
grid
(
column
=
3
,
row
=
1
)
processState
.
set
(
"开始检测"
)
intervals
=
trans_to_seconds
(
endTime
.
get
())
# 多线程同步进行检测和进度条更新
threads
=
[
threading
.
Thread
(
target
=
start_process
,
args
=
(
progressbar_1
,
progress_1
,
None
,
intervals
*
5
)),
threading
.
Thread
(
target
=
detect
,
args
=
(
video_path
,
startTime
.
get
(),
endTime
.
get
(),
book_path
))]
state
=
[
None
]
threads
=
[
threading
.
Thread
(
target
=
start_process
,
args
=
(
progressbar_1
,
progress_1
,
state
,
100000
),
name
=
"startProgress1"
),
threading
.
Thread
(
target
=
detect
,
args
=
(
video_path
,
startTime
.
get
(),
endTime
.
get
(),
book_path
,
state
),
name
=
"detect"
)]
for
t
in
threads
:
t
.
start
()
# 线程完成任务后结束线程
for
t
in
threads
:
t
.
join
()
print
(
"线程{}已结束"
.
format
(
t
.
name
))
# 将进度条的进度拉满到100%,并给出“任务已完成”的提示
progressbar_1
[
'value'
]
=
100
progress_1
[
'text'
]
=
'100
%
'
processState
.
set
(
"任务已完成"
)
# 检测完成后,将“停止检测”按钮设置为不可点击状态,”开始检测“按钮设置为可点击状态
stopDetection
.
config
(
state
=
tk
.
DISABLED
)
startDetection
.
config
(
state
=
tk
.
ACTIVE
)
def stop_detect():
    """Abort a running detection task and reset the detection widgets.

    Every live thread named ``startDetect``, ``startProgress1`` or ``detect``
    is asked to exit by asynchronously raising ``SystemExit`` in it via
    ``_async_raise``.  Afterwards the status text is set to "stopped", the
    stop button is disabled, the start button is re-enabled, and the progress
    bar and its percentage label are reset to zero and hidden.

    :return: None
    """
    targets = ("startDetect", "startProgress1", "detect")
    for worker in threading.enumerate():
        if worker.name not in targets:
            continue
        try:
            _async_raise(worker.ident, SystemExit)
        except ValueError:
            # The thread ended between enumerate() and the call, so it is
            # already stopped; carry on so the widgets below still get reset.
            continue
    # Mark the task as stopped, disable the stop button, re-enable the start
    # button, then reset the progress bar to 0 and hide it.
    # NOTE(review): ttk documents only "normal"/"disabled" for a button's
    # state option; tk.ACTIVE is kept here to match the rest of the GUI code.
    processState.set("已停止")
    stopDetection.config(state=tk.DISABLED)
    startDetection.config(state=tk.ACTIVE)
    progressbar_1.stop()
    progressbar_1['value'] = 0
    progress_1['text'] = "0%"
    progressbar_1.grid_forget()
    progress_1.grid_forget()
def
open_sheet_file
():
...
...
@@ -206,31 +233,69 @@ def start_synthesis():
messagebox
.
showwarning
(
"警告"
,
"当前输入的表格不存在,请检查一遍。"
)
# 显示进度条、进度条百分比及任务状态提示文本
startSynthesis
.
config
(
state
=
tk
.
DISABLED
)
stopSynthesis
.
config
(
state
=
tk
.
ACTIVE
)
progressbar_2
.
grid
(
column
=
2
,
row
=
2
)
progress_2
.
grid
(
column
=
3
,
row
=
2
)
processState_2
.
set
(
"开始生成音频及字幕"
)
# 多线程同时实现语音合成+字幕导出、进度条
state
=
[
None
]
threads
=
[
threading
.
Thread
(
target
=
start_process
,
args
=
(
progressbar_2
,
progress_2
,
state
)),
threads
=
[
threading
.
Thread
(
target
=
start_process
,
args
=
(
progressbar_2
,
progress_2
,
state
)
,
name
=
"startProgress2"
),
threading
.
Thread
(
target
=
ss_and_export
,
args
=
(
sheet_path
,
audio_dir
,
speed
,
caption_path
,
state
))]
args
=
(
sheet_path
,
audio_dir
,
speed
,
caption_path
,
state
)
,
name
=
"ssAndExport"
)]
for
t
in
threads
:
t
.
start
()
for
t
in
threads
:
t
.
join
()
processState_2
.
set
(
"语音和字幕已导出完毕"
)
def
thread_it
(
func
,
*
args
):
startSynthesis
.
config
(
state
=
tk
.
ACTIVE
)
stopSynthesis
.
config
(
state
=
tk
.
DISABLED
)
def stop_synthesis():
    """Abort a running synthesis task and reset the synthesis widgets.

    Every live thread named ``startSynthesis``, ``startProgress2`` or
    ``ssAndExport`` is asked to exit by asynchronously raising ``SystemExit``
    in it via ``_async_raise``.  Afterwards the status text is set to
    "stopped", the stop button is disabled, the start button is re-enabled,
    and the progress bar and its percentage label are reset to zero and
    hidden.

    :return: None
    """
    # Debug output: list the threads that are alive when the stop is requested.
    print(threading.enumerate())
    targets = ("startSynthesis", "startProgress2", "ssAndExport")
    for worker in threading.enumerate():
        if worker.name not in targets:
            continue
        try:
            _async_raise(worker.ident, SystemExit)
        except ValueError:
            # The thread ended between enumerate() and the call, so it is
            # already stopped; carry on so the widgets below still get reset.
            continue
    # Mark the task as stopped, disable the stop button, re-enable the start
    # button, then reset the progress bar to 0 and hide it.
    # NOTE(review): ttk documents only "normal"/"disabled" for a button's
    # state option; tk.ACTIVE is kept here to match the rest of the GUI code.
    processState_2.set("已停止")
    stopSynthesis.config(state=tk.DISABLED)
    startSynthesis.config(state=tk.ACTIVE)
    progressbar_2.stop()
    progressbar_2['value'] = 0
    progress_2['text'] = "0%"
    progressbar_2.grid_forget()
    progress_2.grid_forget()
def thread_it(func, *args, name=None):
    """Run ``func(*args)`` on a newly started daemon thread.

    :param func: callable executed in the new thread
    :param args: positional arguments forwarded to ``func``
    :param name: keyword-only name given to the thread, so that it can later
        be found by name in ``threading.enumerate()``; ``None`` lets
        ``threading`` pick a default name
    :return: None
    """
    # Build the thread once, already marked as a daemon so it does not keep
    # the interpreter alive (Thread.setDaemon() is deprecated since 3.10).
    worker = threading.Thread(target=func, args=args, name=name, daemon=True)
    worker.start()
def _async_raise(tid, exctype):
    """Ask the thread identified by *tid* to raise *exctype*.

    :param tid: identifier of the target thread (e.g. ``Thread.ident``)
    :param exctype: exception class to raise in the thread; if an instance is
        passed instead, its class is used
    :return: None
    :raises ValueError: if the interpreter reports that no thread state was
        modified, i.e. *tid* does not belong to a known thread
    :raises SystemError: if more than one thread state was modified; the
        request is revoked again before raising
    """
    # PyThreadState_SetAsyncExc declares the id as a C ``unsigned long``.
    thread_id = ctypes.c_ulong(tid)
    if not inspect.isclass(exctype):
        exctype = type(exctype)
    set_async_exc = ctypes.pythonapi.PyThreadState_SetAsyncExc
    res = set_async_exc(thread_id, ctypes.py_object(exctype))
    if res == 0:
        raise ValueError("invalid thread id")
    if res != 1:
        # More than one thread state was touched: pass NULL to clear the
        # pending exception again before reporting the failure.
        set_async_exc(thread_id, None)
        raise SystemError("PyThreadState_SetAsyncExc failed")
def
_quit
():
window
.
quit
()
window
.
destroy
()
...
...
@@ -256,7 +321,6 @@ tabControl.pack(expand=1, fill="both")
- 视频实际结束时间|文本框
"""
video_info
=
ttk
.
LabelFrame
(
tab1
,
text
=
" 视频信息操作 "
)
# video_info.grid(column=0, row=0, padx=8, pady=4)
video_info
.
place
(
relx
=
0.05
,
rely
=
0.05
,
relwidth
=
0.9
,
relheight
=
0.4
)
input_label
=
ttk
.
Label
(
video_info
,
text
=
"视频文件"
)
...
...
@@ -283,6 +347,7 @@ endTime_entered.grid(column=1, row=2, sticky="W")
检测步骤相关内容,包含以下内容:
- 输出表格路径|输出表格路径文本框|打开文件夹
- 开始检测按钮|当前检测状态提示文本|任务进度条|进度条百分比
- 停止检测按钮
"""
detect_command
=
ttk
.
LabelFrame
(
tab1
,
text
=
" 检测步骤 "
)
detect_command
.
place
(
relx
=
0.05
,
rely
=
0.5
,
relwidth
=
0.9
,
relheight
=
0.4
)
...
...
@@ -295,19 +360,24 @@ outputFile.grid(column=1, row=0)
save_button
=
ttk
.
Button
(
detect_command
,
text
=
"打开文件夹"
,
command
=
find_save_file
)
save_button
.
grid
(
column
=
2
,
row
=
0
)
startDetection
=
ttk
.
Button
(
detect_command
,
text
=
"开始检测"
,
command
=
lambda
:
thread_it
(
start_detect
))
startDetection
=
ttk
.
Button
(
detect_command
,
text
=
"开始检测"
,
command
=
lambda
:
thread_it
(
start_detect
,
name
=
"startDetect"
))
startDetection
.
grid
(
column
=
0
,
row
=
1
)
processState
=
tk
.
StringVar
()
stateLabel
=
tk
.
Label
(
detect_command
,
textvariable
=
processState
,
fg
=
"green"
)
stateLabel
.
grid
(
column
=
1
,
row
=
1
,
sticky
=
"W"
)
progressbar_1
=
ttk
.
Progressbar
(
detect_command
,
length
=
10
0
,
mode
=
"determinate"
)
progressbar_1
=
ttk
.
Progressbar
(
detect_command
,
length
=
8
0
,
mode
=
"determinate"
)
progress_1
=
tk
.
Label
(
detect_command
,
text
=
"0
%
"
)
stopDetection
=
ttk
.
Button
(
detect_command
,
text
=
"停止检测"
,
command
=
lambda
:
thread_it
(
stop_detect
,
name
=
"stopDetect"
))
stopDetection
.
grid
(
column
=
0
,
row
=
2
)
stopDetection
.
config
(
state
=
tk
.
DISABLED
)
"""
为旁白语音合成添加部件
"""
"""
语音相关设置,包含以下内容:
- 旁白脚本表格|表格路径|上传文件按钮
"""
audio_info
=
ttk
.
LabelFrame
(
tab2
,
text
=
" 语音相关设置 "
)
audio_info
.
place
(
relx
=
0.05
,
rely
=
0.05
,
relwidth
=
0.9
,
relheight
=
0.4
)
...
...
@@ -347,13 +417,18 @@ audioDir_input.grid(column=1, row=0)
save_button_2
=
ttk
.
Button
(
synthesis_command
,
text
=
"打开文件夹"
,
command
=
find_save_dir
)
save_button_2
.
grid
(
column
=
2
,
row
=
0
)
startSynthesis
=
ttk
.
Button
(
synthesis_command
,
text
=
"开始合成"
,
command
=
lambda
:
thread_it
(
start_synthesis
))
startSynthesis
=
ttk
.
Button
(
synthesis_command
,
text
=
"开始合成"
,
command
=
lambda
:
thread_it
(
start_synthesis
,
name
=
"startSynthesis"
))
startSynthesis
.
grid
(
column
=
0
,
row
=
2
)
processState_2
=
tk
.
StringVar
()
stateLabel_2
=
tk
.
Label
(
synthesis_command
,
textvariable
=
processState_2
,
fg
=
"green"
)
stateLabel_2
.
grid
(
column
=
1
,
row
=
2
,
sticky
=
"W"
)
progressbar_2
=
ttk
.
Progressbar
(
synthesis_command
,
length
=
100
,
mode
=
"determinate"
)
progress_2
=
tk
.
Label
(
synthesis_command
,
text
=
"0
%
"
)
stopSynthesis
=
ttk
.
Button
(
synthesis_command
,
text
=
"停止合成"
,
command
=
lambda
:
thread_it
(
stop_synthesis
,
name
=
"stopSynthesis"
))
stopSynthesis
.
grid
(
column
=
0
,
row
=
3
)
stopSynthesis
.
config
(
state
=
tk
.
DISABLED
)
# 刷新显示
window
.
mainloop
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment