Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
dda3b840
Commit
dda3b840
authored
Jan 18, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1. 修改判定字幕的算法; 2.在界面中添加停止按钮,用于终止当前任务,重新开始新的任务; 3.将检测旁白的进度条优化为真进度条;
parent
742031dc
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
24 additions
and
17 deletions
+24
-17
detect_with_asr.py
detect_with_asr.py
+3
-3
detect_with_ocr.py
detect_with_ocr.py
+6
-4
judge_subtitle.py
judge_subtitle.py
+11
-7
narratage_detection.py
narratage_detection.py
+3
-3
speech_synthesis.py
speech_synthesis.py
+1
-0
try_with_gui.py
try_with_gui.py
+0
-0
No files found.
detect_with_asr.py
View file @
dda3b840
...
@@ -73,7 +73,7 @@ def concat_wav(root):
...
@@ -73,7 +73,7 @@ def concat_wav(root):
return
output_file
return
output_file
def
detect_with_asr
(
video_path
,
book_path
,
start_time
=
0
,
end_time
=-
1
):
def
detect_with_asr
(
video_path
,
book_path
,
start_time
=
0
,
end_time
=-
1
,
state
=
None
):
# 临时存储各种中间产物的文件夹
# 临时存储各种中间产物的文件夹
tmp_root
=
'./tmp'
tmp_root
=
'./tmp'
if
not
os
.
path
.
exists
(
tmp_root
):
if
not
os
.
path
.
exists
(
tmp_root
):
...
@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
...
@@ -102,9 +102,9 @@ def detect_with_asr(video_path, book_path, start_time=0, end_time=-1):
sys
.
path
.
append
(
"./PaddlePaddle_DeepSpeech2"
)
sys
.
path
.
append
(
"./PaddlePaddle_DeepSpeech2"
)
from
infer_path
import
predict_long_audio_with_paddle
from
infer_path
import
predict_long_audio_with_paddle
table_content
=
predict_long_audio_with_paddle
(
audio_path
,
book_name_xlsx
,
start_tim
e
)
table_content
=
predict_long_audio_with_paddle
(
audio_path
,
start_time
,
stat
e
)
write_to_sheet
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
write_to_sheet
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
state
[
0
]
=
1
# 删除中间文件
# 删除中间文件
# shutil.rmtree(tmp_root)
# shutil.rmtree(tmp_root)
...
...
detect_with_ocr.py
View file @
dda3b840
...
@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
...
@@ -14,7 +14,7 @@ up_b, down_b = 0, 0
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
)
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
)
def
get_position
(
video_path
,
start_time
):
def
get_position
(
video_path
,
start_time
):
"""
"""
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
:param video_path: 视频存储路径
:param video_path: 视频存储路径
...
@@ -130,7 +130,7 @@ def detect_subtitle(img):
...
@@ -130,7 +130,7 @@ def detect_subtitle(img):
return
None
return
None
def
process_video
(
video_path
,
begin
,
end
):
def
process_video
(
video_path
,
begin
,
end
,
state
):
"""
"""
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
处理视频,主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
:param video_path: 待处理视频的路径
:param video_path: 待处理视频的路径
...
@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
...
@@ -156,6 +156,7 @@ def process_video(video_path, begin, end):
cnt
+=
1
cnt
+=
1
# 每秒取4帧画面左右
# 每秒取4帧画面左右
if
cnt
%
int
(
fps
/
4
)
==
0
:
if
cnt
%
int
(
fps
/
4
)
==
0
:
state
[
0
]
=
float
(
cnt
/
video
.
get
(
cv2
.
CAP_PROP_FRAME_COUNT
))
if
state
[
0
]
<
0.99
else
0.99
subTitle
=
detect_subtitle
(
frame
)
subTitle
=
detect_subtitle
(
frame
)
# 第一次找到字幕
# 第一次找到字幕
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
if
lastSubTitle
is
None
and
subTitle
is
not
None
:
...
@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
...
@@ -215,7 +216,7 @@ def write_excel_xlsx(path, sheet_name, value):
workbook
.
save
(
path
)
workbook
.
save
(
path
)
def
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
):
def
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
):
book_name_xlsx
=
book_path
book_name_xlsx
=
book_path
sheet_name_xlsx
=
"旁白插入位置建议"
sheet_name_xlsx
=
"旁白插入位置建议"
...
@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
...
@@ -225,10 +226,11 @@ def detect_with_ocr(video_path, book_path, start_time, end_time):
# 获取并构建输出信息
# 获取并构建输出信息
table_head
=
[[
"起始时间"
,
"终止时间"
,
"字幕"
,
'建议'
,
'解说脚本'
]]
table_head
=
[[
"起始时间"
,
"终止时间"
,
"字幕"
,
'建议'
,
'解说脚本'
]]
table_content
=
table_head
+
process_video
(
video_path
,
start_time
,
end_time
)
table_content
=
table_head
+
process_video
(
video_path
,
start_time
,
end_time
,
state
)
# 输出旁白位置推荐信息到表格
# 输出旁白位置推荐信息到表格
write_excel_xlsx
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
write_excel_xlsx
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
state
[
0
]
=
1
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
judge_subtitle.py
View file @
dda3b840
...
@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
...
@@ -68,7 +68,6 @@ def detect_movie(video_path, start, interval):
interval
=
interval
*
fps
interval
=
interval
*
fps
random_number
=
50
random_number
=
50
ans
=
[
False
]
*
3
ans
=
[
False
]
*
3
print
(
ans
)
for
i
in
range
(
3
):
for
i
in
range
(
3
):
random_list
=
random_int_list
(
start
,
start
+
interval
,
random_number
)
random_list
=
random_int_list
(
start
,
start
+
interval
,
random_number
)
start
=
start
+
interval
start
=
start
+
interval
...
@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
...
@@ -82,15 +81,20 @@ def detect_movie(video_path, start, interval):
if
ans
[
i
]:
if
ans
[
i
]:
print
(
random_point
)
print
(
random_point
)
break
break
if
not
ans
[
i
]:
print
(
'{}-{}时间段内未检测到字幕'
.
format
(
start
,
start
+
interval
))
if
i
==
1
and
Counter
(
ans
)
.
most_common
(
1
)[
0
][
0
]
is
False
:
break
video
.
release
()
video
.
release
()
print
(
ans
)
print
(
ans
)
return
Counter
(
ans
)
.
most_common
(
1
)[
0
][
0
]
return
Counter
(
ans
)
.
most_common
(
1
)[
0
][
0
]
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
video_path
=
r'D:\heelo\hysxm.mp4'
pass
start_time
=
time
.
time
()
# video_path = r'D:\heelo\hysxm.mp4'
start
=
90
# start_time = time.time()
interval
=
120
# start = 90
print
(
detect_movie
(
video_path
,
start
,
interval
))
# interval = 120
print
(
time
.
time
()
-
start_time
)
# print(detect_movie(video_path, start, interval))
# print(time.time() - start_time)
narratage_detection.py
View file @
dda3b840
...
@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
...
@@ -18,7 +18,7 @@ def trans_to_seconds(timepoint):
return
time_in_seconds
return
time_in_seconds
def
detect
(
video_path
,
start_time
,
end_time
,
book_path
):
def
detect
(
video_path
,
start_time
,
end_time
,
book_path
,
state
):
print
(
"开始检测"
)
print
(
"开始检测"
)
if
book_path
is
None
:
if
book_path
is
None
:
book_path
=
os
.
path
.
basename
(
video_path
)
.
split
(
'.'
)[
0
]
+
".xlsx"
book_path
=
os
.
path
.
basename
(
video_path
)
.
split
(
'.'
)[
0
]
+
".xlsx"
...
@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
...
@@ -30,9 +30,9 @@ def detect(video_path, start_time, end_time, book_path):
has_subtitle
=
detect_movie
(
video_path
,
start_time
,
60
)
has_subtitle
=
detect_movie
(
video_path
,
start_time
,
60
)
if
has_subtitle
:
if
has_subtitle
:
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
)
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
)
else
:
else
:
detect_with_asr
(
video_path
,
book_path
,
start_time
,
end_time
)
detect_with_asr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
speech_synthesis.py
View file @
dda3b840
...
@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
...
@@ -160,6 +160,7 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
for
i
,
text
in
enumerate
(
narratages
):
for
i
,
text
in
enumerate
(
narratages
):
wav_path
=
os
.
path
.
join
(
root_path
,
'
%.2
f.wav'
%
start_timepoint
[
i
])
wav_path
=
os
.
path
.
join
(
root_path
,
'
%.2
f.wav'
%
start_timepoint
[
i
])
speech_synthesis
(
text
,
wav_path
,
speed
)
speech_synthesis
(
text
,
wav_path
,
speed
)
time
.
sleep
(
1
)
print
(
"目前正在处理{}"
.
format
(
wav_path
))
print
(
"目前正在处理{}"
.
format
(
wav_path
))
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
...
...
try_with_gui.py
View file @
dda3b840
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment