Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
9671a120
Commit
9671a120
authored
Mar 31, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1. [modified] 将detect_with_ocr中确认当前帧是否超过限定时间段的判断提前到取帧之前;
2. [modified] 修改detect_with_asr中的旁白区间的写入判断,避免在视频开头处插入旁白推荐字数错误的情况
parent
6cd70d8a
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
29 deletions
+31
-29
infer_path.py
PaddlePaddle_DeepSpeech2/infer_path.py
+7
-2
detect_with_ocr.py
detect_with_ocr.py
+15
-19
main_gui.py
main_gui.py
+5
-5
speech_synthesis.py
speech_synthesis.py
+4
-3
No files found.
PaddlePaddle_DeepSpeech2/infer_path.py
View file @
9671a120
...
...
@@ -53,6 +53,8 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
texts
=
''
narratages
=
[]
last_time
=
0
# 已检测到字幕
subtitle_detected
=
False
# 执行识别
for
i
,
audio_path
in
enumerate
(
audios_path
):
print
(
"{}开始处理{}"
.
format
(
paddle
.
get_device
(),
audio_path
))
...
...
@@ -68,8 +70,10 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
device
=
paddle
.
get_device
()
)
if
text
:
if
i
==
0
or
(
i
>
0
and
time_stamps
[
i
][
0
]
-
last_time
>=
1
):
recommend_lens
=
int
((
time_stamps
[
i
][
0
]
-
last_time
)
*
normal_speed
)
if
not
subtitle_detected
or
(
subtitle_detected
and
time_stamps
[
i
][
0
]
-
last_time
>=
1
):
recommend_lens
=
int
((
time_stamps
[
i
][
0
]
-
last_time
)
*
normal_speed
)
if
subtitle_detected
else
int
(
(
time_stamps
[
i
][
0
]
+
pre_time
)
*
normal_speed
)
print
(
"插入旁白,推荐字数为
%
d"
%
recommend_lens
)
# narratages.append(["", "", "", "插入旁白,推荐字数为%d" % recommend_lens])
write_to_sheet
(
book_name
,
sheet_name
,
[
""
,
""
,
""
,
"插入旁白,推荐字数为
%
d"
%
recommend_lens
])
# narratages.append([round(time_stamps[i][0] + pre_time, 2), round(time_stamps[i][1] + pre_time, 2),
...
...
@@ -77,6 +81,7 @@ def predict_long_audio_with_paddle(wav_path, pre_time, book_name, sheet_name, st
write_to_sheet
(
book_name
,
sheet_name
,
[
round
(
time_stamps
[
i
][
0
]
+
pre_time
,
2
),
round
(
time_stamps
[
i
][
1
]
+
pre_time
,
2
),
text
,
''
])
last_time
=
time_stamps
[
i
][
1
]
subtitle_detected
=
True
print
(
"第
%
d个分割音频 对应时间为
%.2
f-
%.2
f 识别结果:
%
s"
%
(
i
,
time_stamps
[
i
][
0
]
+
pre_time
,
time_stamps
[
i
][
1
]
+
pre_time
,
text
))
state
[
0
]
=
float
((
i
+
1
)
/
len
(
audios_path
))
if
state
[
0
]
is
None
or
state
[
0
]
<
0.99
else
0.99
...
...
detect_with_ocr.py
View file @
9671a120
...
...
@@ -14,7 +14,7 @@ from detect_with_asr import create_sheet, write_to_sheet
up_b
,
down_b
=
0
,
0
# 初始化ocr工具
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
)
ocr
=
PaddleOCR
(
use_angle_cls
=
True
,
lang
=
"ch"
,
show_log
=
False
,
use_gpu
=
False
)
# 正常语速为4字/秒
normal_speed
=
4
...
...
@@ -45,7 +45,7 @@ def get_position(video_path, start_time):
continue
img
=
img
[
height
:]
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
sorted
(
res
,
key
=
lambda
x
:
x
[
0
][
0
][
1
])
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
bottom_position
=
None
if
len
(
res
)
==
0
:
continue
...
...
@@ -86,8 +86,8 @@ def get_position(video_path, start_time):
if
txt_cnt
==
3
:
break
print
(
subtitle_position
)
up_b
,
down_b
=
max
(
subtitle_position
,
key
=
subtitle_position
.
get
)
return
up_b
+
height
,
down_b
+
height
up_b
ounding
,
down_bounding
=
max
(
subtitle_position
,
key
=
subtitle_position
.
get
)
return
up_b
ounding
+
height
,
down_bounding
+
height
def
erasePunc
(
txt
):
...
...
@@ -144,11 +144,9 @@ def detect_subtitle(img):
img
=
img
[
int
(
up_b
)
-
30
:
int
(
down_b
)
+
30
]
# img = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5)))
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
sorted
(
res
,
key
=
lambda
x
:
x
[
0
][
0
][
1
])
bottom_position
=
None
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
if
len
(
res
)
==
0
:
return
None
# log = []
possible_txt
=
[]
for
x
in
res
:
rect
,
(
txt
,
confidence
)
=
x
...
...
@@ -196,6 +194,16 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
if
frame
is
None
:
break
cnt
+=
1
# 判断当前帧是否已超限制
if
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
>
end
:
if
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
end_time
>
1
:
print
(
'--------------------------------------------------'
)
recommend_lens
=
int
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
end_time
)
*
normal_speed
)
write_to_sheet
(
book_path
,
sheet_name
,
[
''
,
''
,
''
,
'插入旁白,推荐字数为
%
d'
%
recommend_lens
])
# 判断当前是否有字幕需要被保存下来
if
end_time
<
start_time
:
write_to_sheet
(
book_path
,
sheet_name
,
[
round
(
start_time
,
2
),
round
(
end
,
2
),
lastSubTitle
,
''
])
break
# 每秒取4帧画面左右
if
cnt
%
int
(
fps
/
4
)
==
0
:
state
[
0
]
=
float
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
begin
)
/
(
end
-
begin
))
\
...
...
@@ -239,18 +247,6 @@ def process_video(video_path, begin, end, book_path, sheet_name, state):
continue
# 当前字幕与上一段字幕不一样
lastSubTitle
=
subTitle
if
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
>
end
:
if
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
end_time
>
1
:
print
(
'--------------------------------------------------'
)
# 还没有字幕被分析出来
# if len(res) == 0:
recommend_lens
=
int
((
video
.
get
(
cv2
.
CAP_PROP_POS_MSEC
)
/
1000
-
end_time
)
*
normal_speed
)
# else:
# recommend_lens = int(res[-1][0] * normal_speed) if len(res) == 1 else int(
# (res[-1][0] - res[-2][1]) * normal_speed)
# narratage_recommend.append(['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
write_to_sheet
(
book_path
,
sheet_name
,
[
''
,
''
,
''
,
'插入旁白,推荐字数为
%
d'
%
recommend_lens
])
break
def
detect_with_ocr
(
video_path
,
book_path
,
start_time
,
end_time
,
state
):
...
...
main_gui.py
View file @
9671a120
...
...
@@ -33,7 +33,7 @@ def create_detail_day() -> str:
return
daytime
def
make_print_to_file
(
path
=
'./'
):
def
make_print_to_file
(
path
:
str
=
'./'
):
"""将print的内容输出到log文件夹中
:param path:设置的log文件夹路径
...
...
@@ -132,7 +132,7 @@ def find_save_file():
outputFilePath
.
set
(
book_path
)
def
trans_to_seconds
(
timePoint
)
:
def
trans_to_seconds
(
timePoint
:
str
)
->
float
:
"""将用户输入的时间字符串转换为秒数
:param timePoint: 时间字符串
...
...
@@ -147,7 +147,7 @@ def trans_to_seconds(timePoint):
return
time_in_seconds
def
check_timePoint
(
timePoint
)
->
bool
:
def
check_timePoint
(
timePoint
:
str
)
->
bool
:
"""检查时间字符串格式是否正确
:param timePoint: 时间字符串
...
...
@@ -179,7 +179,7 @@ def check_timePoint(timePoint) -> bool:
return
False
def
start_process
(
p
,
p_label
,
state
,
intervals
=
100
):
def
start_process
(
p
,
p_label
,
state
:
list
,
intervals
:
int
=
100
):
"""启动进度条
:param p: 进度条组件
...
...
@@ -433,7 +433,7 @@ def start_synthesis():
messagebox
.
showwarning
(
"警告"
,
"请选择音频存放路径"
)
return
elif
not
os
.
path
.
exists
(
audio_dir
):
messagebox
.
showwarning
(
"警告"
,
"当前音频存放路径有误,请检查一遍
。
"
)
messagebox
.
showwarning
(
"警告"
,
"当前音频存放路径有误,请检查一遍"
)
return
if
len
(
caption_path
)
==
0
:
messagebox
.
showwarning
(
"警告"
,
"请选择字幕文件存放路径"
)
...
...
speech_synthesis.py
View file @
9671a120
...
...
@@ -3,7 +3,7 @@ import os
import
argparse
import
time
from
azure.cognitiveservices.speech
import
AudioDataStream
,
SpeechConfig
,
SpeechSynthesizer
,
ResultReason
from
azure.cognitiveservices.speech
import
SpeechConfig
,
SpeechSynthesizer
,
ResultReason
from
azure.cognitiveservices.speech.audio
import
AudioOutputConfig
import
openpyxl
...
...
@@ -259,8 +259,6 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
adjust_volume
(
origin_wav_path
,
start_timestamp
,
end_timestamp
)
# 将旁白混入原音频
mix_speech
(
adjusted_wav_path
,
narratage_paths
,
start_timestamp
)
if
state
is
not
None
:
state
[
0
]
=
1.00
# 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
if
os
.
path
.
exists
(
tmp_file
):
...
...
@@ -269,6 +267,9 @@ def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state
os
.
remove
(
origin_wav_path
)
os
.
remove
(
adjusted_wav_path
)
if
state
is
not
None
:
state
[
0
]
=
1.00
if
__name__
==
'__main__'
:
pass
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment