Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_1
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
翟艳秋(20软)
accessibility_movie_1
Commits
fad7c317
You need to sign in or sign up before continuing.
Commit
fad7c317
authored
Jan 25, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
1. [add] 原音频中添加旁白音频,生成混合音频;
2. [modified] 将进度条状态调整为小数点后2位; 3. [modified] 检测字幕有无情况的检测范围修正为[实际开始, 实际结束]
parent
7d7ed791
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
111 additions
and
46 deletions
+111
-46
detect_with_ocr.py
detect_with_ocr.py
+1
-2
judge_subtitle.py
judge_subtitle.py
+3
-4
narratage_detection.py
narratage_detection.py
+1
-1
speech_synthesis.py
speech_synthesis.py
+85
-19
split_wav.py
split_wav.py
+1
-1
try_with_gui.py
try_with_gui.py
+20
-19
No files found.
detect_with_ocr.py
View file @
fad7c317
...
...
@@ -222,7 +222,6 @@ def write_excel_xlsx(path, sheet_name, value):
for
i
in
range
(
0
,
index
):
for
j
in
range
(
0
,
len
(
value
[
i
])):
sheet
.
cell
(
row
=
i
+
1
,
column
=
j
+
1
,
value
=
str
(
value
[
i
][
j
]))
print
(
value
[
i
][
j
])
if
value
[
i
][
j
]
==
''
or
'插入旁白'
in
str
(
value
[
i
][
j
])
or
value
[
i
][
j
]
==
'翻译'
:
sheet
.
cell
(
row
=
i
+
1
,
column
=
j
+
1
)
.
fill
=
PatternFill
(
fill_type
=
'solid'
,
fgColor
=
'ffff00'
)
workbook
.
save
(
path
)
...
...
@@ -242,7 +241,7 @@ def detect_with_ocr(video_path, book_path, start_time, end_time, state):
# 输出旁白位置推荐信息到表格
write_excel_xlsx
(
book_name_xlsx
,
sheet_name_xlsx
,
table_content
)
state
[
0
]
=
1
state
[
0
]
=
1
.00
if
__name__
==
'__main__'
:
...
...
judge_subtitle.py
View file @
fad7c317
...
...
@@ -54,7 +54,7 @@ def detect_subtitle(frame):
return
False
def
detect_movie
(
video_path
,
start
,
interval
):
def
detect_movie
(
video_path
,
start
,
end
,
interval
):
"""
使用整部视频进行测试,确定视频是否提供字幕
:param video_path: 视频的地址
...
...
@@ -64,9 +64,8 @@ def detect_movie(video_path, start, interval):
"""
video
=
cv2
.
VideoCapture
(
video_path
)
fps
=
np
.
ceil
(
video
.
get
(
cv2
.
CAP_PROP_FPS
))
end_time
=
video
.
get
(
cv2
.
CAP_PROP_FRAME_COUNT
)
/
fps
if
start
+
interval
*
3
>
end_time
:
interval
=
int
((
end_time
-
start
)
/
3
)
if
start
+
interval
*
3
>
end
:
interval
=
int
((
end
-
start
)
/
3
)
start
=
start
*
fps
interval
=
interval
*
fps
random_number
=
50
...
...
narratage_detection.py
View file @
fad7c317
...
...
@@ -30,7 +30,7 @@ def detect(video_path, start_time, end_time, book_path, state, subtitle=None):
# 根据用户的选择来确定电影是否有字幕,如果“未知”,则自动检测
if
subtitle
==
0
:
has_subtitle
=
detect_movie
(
video_path
,
start_time
,
180
)
has_subtitle
=
detect_movie
(
video_path
,
start_time
,
end_time
,
180
)
elif
subtitle
==
1
:
has_subtitle
=
True
else
:
...
...
speech_synthesis.py
View file @
fad7c317
...
...
@@ -8,6 +8,7 @@ from azure.cognitiveservices.speech.audio import AudioOutputConfig
import
openpyxl
tmp_file
=
'tmp.wav'
adjusted_wav_path
=
"adjusted.wav"
normal_speed
=
4
normal_interval
=
0.1
...
...
@@ -88,21 +89,31 @@ def get_narratage_text(sheet_content, speed):
start_time
=
sheet_content
[
'起始时间'
]
end_time
=
sheet_content
[
'终止时间'
]
narratage_start_time
=
[]
narratage_end_time
=
[]
narratage_text
=
[]
for
i
,
text
in
enumerate
(
narratage
):
print
(
i
,
text
)
if
text
is
not
None
:
if
text
==
'翻译'
:
narratage_text
.
append
(
subtitle
[
i
])
narratage_start_time
.
append
(
float
(
start_time
[
i
]))
narratage_end_time
.
append
(
float
(
end_time
[
i
]))
else
:
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
text_split
=
text
.
split
(
'
\n
'
)
cur_start
=
float
(
end_time
[
i
-
1
])
+
0.1
if
i
>
0
else
0
if
subtitle
[
i
]
is
None
:
cur_start
=
float
(
end_time
[
i
-
1
])
+
0.1
if
i
>
0
else
0
cur_end
=
float
(
start_time
[
i
+
1
])
else
:
cur_start
=
float
(
start_time
[
i
])
cur_end
=
float
(
end_time
[
i
])
for
x
in
text_split
:
cur_end
=
max
(
cur_end
,
cur_start
+
(
len
(
x
)
/
normal_speed
+
normal_interval
)
/
speed
)
narratage_text
.
append
(
x
)
narratage_start_time
.
append
(
cur_start
)
narratage_end_time
.
append
(
cur_end
)
cur_start
=
cur_start
+
(
len
(
x
)
/
normal_speed
+
normal_interval
)
/
speed
return
narratage_text
,
narratage_start_time
return
narratage_text
,
narratage_start_time
,
narratage_end_time
def
second_to_str
(
seconds
):
...
...
@@ -135,13 +146,44 @@ def export_caption(sheet_content, caption_file):
f
.
write
(
x
+
"
\n\n
"
)
def
ss_and_export
(
sheet_path
,
output_dir
,
speed
,
caption_file
,
state
):
def adjust_volume(origin, start_timestamp, end_timestamp):
    """Lower the volume of ``origin`` during every narration interval.

    The result is written next to ``origin`` and the module-level
    ``adjusted_wav_path`` is updated to point at it, because callers read
    that global afterwards to locate the adjusted file.

    :param origin: path of the audio file extracted from the source video
    :param start_timestamp: sequence of interval start times, in seconds
    :param end_timestamp: sequence of interval end times, in seconds; paired
        positionally with ``start_timestamp``
    :return: path of the adjusted audio file (same value as the updated
        ``adjusted_wav_path`` global)
    """
    import shutil
    import subprocess

    global adjusted_wav_path
    # Reuse only the file name of the global. Joining the whole previous value
    # would make a second call inherit (or nest inside) the directory chosen
    # by the first call.
    adjusted_wav_path = os.path.join(os.path.dirname(origin),
                                     os.path.basename(adjusted_wav_path))

    intervals = list(zip(start_timestamp, end_timestamp))
    if not intervals:
        # Nothing to attenuate: an empty "-af" filter chain is not a useful
        # ffmpeg invocation, so hand back an unchanged copy instead.
        shutil.copyfile(origin, adjusted_wav_path)
        return adjusted_wav_path

    # One "volume" filter per interval, each enabled only while t lies inside
    # the interval and scaling the signal to 0.3 of its level.  The single
    # quotes are ffmpeg-level quoting that protects the commas in between().
    audio_filter = ",".join(
        "volume=enable='between(t,{},{})':volume=0.3".format(begin, end)
        for begin, end in intervals
    )

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    command = ["ffmpeg", "-i", origin, "-af", audio_filter,
               "-y", adjusted_wav_path]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # The original os.system() call never raised when ffmpeg was missing;
        # stay best-effort but make the failure visible.
        print("ffmpeg could not be started: {}".format(err))
    else:
        if completed.returncode != 0:
            print("ffmpeg exited with status {}".format(completed.returncode))
    return adjusted_wav_path
def mix_speech(origin, narratage_paths, start_timestamps):
    """Mix narration clips into ``origin`` and write ``composed.wav``.

    Each narration clip is delayed to its start time and then mixed together
    with the original track by a single ffmpeg invocation.  The output file
    is created in the same directory as ``origin``.

    :param origin: path of the base audio track (ffmpeg input 0)
    :param narratage_paths: paths of the synthesized narration clips
    :param start_timestamps: start time of each clip in seconds, paired
        positionally with ``narratage_paths``
    :return: path of the mixed audio file
    """
    import shutil
    import subprocess

    composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")
    print(composed_wav_path)

    # Pair clips with their start times so the number of "-i" inputs always
    # matches the number of streams referenced by the filter graph.
    clips = list(zip(narratage_paths, start_timestamps))
    if not clips:
        # No narration to add: the mix is just the original track.
        shutil.copyfile(origin, composed_wav_path)
        return composed_wav_path

    command = ["ffmpeg", "-i", origin]
    for clip_path, _ in clips:
        command += ["-i", clip_path]

    # Input k (1-based; input 0 is the original track) is shifted by its
    # start time in milliseconds and labelled [audk].
    delays = "".join(
        "[{0}]adelay=delays={1}:all=1[aud{0}];".format(k, int(start * 1000))
        for k, (_, start) in enumerate(clips, start=1)
    )
    labels = "".join("[aud{}]".format(k) for k in range(1, len(clips) + 1))
    # NOTE(review): amix is used with its default options; confirm that the
    # resulting loudness is acceptable when many clips are mixed.
    filter_graph = "{}[0]{}amix=inputs={}".format(delays, labels,
                                                  len(clips) + 1)

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    command += ["-filter_complex", filter_graph,
                "-vsync", "2", "-y", composed_wav_path]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        # The original os.system() call never raised when ffmpeg was missing;
        # stay best-effort but make the failure visible.
        print("ffmpeg could not be started: {}".format(err))
    else:
        if completed.returncode != 0:
            print("ffmpeg exited with status {}".format(completed.returncode))
    print(" ".join(command))
    return composed_wav_path
def
ss_and_export
(
video_path
,
sheet_path
,
output_dir
,
speed
,
caption_file
,
state
=
None
):
"""
生成语音并导出字幕
:param video_path: 原视频的位置
:param sheet_path: 校对过的旁白脚本表格文件
:param output_dir: 存放音频文件的
:param speed:
:param caption_file:
:param output_dir: 存放音频文件的文件夹
:param speed: 旁白语速
:param caption_file: 输出的字幕文件存放位置
:param state: 用于与界面中的进度条状态进行通讯
:return:
"""
...
...
@@ -156,34 +198,58 @@ def ss_and_export(sheet_path, output_dir, speed, caption_file, state):
# 读取表格,并获取旁白及对应插入位置
sheet_content
=
read_sheet
(
book_path
)
narratages
,
start_time
point
=
get_narratage_text
(
sheet_content
,
speed
)
narratages
,
start_time
stamp
,
end_timestamp
=
get_narratage_text
(
sheet_content
,
speed
)
export_caption
(
sheet_content
,
caption_file
)
print
(
"已导出
旁白
文件"
)
print
(
"已导出
字幕
文件"
)
narratage_paths
=
[]
# 生成旁白解说语音
for
i
,
text
in
enumerate
(
narratages
):
wav_path
=
os
.
path
.
join
(
root_path
,
'
%.2
f.wav'
%
start_timepoint
[
i
])
wav_path
=
os
.
path
.
join
(
root_path
,
'
%.2
f.wav'
%
start_timestamp
[
i
])
narratage_paths
.
append
(
wav_path
)
speech_synthesis
(
text
,
wav_path
,
speed
)
time
.
sleep
(
1
)
print
(
"目前正在处理{}"
.
format
(
wav_path
))
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
if
state
is
not
None
:
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
*
0.97
# 合成总音频,并入原视频音频中
# 删除临时语音文件
# 提取原音频
from
split_wav
import
extract_audio
origin_wav_path
=
extract_audio
(
video_path
,
output_dir
,
0
,
-
1
)
# 调整原音频中旁白对应位置的音量
adjust_volume
(
origin_wav_path
,
start_timestamp
,
end_timestamp
)
# 将旁白混入原音频
mix_speech
(
adjusted_wav_path
,
narratage_paths
,
start_timestamp
)
if
state
is
not
None
:
state
[
0
]
=
1.00
# 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
if
os
.
path
.
exists
(
tmp_file
):
time
.
sleep
(
1
)
os
.
remove
(
tmp_file
)
os
.
remove
(
origin_wav_path
)
os
.
remove
(
adjusted_wav_path
)
if
__name__
==
'__main__'
:
# 定义参数
parser
=
argparse
.
ArgumentParser
(
description
=
'Speech Synthesis guideness'
)
parser
.
add_argument
(
"--output_dir"
,
required
=
True
,
type
=
str
,
help
=
"音频输出位置路径"
)
parser
.
add_argument
(
"--sheet_path"
,
required
=
True
,
type
=
str
,
help
=
'旁白解说表格存储路径'
)
parser
.
add_argument
(
"--caption_file"
,
required
=
True
,
type
=
str
,
help
=
"输出的字幕文件存储路径"
)
parser
.
add_argument
(
"--speed"
,
type
=
float
,
default
=
1.0
,
help
=
"设置语速,默认为1.0"
)
args
=
parser
.
parse_args
()
# parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
# parser.add_argument("--video_path", required=True, type=str, help="原视频位置")
# parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
# parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
# parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
# parser.add_argument("--speed", type=float, default=1.0, help="设置语速,默认为1.0")
# args = parser.parse_args()
# video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
# args.sheet_path, args.output_dir, args.speed, args.caption_file
video_path
=
'D:/heelo/hysxm_3.mp4'
sheet_path
=
'D:/heelo/hysxm_3.xlsx'
output_dir
=
'D:/AddCaption/hysxm_3'
speed
=
1.25
caption_file
=
'D:/AddCaption/hysxm_3/hysxm_3.srt'
# 主函数执行
ss_and_export
(
args
.
output_dir
,
args
.
sheet_path
,
args
.
speed
,
args
.
caption_file
)
ss_and_export
(
video_path
=
video_path
,
sheet_path
=
sheet_path
,
output_dir
=
output_dir
,
speed
=
speed
,
caption_file
=
caption_file
)
split_wav.py
View file @
fad7c317
...
...
@@ -52,7 +52,7 @@ def split_audio():
# 从音频中提取人声
def
extrac_speech
():
def
extrac
t
_speech
():
from
spleeter.audio.adapter
import
AudioAdapter
from
spleeter.separator
import
Separator
separator
=
Separator
(
'spleeter:2stems'
,
multiprocess
=
False
)
...
...
try_with_gui.py
View file @
fad7c317
...
...
@@ -84,12 +84,12 @@ def start_process(p, p_label, state, intervals=100):
while
True
:
# 当前进度不为None且与上一进度不一样且当前进度比进度条的状态要多时,对进度条状态进行更新
if
state
[
0
]
and
state
[
0
]
!=
lastState
and
state
[
0
]
*
100
>
p
[
'value'
]:
p
[
'value'
]
=
int
(
state
[
0
]
*
100
)
p
[
'value'
]
=
round
(
state
[
0
]
*
100
,
2
)
lastState
=
state
[
0
]
p_label
[
'text'
]
=
str
(
int
(
p
[
'value'
]
))
+
"
%
"
if
p
[
'value'
]
==
100
:
p_label
[
'text'
]
=
str
(
round
(
p
[
'value'
],
2
))
+
"
%
"
if
p
[
'value'
]
==
100
.0
:
p
.
stop
()
p
[
'value'
]
=
100
p
[
'value'
]
=
100
.0
break
print
(
"进度条停止"
)
...
...
@@ -184,9 +184,9 @@ def set_caption_file():
设置字幕文件存储路径(使用存放音频的文件夹作为默认文件夹、旁白表格名作为默认字幕名)
:return:
"""
defaultName
=
os
.
path
.
basename
(
narratage
Path
.
get
())
.
split
(
'.'
)[
0
]
+
".srt"
defaultName
=
os
.
path
.
basename
(
video
Path
.
get
())
.
split
(
'.'
)[
0
]
+
".srt"
defaultDir
=
audioDir
.
get
()
caption_path
=
filedialog
.
asksaveasfilename
(
title
=
u'保存文件至'
,
caption_path
=
filedialog
.
asksaveasfilename
(
title
=
u'保存
字幕
文件至'
,
initialdir
=
defaultDir
,
initialfile
=
defaultName
,
filetype
=
[(
'字幕文件'
,
".srt"
)])
...
...
@@ -206,6 +206,7 @@ def start_synthesis():
开始合成语音
:return:
"""
video_path
=
videoPath
.
get
()
audio_dir
=
audioDir
.
get
()
sheet_path
=
narratagePath
.
get
()
speed
=
float
(
audio_speed
.
get
()
.
split
(
'('
)[
0
])
...
...
@@ -236,7 +237,7 @@ def start_synthesis():
threads
=
[
threading
.
Thread
(
target
=
start_process
,
args
=
(
progressbar_2
,
progress_2
,
state
,
100000
),
name
=
"startProgress2"
),
threading
.
Thread
(
target
=
ss_and_export
,
args
=
(
sheet_path
,
audio_dir
,
speed
,
caption_path
,
state
),
name
=
"ssAndExport"
)]
args
=
(
video_path
,
sheet_path
,
audio_dir
,
speed
,
caption_path
,
state
),
name
=
"ssAndExport"
)]
for
t
in
threads
:
t
.
start
()
for
t
in
threads
:
...
...
@@ -380,12 +381,20 @@ stopDetection.config(state=tk.DISABLED)
"""
"""
语音相关设置,包含以下内容:
- 旁白脚本表格|表格路径|上传文件按钮
- 原视频|视频路径|上传文件按钮
- 旁白脚本表格|表格路径|上传文件按钮
- 旁白语速选择
"""
audio_info
=
ttk
.
LabelFrame
(
tab2
,
text
=
" 语音相关设置 "
)
audio_info
.
place
(
relx
=
0.05
,
rely
=
0.05
,
relwidth
=
0.9
,
relheight
=
0.4
)
audio_info
.
place
(
relx
=
0.05
,
rely
=
0.05
,
relwidth
=
0.9
,
relheight
=
0.3
)
video_label
=
ttk
.
Label
(
audio_info
,
text
=
"原视频"
)
video_label
.
grid
(
column
=
0
,
row
=
0
)
videoPath
=
tk
.
StringVar
()
videoPath_input
=
ttk
.
Entry
(
audio_info
,
width
=
30
,
textvariable
=
videoPath
)
videoPath_input
.
grid
(
column
=
1
,
row
=
0
)
upload_button_3
=
ttk
.
Button
(
audio_info
,
text
=
"上传文件"
,
command
=
confirm_video_path
)
upload_button_3
.
grid
(
column
=
2
,
row
=
0
)
narratage_label
=
ttk
.
Label
(
audio_info
,
text
=
"旁白脚本表格"
)
narratage_label
.
grid
(
column
=
0
,
row
=
1
)
...
...
@@ -404,14 +413,6 @@ speedChosen['values'] = (
speedChosen
.
current
(
0
)
speedChosen
.
grid
(
column
=
1
,
row
=
2
,
sticky
=
"W"
)
video_label
=
ttk
.
Label
(
audio_info
,
text
=
"原视频"
)
video_label
.
grid
(
column
=
0
,
row
=
0
)
videoPath
=
tk
.
StringVar
()
videoPath_input
=
ttk
.
Entry
(
audio_info
,
width
=
30
,
textvariable
=
videoPath
)
videoPath_input
.
grid
(
column
=
1
,
row
=
0
)
upload_button_3
=
ttk
.
Button
(
audio_info
,
text
=
"上传文件"
,
command
=
confirm_video_path
)
upload_button_3
.
grid
(
column
=
2
,
row
=
0
)
"""
语音合成步骤,包含以下内容:
- 输出音频存放于|路径文本框|打开文件夹
...
...
@@ -420,7 +421,7 @@ upload_button_3.grid(column=2, row=0)
- 停止合成按钮
"""
synthesis_command
=
ttk
.
LabelFrame
(
tab2
,
text
=
" 语音合成步骤 "
)
synthesis_command
.
place
(
relx
=
0.05
,
rely
=
0.
55
,
relwidth
=
0.9
,
relheight
=
0.4
)
synthesis_command
.
place
(
relx
=
0.05
,
rely
=
0.
45
,
relwidth
=
0.9
,
relheight
=
0.5
)
audioDir_label
=
ttk
.
Label
(
synthesis_command
,
text
=
"输出音频存放于"
)
audioDir_label
.
grid
(
column
=
0
,
row
=
0
)
...
...
@@ -430,7 +431,7 @@ audioDir_input.grid(column=1, row=0)
save_button_2
=
ttk
.
Button
(
synthesis_command
,
text
=
"打开文件夹"
,
command
=
find_save_dir
)
save_button_2
.
grid
(
column
=
2
,
row
=
0
)
caption_label
=
ttk
.
Label
(
synthesis_command
,
text
=
"输出字幕文件"
)
caption_label
=
ttk
.
Label
(
synthesis_command
,
text
=
"输出字幕文件
于
"
)
caption_label
.
grid
(
column
=
0
,
row
=
1
)
captionPath
=
tk
.
StringVar
()
captionPath_input
=
ttk
.
Entry
(
synthesis_command
,
width
=
30
,
textvariable
=
captionPath
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment