Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_2
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵心治
accessibility_movie_2
Commits
4514a70e
Commit
4514a70e
authored
Nov 16, 2022
by
翟艳秋(20软)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
合成音频使用用户选中的语速
parent
5d3fc565
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
48 additions
and
42 deletions
+48
-42
assemble_dialog.py
assemble_dialog.py
+4
-3
main_window.py
main_window.py
+6
-7
speech_synthesis.py
speech_synthesis.py
+38
-32
No files found.
assemble_dialog.py
View file @
4514a70e
...
@@ -65,12 +65,13 @@ class Assemble_Dialog(QDialog, Ui_Dialog):
...
@@ -65,12 +65,13 @@ class Assemble_Dialog(QDialog, Ui_Dialog):
print
(
"start_assemble"
)
print
(
"start_assemble"
)
video_path
=
self
.
lineEdit
.
text
()
video_path
=
self
.
lineEdit
.
text
()
# 默认 输出的音频是工程目录+/output
# 默认 输出的音频是工程目录+/output
audio_dir
=
self
.
projectContext
.
project_base_dir
+
"output/"
audio_dir
=
os
.
path
.
join
(
self
.
projectContext
.
project_base_dir
,
"output"
)
sheet_path
=
self
.
lineEdit_2
.
text
()
sheet_path
=
self
.
lineEdit_2
.
text
()
speaker_info
=
self
.
lineEdit_3
.
text
()
speaker_info
=
self
.
lineEdit_3
.
text
()
speed_info
=
self
.
lineEdit_4
.
text
()
speed_info
=
self
.
lineEdit_4
.
text
()
#todo 后续变成常量存起来,或者做成配置
#todo 后续变成常量存起来,或者做成配置
caption_path
=
replace_path_suffix
(
self
.
lineEdit
.
text
(),
".srt"
)
# caption_path = replace_path_suffix(self.lineEdit.text(), ".srt")
caption_path
=
os
.
path
.
join
(
audio_dir
,
os
.
path
.
basename
(
video_path
)
.
split
(
'.'
)[
0
]
+
".srt"
)
print
(
"video_path: "
,
video_path
)
print
(
"video_path: "
,
video_path
)
print
(
"audio_dir: "
,
audio_dir
)
print
(
"audio_dir: "
,
audio_dir
)
...
@@ -78,7 +79,7 @@ class Assemble_Dialog(QDialog, Ui_Dialog):
...
@@ -78,7 +79,7 @@ class Assemble_Dialog(QDialog, Ui_Dialog):
print
(
"speed_info: "
,
speed_info
)
print
(
"speed_info: "
,
speed_info
)
print
(
"caption_path: "
,
caption_path
)
print
(
"caption_path: "
,
caption_path
)
print
(
"speaker_info: "
,
speaker_info
)
print
(
"speaker_info: "
,
speaker_info
)
self
.
start_assemble_signal
.
emit
([
video_path
,
audio_dir
,
sheet_path
,
speed_info
,
caption_path
,
speaker_info
])
self
.
start_assemble_signal
.
emit
([
video_path
,
audio_dir
,
sheet_path
,
speed_info
,
caption_path
,
speaker_info
])
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
app
=
QApplication
(
sys
.
argv
)
app
=
QApplication
(
sys
.
argv
)
...
...
main_window.py
View file @
4514a70e
...
@@ -508,16 +508,15 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -508,16 +508,15 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
export_timer
.
stop
()
self
.
export_timer
.
stop
()
print
(
"===已有线程结束了 in
%
s ==="
%
(
type
))
print
(
"===已有线程结束了 in
%
s ==="
%
(
type
))
self
.
statusbarLabel
.
setText
(
"
%
s完成"
%
(
type
))
self
.
progressBar
.
setValue
(
100
)
self
.
progressLabel
.
setText
(
f
"100
%
"
)
self
.
projectContext
.
nd_process
=
1
for
t
in
self
.
threads
:
for
t
in
self
.
threads
:
if
t
.
exitcode
!=
0
:
if
t
.
exitcode
!=
0
:
print
(
"Exception in"
,
t
.
getName
())
print
(
"Exception in"
,
t
.
getName
())
self
.
show_warning_msg_box
(
"运行出错,请联系开发者处理"
)
self
.
show_warning_msg_box
(
"运行出错,请联系开发者处理"
)
print
(
"当前已有的检测结果"
,
self
.
projectContext
.
all_elements
)
return
return
self
.
statusbarLabel
.
setText
(
"
%
s完成"
%
(
type
))
self
.
progressBar
.
setValue
(
100
)
self
.
progressLabel
.
setText
(
f
"100
%
"
)
self
.
projectContext
.
nd_process
=
1
def
deal_synthesis_callback_slot
(
self
,
threads
,
state
):
def
deal_synthesis_callback_slot
(
self
,
threads
,
state
):
self
.
statusbarLabel
.
setText
(
" 准备合成:"
)
self
.
statusbarLabel
.
setText
(
" 准备合成:"
)
...
@@ -1165,8 +1164,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
...
@@ -1165,8 +1164,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
projectContext
.
save_project
(
False
)
self
.
projectContext
.
save_project
(
False
)
def
export_all
(
self
):
def
export_all
(
self
):
#
暂时存放音频的文件夹被命名为tmp
#
存放合成音频的文件夹被命名为output
output_dir
=
os
.
path
.
join
(
self
.
projectContext
.
project_base_dir
,
"
tmp
"
)
output_dir
=
os
.
path
.
join
(
self
.
projectContext
.
project_base_dir
,
"
output
"
)
if
os
.
path
.
exists
(
output_dir
)
and
len
(
os
.
listdir
(
output_dir
))
>
0
:
if
os
.
path
.
exists
(
output_dir
)
and
len
(
os
.
listdir
(
output_dir
))
>
0
:
self
.
export
.
export_slot
(
self
.
projectContext
.
video_path
,
output_dir
)
self
.
export
.
export_slot
(
self
.
projectContext
.
video_path
,
output_dir
)
else
:
else
:
...
...
speech_synthesis.py
View file @
4514a70e
...
@@ -89,7 +89,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
...
@@ -89,7 +89,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
speaker (Speaker): 说话人
speaker (Speaker): 说话人
speed (float, optional): 指定的音频语速. Defaults to 1.0.
speed (float, optional): 指定的音频语速. Defaults to 1.0.
"""
"""
audio_path
=
tmp_file
speech_config
=
SpeechConfig
(
speech_config
=
SpeechConfig
(
subscription
=
"db34d38d2d3447d482e0f977c66bd624"
,
subscription
=
"db34d38d2d3447d482e0f977c66bd624"
,
region
=
"eastus"
region
=
"eastus"
...
@@ -102,7 +101,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
...
@@ -102,7 +101,6 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
output_file
)):
# 如果路径不存在
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
output_file
)):
# 如果路径不存在
print
(
"output_file路径不存在,创建:"
,
os
.
path
.
dirname
(
output_file
))
print
(
"output_file路径不存在,创建:"
,
os
.
path
.
dirname
(
output_file
))
os
.
makedirs
(
os
.
path
.
dirname
(
output_file
))
os
.
makedirs
(
os
.
path
.
dirname
(
output_file
))
audio_config
=
AudioOutputConfig
(
filename
=
audio_path
)
synthesizer
=
SpeechSynthesizer
(
speech_config
=
speech_config
,
audio_config
=
None
)
synthesizer
=
SpeechSynthesizer
(
speech_config
=
speech_config
,
audio_config
=
None
)
ssml_string
=
f
"""
ssml_string
=
f
"""
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
...
@@ -169,7 +167,7 @@ def read_sheet(book_path: str, sheet_name: str = "") -> dict:
...
@@ -169,7 +167,7 @@ def read_sheet(book_path: str, sheet_name: str = "") -> dict:
return
sheet_content
return
sheet_content
def
get_narratage_text
(
sheet_content
:
dict
,
speed
:
float
)
->
Tuple
[
list
,
list
,
list
]:
def
get_narratage_text
(
sheet_content
:
dict
)
->
Tuple
[
list
,
list
,
list
]:
"""获取旁白解说文本及起止时间
"""获取旁白解说文本及起止时间
Args:
Args:
...
@@ -183,40 +181,49 @@ def get_narratage_text(sheet_content: dict, speed: float) -> Tuple[list, list, l
...
@@ -183,40 +181,49 @@ def get_narratage_text(sheet_content: dict, speed: float) -> Tuple[list, list, l
subtitle
=
sheet_content
[
'字幕'
]
subtitle
=
sheet_content
[
'字幕'
]
start_time
=
sheet_content
[
'起始时间'
]
start_time
=
sheet_content
[
'起始时间'
]
end_time
=
sheet_content
[
'终止时间'
]
end_time
=
sheet_content
[
'终止时间'
]
speeds
=
sheet_content
[
"语速"
]
narratage_start_time
=
[]
narratage_start_time
=
[]
narratage_end_time
=
[]
narratage_end_time
=
[]
narratage_text
=
[]
narratage_text
=
[]
narratage_speed
=
[]
for
i
,
text
in
enumerate
(
narratage
):
for
i
,
text
in
enumerate
(
narratage
):
# 这里的speed是x.x倍速
speed
=
float
(
speeds
[
i
]
.
split
(
'('
)[
0
])
if
text
is
not
None
:
if
text
is
not
None
:
if
text
==
'翻译'
:
if
text
==
'翻译'
:
narratage_text
.
append
(
subtitle
[
i
])
narratage_text
.
append
(
subtitle
[
i
])
narratage_start_time
.
append
(
float
(
start_time
[
i
]))
narratage_end_time
.
append
(
float
(
end_time
[
i
]))
else
:
else
:
# 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
narratage_text
.
append
(
text
)
text_split
=
text
.
split
(
'
\n
'
)
"""以下为之前自动根据表格生成旁白对应起始时间和终止时间的方法,目前不需要了
# 如果旁白有对应的时间戳(是这段大旁白里的特定位置)
"""
if
start_time
[
i
]
is
not
None
and
end_time
[
i
]
is
not
None
:
# # 如果旁白中有换行符,即分为n段,则按照换行符进行分割,并间隔0.5s
cur_start
=
float
(
start_time
[
i
])
# text_split = text.split('\n')
cur_end
=
float
(
end_time
[
i
])
# # 如果旁白有对应的时间戳(是这段大旁白里的特定位置)
elif
subtitle
[
i
]
is
None
:
# if start_time[i] is not None and end_time[i] is not None:
cur_start
=
float
(
end_time
[
i
-
1
])
+
0.1
if
i
>
0
else
0
# cur_start = float(start_time[i])
# 如果是最后一句旁白,后面没有字幕及时间戳了,就先把cur_end置为-1
# cur_end = float(end_time[i])
cur_end
=
float
(
# elif subtitle[i] is None:
start_time
[
i
+
1
])
if
i
+
1
<
len
(
start_time
)
else
-
1
# # 上一个字幕/旁白的终止时间后0.1s
else
:
# cur_start = float(end_time[i - 1]) + normal_interval if i > 0 else 0
# 有字幕,可覆盖字幕
# # 如果是最后一句旁白,后面没有字幕及时间戳了,就先把cur_end置为-1
cur_start
=
float
(
start_time
[
i
])
# cur_end = float(
cur_end
=
float
(
end_time
[
i
])
# start_time[i + 1]) if i + 1 < len(start_time) else -1
for
x
in
text_split
:
# else:
if
len
(
x
)
==
0
:
# # 有字幕,可覆盖字幕
continue
# cur_start = float(start_time[i])
cur_end
=
max
(
cur_end
,
cur_start
+
(
len
(
x
)
/
normal_speed
+
normal_interval
)
/
speed
)
# cur_end = float(end_time[i])
narratage_text
.
append
(
x
)
# for x in text_split:
narratage_start_time
.
append
(
cur_start
)
# if len(x) == 0:
narratage_end_time
.
append
(
cur_end
)
# continue
cur_start
=
cur_start
+
(
len
(
x
)
/
normal_speed
+
normal_interval
)
/
speed
# cur_end = max(cur_end, cur_start + (len(x) / (normal_speed * speed) + normal_interval))
return
narratage_text
,
narratage_start_time
,
narratage_end_time
# narratage_text.append(x)
# narratage_start_time.append(cur_start)
# narratage_end_time.append(cur_end)
# cur_start = cur_start + (len(x) / normal_speed + normal_interval) / speed
narratage_start_time
.
append
(
float
(
start_time
[
i
]))
narratage_end_time
.
append
(
float
(
end_time
[
i
]))
narratage_speed
.
append
(
speed
)
return
narratage_text
,
narratage_start_time
,
narratage_end_time
,
narratage_speed
def
second_to_str
(
seconds
:
float
)
->
str
:
def
second_to_str
(
seconds
:
float
)
->
str
:
...
@@ -331,8 +338,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
...
@@ -331,8 +338,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
# print("read sheet at time: ", datetime.datetime.now())
# print("read sheet at time: ", datetime.datetime.now())
sheet_content
=
read_sheet
(
book_path
)
sheet_content
=
read_sheet
(
book_path
)
# print("get narratage text at time: ", datetime.datetime.now())
# print("get narratage text at time: ", datetime.datetime.now())
narratages
,
start_timestamp
,
end_timestamp
=
get_narratage_text
(
narratages
,
start_timestamp
,
end_timestamp
,
cur_speed
=
get_narratage_text
(
sheet_content
)
sheet_content
,
speed
)
# print("export caption at time: ", datetime.datetime.now())
# print("export caption at time: ", datetime.datetime.now())
export_caption
(
sheet_content
,
caption_file
)
export_caption
(
sheet_content
,
caption_file
)
print
(
"已导出字幕文件"
)
print
(
"已导出字幕文件"
)
...
@@ -343,7 +349,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
...
@@ -343,7 +349,7 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
for
i
,
text
in
enumerate
(
narratages
):
for
i
,
text
in
enumerate
(
narratages
):
wav_path
=
root_path
+
'/
%.2
f.wav'
%
start_timestamp
[
i
]
wav_path
=
root_path
+
'/
%.2
f.wav'
%
start_timestamp
[
i
]
narratage_paths
.
append
(
wav_path
)
narratage_paths
.
append
(
wav_path
)
speech_synthesis
(
text
,
wav_path
,
chosen_speaker
,
speed
)
speech_synthesis
(
text
,
wav_path
,
chosen_speaker
,
cur_speed
[
i
]
)
print
(
"目前正在处理{}"
.
format
(
wav_path
))
print
(
"目前正在处理{}"
.
format
(
wav_path
))
if
state
is
not
None
:
if
state
is
not
None
:
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
*
0.97
state
[
0
]
=
float
((
i
+
1
)
/
len
(
narratages
))
*
0.97
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment