Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_2
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵心治
accessibility_movie_2
Commits
db7d6ee9
Commit
db7d6ee9
authored
Aug 25, 2023
by
陈晓勇(工程师)
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dev
parent
4733180f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
214 additions
and
225 deletions
+214
-225
deploy.bat
deploy.bat
+2
-2
detect_with_ocr.py
detect_with_ocr.py
+34
-4
excel_utils.py
excel_utils.py
+8
-6
main_window.py
main_window.py
+0
-0
main_window_ui.py
main_window_ui.py
+17
-8
management.py
management.py
+21
-7
myvideoslider.py
myvideoslider.py
+2
-0
conf.ini
res/conf.ini
+2
-2
speakers.json
res/speakers.json
+1
-140
setting_dialog.py
setting_dialog.py
+10
-7
speech_synthesis.py
speech_synthesis.py
+38
-34
start.py
start.py
+14
-1
start.spec
start.spec
+1
-1
utils.py
utils.py
+64
-13
No files found.
deploy.bat
View file @
db7d6ee9
@echo on
%1 mshta vbscript:CreateObject("Shell.Application").ShellExecute("cmd.exe","/c %~s0 ::","","runas",1)(window.close) && exit
@
REM @
echo on
@REM
%1 mshta vbscript:CreateObject("Shell.Application").ShellExecute("cmd.exe","/c %~s0 ::","","runas",1)(window.close) && exit
cd /d %~dp0
IF EXIST "%PROGRAMFILES(X86)%" (GOTO 64BIT) ELSE (GOTO 32BIT)
...
...
detect_with_ocr.py
View file @
db7d6ee9
...
...
@@ -27,7 +27,7 @@ import difflib
import
re
from
typing
import
Tuple
,
Union
from
utils
import
reverse_time_to_seconds
from
detect_with_asr
import
create_sheet
,
write_to_sheet
from
main_window
import
MainWindow
,
Element
# 字幕的上下边界
...
...
@@ -381,11 +381,26 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
lastConf
=
conf
def
add_to_list
(
mainWindow
:
MainWindow
,
element_type
:
str
,
li
:
list
):
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
=
li
st_time_sec
,
ed_time_sec
=
str
(
st_time_sec
),
str
(
ed_time_sec
)
# 默认使用配置文件中的语速
speed
=
mainWindow
.
projectContext
.
speaker_speed
aside_head_time
=
float
(
reverse_time_to_seconds
(
mainWindow
.
aside_head_time
))
if
mainWindow
.
aside_head_time
!=
None
else
float
(
0
)
st_time_sec
,
ed_time_sec
,
subtitle
,
suggest
=
li
print
(
">>>>>>>>start time:"
)
print
(
st_time_sec
)
if
(
st_time_sec
!=
None
and
st_time_sec
!=
""
and
aside_head_time
>
float
(
st_time_sec
)):
print
(
">>>>>>need del"
)
print
(
st_time_sec
)
if
not
mainWindow
.
add_head_aside
:
new_element
=
Element
(
'0.00'
,
""
,
""
,
"0/100"
,
""
,
speed
)
mainWindow
.
projectContext
.
aside_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
aside_list
[
-
1
])
mainWindow
.
last_aside_index
=
len
(
mainWindow
.
projectContext
.
all_elements
)
-
1
mainWindow
.
add_head_aside
=
True
return
st_time_sec
,
ed_time_sec
=
str
(
st_time_sec
),
str
(
ed_time_sec
)
aside
=
""
i
=
len
(
mainWindow
.
projectContext
.
all_elements
)
if
element_type
==
"字幕"
:
...
...
@@ -393,12 +408,19 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
new_element
.
print_self
()
if
mainWindow
.
last_aside_index
!=
None
and
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
==
""
and
new_element
.
ed_time_sec
!=
""
:
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
=
new_element
.
st_time_sec
if
float
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
)
-
float
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
st_time_sec
)
<
2
:
# print(">>>>>>>>>>>remove short aside")
mainWindow
.
projectContext
.
aside_list
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
all_elements
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
last_aside_index
=
None
mainWindow
.
projectContext
.
subtitle_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
subtitle_list
[
-
1
])
else
:
if
i
==
0
:
st_time_sec
=
"0.01"
else
:
if
mainWindow
.
projectContext
.
all_elements
[
i
-
1
]
.
ed_time_sec
==
""
:
return
st_time_sec
=
"
%.2
f"
%
(
float
(
mainWindow
.
projectContext
.
all_elements
[
i
-
1
]
.
ed_time_sec
)
+
0.01
)
# 因为暂时没有用到ed_time_sec,所以直接赋值空吧
...
...
@@ -407,6 +429,14 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
new_element
.
print_self
()
if
mainWindow
.
last_aside_index
!=
None
and
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
==
""
and
new_element
.
ed_time_sec
!=
""
:
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
=
new_element
.
st_time_sec
if
float
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
ed_time_sec
)
-
float
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
]
.
st_time_sec
)
<
2
:
# print(">>>>>>>>>>>remove short aside")
mainWindow
.
projectContext
.
aside_list
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
projectContext
.
all_elements
.
remove
(
mainWindow
.
projectContext
.
all_elements
[
mainWindow
.
last_aside_index
])
mainWindow
.
last_aside_index
=
None
new_element
.
suggest
=
"0/"
+
new_element
.
suggest
if
(
st_time_sec
!=
None
and
st_time_sec
!=
""
and
aside_head_time
>
float
(
st_time_sec
)):
return
mainWindow
.
projectContext
.
aside_list
.
append
(
new_element
)
mainWindow
.
projectContext
.
all_elements
.
append
(
mainWindow
.
projectContext
.
aside_list
[
-
1
])
mainWindow
.
last_aside_index
=
len
(
mainWindow
.
projectContext
.
all_elements
)
-
1
...
...
excel_utils.py
View file @
db7d6ee9
...
...
@@ -4,7 +4,7 @@ import openpyxl
from
management
import
Element
from
utils
import
reverse_time_to_seconds
from
utils
import
reverse_time_to_seconds
,
get_seconds
def
read_xls
(
file_path
):
print
(
"read_xls"
)
...
...
@@ -28,9 +28,9 @@ def read_xls(file_path):
cell_value
=
str
(
cell_value
)
if
cell_value
!=
None
else
""
# print(cell_value)
if
col_index
==
0
:
start_time
=
reverse_time_to
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
start_time
=
get
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
if
col_index
==
1
:
end_time
=
reverse_time_to
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
end_time
=
get
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
if
col_index
==
2
:
subtitle
=
cell_value
if
cell_value
!=
None
else
""
if
col_index
==
3
:
...
...
@@ -73,9 +73,9 @@ def read_xlsx(file_path):
cell_value
=
str
(
cell_value
)
if
cell_value
!=
None
else
""
# print(cell_value)
if
col_index
==
0
:
start_time
=
reverse_time_to
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
start_time
=
get
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
if
col_index
==
1
:
end_time
=
reverse_time_to
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
end_time
=
get
_seconds
(
cell_value
)
if
cell_value
!=
None
else
""
if
col_index
==
2
:
subtitle
=
cell_value
if
cell_value
!=
None
else
""
if
col_index
==
3
:
...
...
@@ -102,8 +102,10 @@ def checkLength(elements):
for
element
in
elements
:
# if int("".join(filter(str.isdigit, element.suggest))) < len(element.aside):
# raise Exception("旁白字数没有按照推荐要求")
if
int
(
element
.
suggest
)
<
len
(
element
.
aside
):
if
int
(
element
.
suggest
.
split
(
"/"
)[
1
]
)
<
len
(
element
.
aside
):
raise
Exception
(
"旁白字数没有按照推荐要求"
)
else
:
element
.
suggest
=
str
(
len
(
element
.
aside
))
+
"/"
+
element
.
suggest
.
split
(
"/"
)[
1
]
# elements = read_xlsx("C:/Users/AIA/Desktop/1/121/1.xlsx")
# print(len(elements))
...
...
main_window.py
View file @
db7d6ee9
This diff is collapsed.
Click to expand it.
main_window_ui.py
View file @
db7d6ee9
...
...
@@ -334,6 +334,7 @@ class Ui_MainWindow(object):
self
.
all_tableWidget
.
setObjectName
(
"all_tableWidget"
)
self
.
all_tableWidget
.
setColumnCount
(
0
)
self
.
all_tableWidget
.
setRowCount
(
0
)
# self.all_tableWidget.setStyleSheet(f"QTableWidget::item:selected:enabled {{ background-color: blue; color: white; }}")
self
.
all_tableWidget
.
setSelectionBehavior
(
QtWidgets
.
QAbstractItemView
.
SelectRows
)
self
.
horizontalLayout_4
.
addWidget
(
self
.
all_tableWidget
)
self
.
tabWidget
.
addTab
(
self
.
all_tab
,
""
)
...
...
@@ -459,6 +460,8 @@ class Ui_MainWindow(object):
self
.
menu
.
setObjectName
(
"menu"
)
self
.
menu_2
=
QtWidgets
.
QMenu
(
self
.
menubar
)
self
.
menu_2
.
setObjectName
(
"menu_2"
)
self
.
menu_7
=
QtWidgets
.
QMenu
(
self
.
menubar
)
self
.
menu_7
.
setObjectName
(
"menu_7"
)
# self.menu_3 = QtWidgets.QMenu(self.menubar)
# self.menu_3.setObjectName("menu_3")
self
.
menu_4
=
QtWidgets
.
QMenu
(
self
.
menubar
)
...
...
@@ -499,14 +502,16 @@ class Ui_MainWindow(object):
self
.
action_4
.
setEnabled
(
False
)
self
.
action_5
=
QtWidgets
.
QAction
(
"旁白导入"
,
self
,
triggered
=
self
.
import_excel
)
self
.
action_5
.
setEnabled
(
False
)
self
.
action_6
=
QtWidgets
.
QAction
(
"字幕上边界
++
"
,
self
,
triggered
=
self
.
up_ocr
)
self
.
action_6
=
QtWidgets
.
QAction
(
"字幕上边界
上移
"
,
self
,
triggered
=
self
.
up_ocr
)
self
.
action_6
.
setEnabled
(
True
)
self
.
action_7
=
QtWidgets
.
QAction
(
"字幕上边界
--
"
,
self
,
triggered
=
self
.
down_ocr
)
self
.
action_7
=
QtWidgets
.
QAction
(
"字幕上边界
下移
"
,
self
,
triggered
=
self
.
down_ocr
)
self
.
action_7
.
setEnabled
(
True
)
self
.
action_8
=
QtWidgets
.
QAction
(
"字幕下边界
++
"
,
self
,
triggered
=
self
.
up_ocr_bottom
)
self
.
action_8
=
QtWidgets
.
QAction
(
"字幕下边界
上移
"
,
self
,
triggered
=
self
.
up_ocr_bottom
)
self
.
action_8
.
setEnabled
(
True
)
self
.
action_9
=
QtWidgets
.
QAction
(
"字幕下边界
--
"
,
self
,
triggered
=
self
.
down_ocr_bottom
)
self
.
action_9
=
QtWidgets
.
QAction
(
"字幕下边界
下移
"
,
self
,
triggered
=
self
.
down_ocr_bottom
)
self
.
action_9
.
setEnabled
(
True
)
self
.
action_10
=
QtWidgets
.
QAction
(
"片头旁白定位"
,
self
,
triggered
=
self
.
confirm_head_aside
)
self
.
action_10
.
setEnabled
(
True
)
# self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow)
...
...
@@ -542,13 +547,16 @@ class Ui_MainWindow(object):
# self.menu_3.addSeparator()
self
.
menubar
.
addAction
(
self
.
menu
.
menuAction
())
self
.
menubar
.
addAction
(
self
.
menu_2
.
menuAction
())
self
.
menubar
.
addAction
(
self
.
menu_7
.
menuAction
())
self
.
menubar
.
addAction
(
self
.
action_3
)
self
.
menubar
.
addAction
(
self
.
action_4
)
self
.
menubar
.
addAction
(
self
.
action_5
)
self
.
menubar
.
addAction
(
self
.
action_6
)
self
.
menubar
.
addAction
(
self
.
action_7
)
self
.
menubar
.
addAction
(
self
.
action_8
)
self
.
menubar
.
addAction
(
self
.
action_9
)
self
.
menu_7
.
addAction
(
self
.
action_6
)
self
.
menu_7
.
addAction
(
self
.
action_7
)
self
.
menu_7
.
addAction
(
self
.
action_8
)
self
.
menu_7
.
addAction
(
self
.
action_9
)
self
.
menu_7
.
addAction
(
self
.
action_10
)
# self.menubar.addAction(self.menu_5.menuAction())
# self.menubar.addAction(self.menu_6.menuAction())
# self.menubar.addAction(self.menu_3.menuAction())
...
...
@@ -572,6 +580,7 @@ class Ui_MainWindow(object):
self
.
pb_label
.
setText
(
_translate
(
"MainWindow"
,
"刻度"
))
self
.
menu
.
setTitle
(
_translate
(
"MainWindow"
,
"文件"
))
self
.
menu_2
.
setTitle
(
_translate
(
"MainWindow"
,
"编辑"
))
self
.
menu_7
.
setTitle
(
_translate
(
"MainWindow"
,
"旁白检测准备"
))
# self.menu_3.setTitle(_translate("MainWindow", "功能按键"))
self
.
menu_4
.
setTitle
(
_translate
(
"MainWindow"
,
"旁白区间检测"
))
self
.
menu_5
.
setTitle
(
_translate
(
"MainWindow"
,
"旁白音频合成"
))
...
...
management.py
View file @
db7d6ee9
...
...
@@ -9,7 +9,7 @@ import openpyxl
import
constant
from
openpyxl.styles
import
PatternFill
,
Alignment
from
utils
import
replace_path_suffix
,
transfer_second_to_time
,
reverse_time_to_seconds
from
utils
import
replace_path_suffix
,
transfer_second_to_time
,
reverse_time_to_seconds
,
get_seconds
,
transfer_second_to_all_time
from
speech_synthesis
import
Speaker
class
RunThread
(
threading
.
Thread
):
"""复写线程类,用于解决主线程无法捕捉子线程中异常的问题
...
...
@@ -186,7 +186,7 @@ class ProjectContext:
self
.
excel_path
=
info
[
"excel_path"
]
self
.
speaker_info
=
info
[
"speaker_info"
][
"speaker_id"
]
self
.
speaker_speed
=
info
[
"speaker_info"
][
"speaker_speed"
]
self
.
speaker_type
=
info
[
"speaker_info"
][
"speaker_type"
]
if
"speaker_type"
in
info
[
"speaker_info"
]
else
"
科大讯飞
"
self
.
speaker_type
=
info
[
"speaker_info"
][
"speaker_type"
]
if
"speaker_type"
in
info
[
"speaker_info"
]
else
"
浙大内部tts
"
self
.
detected
=
info
[
"detection_info"
][
"detected"
]
self
.
nd_process
=
info
[
"detection_info"
][
"nd_process"
]
self
.
last_time
=
info
[
"detection_info"
][
"last_time"
]
...
...
@@ -232,7 +232,6 @@ class ProjectContext:
# 先备份文件,再覆盖主文件,可选是否需要备份,默认需要备份
# 20221030:添加旁白检测的进度
def
save_project
(
self
,
need_save_new
:
bool
=
False
)
->
str
:
print
(
"22222sava"
)
self
.
save_conf
()
# all_element = sorted(all_element, key=lambda x: float(x.st_time_sec))
print
(
"current excel_path:"
,
self
.
excel_path
)
...
...
@@ -261,7 +260,7 @@ class ProjectContext:
def
refresh_element
(
self
,
row
,
aside
:
str
):
self
.
all_elements
[
int
(
row
)]
.
aside
=
aside
if
not
self
.
initial_ing
:
if
not
self
.
initial_ing
:
save_excel_to_path
(
self
.
all_elements
,
self
.
excel_path
,
self
.
write_header
,
self
.
excel_sheet_name
)
def
refresh_speed
(
self
,
row
,
speed
:
str
)
->
None
:
...
...
@@ -375,8 +374,9 @@ class ProjectContext:
for
speaker
in
content
[
"speaker_zju_details"
]:
speaker_name
.
append
(
","
.
join
([
speaker
[
"name"
],
speaker
[
"gender"
],
speaker
[
"age_group"
]]))
if
self
.
speaker_info
is
None
:
if
self
.
speaker_info
is
None
or
self
.
speaker_info
==
""
:
self
.
speaker_info
=
speaker_name
[
0
]
print
(
">>>>>>>>>>>>>>>>>get all info :"
+
self
.
speaker_info
)
return
tuple
(
speaker_name
)
def
init_speakers
(
self
):
...
...
@@ -457,9 +457,23 @@ def write_to_sheet(path: str, sheet_name: str, valuelist: list):
value
=
[
""
if
x
==
None
else
x
for
x
in
value
]
# value.insert(1, transfer_second_to_time(value[0])) if value[0] != "" else value.insert(1, "")
# value.insert(3, transfer_second_to_time(value[2])) if value[2] != "" else value.insert(3, "")
value
[
0
]
=
transfer_second_to_time
(
value
[
0
])
if
value
[
0
]
!=
""
else
""
value
[
1
]
=
transfer_second_to_time
(
value
[
1
])
if
value
[
1
]
!=
""
else
""
# value[0] = get_seconds(value[0]) if value[0] != "" else ""
# value[1] = get_seconds(value[1]) if value[1] != "" else ""
value
[
0
]
=
transfer_second_to_all_time
(
value
[
0
])
if
value
[
0
]
!=
""
else
""
value
[
1
]
=
transfer_second_to_all_time
(
value
[
1
])
if
value
[
1
]
!=
""
else
""
suggest
=
value
[
3
]
print
(
">>>>>>>>>>>>>>>>>>>suggest:"
+
suggest
)
if
suggest
!=
None
and
suggest
!=
""
:
arrays
=
suggest
.
split
(
"/"
)
if
len
(
arrays
)
==
2
:
value
[
3
]
=
str
(
len
(
value
[
4
]))
+
"/"
+
arrays
[
1
]
print
(
">>>>>>>>>>>>v3:"
+
value
[
3
])
else
:
value
[
3
]
=
str
(
len
(
value
[
4
]))
+
"/"
+
arrays
[
0
]
print
(
">>>>>>>>>>>>v3:"
+
value
[
3
])
index
=
len
(
value
)
cur_row
=
sheet
.
max_row
for
j
in
range
(
0
,
index
):
...
...
myvideoslider.py
View file @
db7d6ee9
...
...
@@ -13,3 +13,4 @@ class myVideoSlider(QSlider):
# self.setValue(int(value)/9)
value
=
round
(
value
/
self
.
width
()
*
self
.
maximum
())
# 根据鼠标点击的位置和slider的长度算出百分比
self
.
ClickedValue
.
emit
(
value
)
self
.
setFocus
()
\ No newline at end of file
res/conf.ini
View file @
db7d6ee9
{"video_path":
null,
"excel_path":
null,
"detection_info":
{"detected":
false,
"nd_process":
0.0,
"last_time":
0.0,
"caption_boundings":
[]
,
"has_subtitle":
true},
"speaker_info":
{"speaker_type":
"\u6d59\u5927\u5185\u90e8tts",
"speaker_id":
"eagle\uff0c\u5973\uff0c\u5e74\u8f7b\u4eba",
"speaker_speed":
"1.00(4\u5b57/\u79d2)"}}
\ No newline at end of file
{"video_path":
null,
"excel_path":
null,
"detection_info":
{"detected":
false,
"nd_process":
0.0,
"last_time":
0.0,
"caption_boundings":
[]
,
"has_subtitle":
true},
"speaker_info":
{"speaker_type":
"",
"speaker_id":
"eagle\uff0c\u5973\uff0c\u5e74\u8f7b\u4eba",
"speaker_speed":
"1.00(4\u5b57/\u79d2)"}}
\ No newline at end of file
res/speakers.json
View file @
db7d6ee9
{
"speaker_details"
:
[{
"id"
:
0
,
"name"
:
"晓辰"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"女"
,
"description"
:
"休闲、放松的语音,用于自发性对话和会议听录。"
,
"audio_path"
:
"./res/speaker_audio/Xiaochen.wav"
,
"speaker_code"
:
"zh-CN-XiaochenNeural"
},
{
"id"
:
1
,
"name"
:
"晓涵"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"女"
,
"description"
:
"温暖、甜美、富有感情的声音,可用于许多对话场景。"
,
"audio_path"
:
"./res/speaker_audio/Xiaohan.wav"
,
"speaker_code"
:
"zh-CN-XiaohanNeural"
},
{
"id"
:
2
,
"name"
:
"晓墨"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"女"
,
"description"
:
"清晰、放松的声音,具有丰富的角色扮演和情感,适合音频书籍。"
,
"audio_path"
:
"./res/speaker_audio/Xiaomo.wav"
,
"speaker_code"
:
"zh-CN-XiaomoNeural"
},
{
"id"
:
7
,
"name"
:
"晓晓"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"女"
,
"description"
:
"活泼、温暖的声音,具有多种场景风格和情感。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoxiao.wav"
,
"speaker_code"
:
"zh-CN-XiaoxiaoNeural"
},
{
"id"
:
8
,
"name"
:
"晓萱"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"女"
,
"description"
:
"自信、有能力的声音,具有丰富的角色扮演和情感,适合音频书籍。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoxuan.wav"
,
"speaker_code"
:
"zh-CN-XiaoxuanNeural"
},
{
"id"
:
9
,
"name"
:
"晓颜"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"女"
,
"description"
:
"训练有素、舒适的语音,用于客户服务和对话场景。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoyan.wav"
,
"speaker_code"
:
"zh-CN-XiaoyanNeural"
},
{
"id"
:
3
,
"name"
:
"晓秋"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"中年人"
,
"gender"
:
"女"
,
"description"
:
"智能、舒适的语音,适合阅读长内容。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoqiu.wav"
,
"speaker_code"
:
"zh-CN-XiaoqiuNeural"
},
{
"id"
:
4
,
"name"
:
"晓秋"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"中年人"
,
"gender"
:
"女"
,
"description"
:
"智能、舒适的语音,适合阅读长内容。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoqiu.wav"
,
"speaker_code"
:
"zh-CN-XiaoqiuNeural"
},
{
"id"
:
5
,
"name"
:
"晓睿"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"老年"
,
"gender"
:
"女"
,
"description"
:
"成熟、睿智的声音,具有丰富的情感,适合音频书籍。"
,
"audio_path"
:
"./res/speaker_audio/Xiaorui.wav"
,
"speaker_code"
:
"zh-CN-XiaoruiNeural"
},
{
"id"
:
6
,
"name"
:
"晓双"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"儿童"
,
"gender"
:
"女"
,
"description"
:
"可爱、愉悦的语音,可应用于许多儿童相关场景。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoshuang.wav"
,
"speaker_code"
:
"zh-CN-XiaoshuangNeural"
},
{
"id"
:
10
,
"name"
:
"晓悠"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"儿童"
,
"gender"
:
"女"
,
"description"
:
"天使般的清晰声音,可以应用于许多儿童相关场景。"
,
"audio_path"
:
"./res/speaker_audio/Xiaoyou.wav"
,
"speaker_code"
:
"zh-CN-XiaoyouNeural"
},
{
"id"
:
11
,
"name"
:
"云希"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"男"
,
"description"
:
"活泼、阳光的声音,具有丰富的情感,可用于许多对话场景。"
,
"audio_path"
:
"./res/speaker_audio/Yunxi.wav"
,
"speaker_code"
:
"zh-CN-YunxiNeural"
},
{
"id"
:
12
,
"name"
:
"云扬"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"年轻人"
,
"gender"
:
"男"
,
"description"
:
"专业、流利的声音,具有多种场景风格。"
,
"audio_path"
:
"./res/speaker_audio/Yunyang.wav"
,
"speaker_code"
:
"zh-CN-YunyangNeural"
},
{
"id"
:
13
,
"name"
:
"云野"
,
"language"
:
"中文(普通话,简体)"
,
"age_group"
:
"中年人"
,
"gender"
:
"男"
,
"description"
:
"成熟、放松的声音,具有多种情感,适合音频书籍。"
,
"audio_path"
:
"./res/speaker_audio/Yunye.wav"
,
"speaker_code"
:
"zh-CN-YunyeNeural"
}
"speaker_details"
:
[
],
"speaker_zju_details"
:
[{
"id"
:
0
,
...
...
setting_dialog.py
View file @
db7d6ee9
...
...
@@ -39,6 +39,7 @@ class Setting_Dialog(QDialog, Ui_Dialog):
self
.
comboBox_2
.
clear
()
# todo 把所有说话人都加上来
self
.
speaker_li
=
projectContext
.
get_all_speaker_info
()
# self.speaker_li = []
self
.
speaker_zju_li
=
projectContext
.
get_all_speaker_zju_info
()
#本地tts
self
.
speed_list_zju
=
[
"1.00(4字/秒)"
,
"1.10(4.5字/秒)"
,
"1.25(5字/秒)"
,
"1.50(6字/秒)"
,
"1.75(7字/秒)"
,
"2.00(8字/秒)"
,
"2.50(10字/秒)"
]
#本地tts
...
...
@@ -46,7 +47,8 @@ class Setting_Dialog(QDialog, Ui_Dialog):
# self.comboBox.addItem(i)
self
.
speed_li_2
=
[
"1.00(4字/秒)"
,
"1.10(4.5字/秒)"
,
"1.25(5字/秒)"
,
"1.50(6字/秒)"
,
"1.75(7字/秒)"
,
"2.00(8字/秒)"
,
"2.50(10字/秒)"
]
# self.comboBox_2.addItems(self.speed_li_2)
self
.
speaker_types
=
[
"科大讯飞"
,
"浙大内部tts"
]
# self.speaker_types = ["科大讯飞", "浙大内部tts"]
self
.
speaker_types
=
[
"浙大内部tts"
]
self
.
comboBox_0
.
addItems
(
self
.
speaker_types
)
print
(
projectContext
.
speaker_type
)
if
projectContext
.
speaker_type
is
None
or
projectContext
.
speaker_type
==
""
:
...
...
@@ -54,7 +56,7 @@ class Setting_Dialog(QDialog, Ui_Dialog):
else
:
self
.
comboBox_0
.
setCurrentIndex
(
self
.
speaker_types
.
index
(
projectContext
.
speaker_type
))
if
self
.
comboBox_0
.
currentIndex
()
=
=
0
:
#讯飞
if
self
.
comboBox_0
.
currentIndex
()
!
=
0
:
#讯飞
self
.
comboBox
.
addItems
(
self
.
speaker_li
)
self
.
comboBox_2
.
addItems
(
self
.
speed_li_2
)
else
:
...
...
@@ -67,13 +69,13 @@ class Setting_Dialog(QDialog, Ui_Dialog):
self
.
comboBox
.
setCurrentIndex
(
0
)
else
:
print
(
projectContext
.
speaker_info
)
self
.
comboBox
.
setCurrentIndex
(
self
.
speaker_li
.
index
(
projectContext
.
speaker_info
)
if
self
.
comboBox_0
.
currentIndex
()
=
=
0
else
self
.
speaker_zju_li
.
index
(
projectContext
.
speaker_info
))
self
.
comboBox
.
setCurrentIndex
(
self
.
speaker_li
.
index
(
projectContext
.
speaker_info
)
if
self
.
comboBox_0
.
currentIndex
()
!
=
0
else
self
.
speaker_zju_li
.
index
(
projectContext
.
speaker_info
))
print
(
projectContext
.
speaker_speed
)
if
projectContext
.
speaker_speed
is
None
or
projectContext
.
speaker_speed
==
""
:
self
.
comboBox_2
.
setCurrentIndex
(
0
)
else
:
self
.
comboBox_2
.
setCurrentIndex
(
self
.
speed_li_2
.
index
(
projectContext
.
speaker_speed
)
if
self
.
comboBox_0
.
currentIndex
()
=
=
0
else
self
.
speed_list_zju
.
index
(
projectContext
.
speaker_speed
))
self
.
comboBox_2
.
setCurrentIndex
(
self
.
speed_li_2
.
index
(
projectContext
.
speaker_speed
)
if
self
.
comboBox_0
.
currentIndex
()
!
=
0
else
self
.
speed_list_zju
.
index
(
projectContext
.
speaker_speed
))
finally
:
self
.
refresh_flag
=
False
...
...
@@ -84,7 +86,8 @@ class Setting_Dialog(QDialog, Ui_Dialog):
self
.
comboBox
.
clear
()
self
.
comboBox_2
.
clear
()
self
.
projectContext
.
speaker_type
=
self
.
comboBox_0
.
currentText
()
if
self
.
comboBox_0
.
currentIndex
()
==
0
:
# if self.comboBox_0.currentIndex() ==0:
if
self
.
comboBox_0
.
currentIndex
()
!=
0
:
print
(
"讯飞"
)
self
.
comboBox
.
addItems
(
self
.
speaker_li
)
self
.
comboBox_2
.
addItems
(
self
.
speed_li_2
)
...
...
@@ -106,12 +109,12 @@ class Setting_Dialog(QDialog, Ui_Dialog):
if
self
.
projectContext
.
speaker_info
is
None
or
self
.
projectContext
.
speaker_info
==
""
:
self
.
comboBox
.
setCurrentIndex
(
0
)
else
:
self
.
comboBox
.
setCurrentIndex
(
self
.
speaker_li
.
index
(
self
.
projectContext
.
speaker_info
)
if
self
.
comboBox_0
.
currentIndex
()
=
=
0
else
self
.
speaker_zju_li
.
index
(
self
.
projectContext
.
speaker_info
))
self
.
comboBox
.
setCurrentIndex
(
self
.
speaker_li
.
index
(
self
.
projectContext
.
speaker_info
)
if
self
.
comboBox_0
.
currentIndex
()
!
=
0
else
self
.
speaker_zju_li
.
index
(
self
.
projectContext
.
speaker_info
))
if
self
.
projectContext
.
speaker_speed
is
None
or
self
.
projectContext
.
speaker_speed
==
""
:
self
.
comboBox_2
.
setCurrentIndex
(
0
)
else
:
self
.
comboBox_2
.
setCurrentIndex
(
self
.
speed_li_2
.
index
(
self
.
projectContext
.
speaker_speed
)
if
self
.
comboBox_0
.
currentIndex
()
=
=
0
else
self
.
speed_list_zju
.
index
(
self
.
projectContext
.
speaker_speed
))
self
.
comboBox_2
.
setCurrentIndex
(
self
.
speed_li_2
.
index
(
self
.
projectContext
.
speaker_speed
)
if
self
.
comboBox_0
.
currentIndex
()
!
=
0
else
self
.
speed_list_zju
.
index
(
self
.
projectContext
.
speaker_speed
))
def
speaker_change_slot
(
self
):
"""切换说话人
...
...
speech_synthesis.py
View file @
db7d6ee9
...
...
@@ -23,8 +23,8 @@ from typing import Tuple
import
datetime
import
numpy
as
np
from
azure.cognitiveservices.speech
import
SpeechConfig
,
SpeechSynthesizer
,
ResultReason
,
AudioDataStream
from
azure.cognitiveservices.speech.audio
import
AudioOutputConfig
#
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason, AudioDataStream
#
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import
openpyxl
import
shutil
from
vits_chinese
import
tts
...
...
@@ -82,8 +82,11 @@ def choose_speaker(speaker_name: str) -> Speaker:
Returns:
Speaker: 返回对应说话人,如果没有这个说话人则报错
"""
print
(
">>>>>>>>>>>>>speakerName:"
+
speaker_name
)
for
speaker
in
speakers
:
print
(
">>>>>>>>>>>>>speaker:"
+
speaker
.
name
)
if
speaker
.
name
==
speaker_name
:
return
speaker
raise
ValueError
...
...
@@ -106,41 +109,42 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
if
speaker
.
speaker_type
!=
None
and
speaker
.
speaker_type
==
"1"
:
tts
(
text
,
speed
,
output_file
)
else
:
speech_config
=
SpeechConfig
(
subscription
=
"db34d38d2d3447d482e0f977c66bd624"
,
region
=
"eastus"
)
print
(
"1"
)
# speech_config = SpeechConfig(
# subscription="db34d38d2d3447d482e0f977c66bd624",
# region="eastus"
# )
speech_config
.
speech_synthesis_language
=
"zh-CN"
speech_config
.
speech_synthesis_voice_name
=
speaker
.
speaker_code
#
speech_config.speech_synthesis_language = "zh-CN"
#
speech_config.speech_synthesis_voice_name = speaker.speaker_code
# 先把合成的语音文件输出得到tmp.wav中,便于可能的调速需求
#
#
先把合成的语音文件输出得到tmp.wav中,便于可能的调速需求
synthesizer
=
SpeechSynthesizer
(
speech_config
=
speech_config
,
audio_config
=
None
)
ssml_string
=
f
"""
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
<voice name="{speaker.speaker_code}">
<prosody rate="{round((speed - 1.0) * 100, 2)}
%
">
{text}
</prosody>
</voice>
</speak>"""
result
=
synthesizer
.
speak_ssml_async
(
ssml_string
)
.
get
()
stream
=
AudioDataStream
(
result
)
stream
.
save_to_wav_file
(
output_file
)
print
(
result
.
reason
)
while
result
.
reason
==
ResultReason
.
Canceled
:
cancellation_details
=
result
.
cancellation_details
print
(
"取消的原因"
,
cancellation_details
.
reason
,
cancellation_details
.
error_details
)
time
.
sleep
(
1
)
synthesizer
.
stop_speaking
()
del
synthesizer
synthesizer
=
SpeechSynthesizer
(
speech_config
=
speech_config
,
audio_config
=
None
)
result
=
synthesizer
.
speak_ssml_async
(
ssml_string
)
.
get
()
stream
=
AudioDataStream
(
result
)
stream
.
save_to_wav_file
(
output_file
)
print
(
result
.
reason
)
#
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
#
ssml_string = f"""
#
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
#
<voice name="{speaker.speaker_code}">
#
<prosody rate="{round((speed - 1.0) * 100, 2)}%">
#
{text}
#
</prosody>
#
</voice>
#
</speak>"""
#
result = synthesizer.speak_ssml_async(ssml_string).get()
#
stream = AudioDataStream(result)
#
stream.save_to_wav_file(output_file)
#
print(result.reason)
#
while result.reason == ResultReason.Canceled:
#
cancellation_details = result.cancellation_details
#
print("取消的原因", cancellation_details.reason, cancellation_details.error_details)
#
time.sleep(1)
#
synthesizer.stop_speaking()
#
del synthesizer
#
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
#
result = synthesizer.speak_ssml_async(ssml_string).get()
#
stream = AudioDataStream(result)
#
stream.save_to_wav_file(output_file)
#
print(result.reason)
# detached
def
change_speed_and_volume
(
wav_path
:
str
,
speed
:
float
=
1.0
):
...
...
start.py
View file @
db7d6ee9
...
...
@@ -56,7 +56,20 @@ def change_project_path(path):
if
__name__
==
'__main__'
:
try
:
# subprocess.call(['deploy.bat'])
if
not
is_lav_filters_installed
()
or
not
is_file_copyed
():
# if not is_lav_filters_installed() or not is_file_copyed():
# QCoreApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
# QCoreApplication.setAttribute(Qt.AA_UseHighDpiPixmaps)
# app = QApplication(sys.argv)
# app.setWindowIcon(QIcon("./res/images/eagle_2.ico"))
# apply_stylesheet(app, theme='dark_amber.xml')
# mainWindow = MainWindow(project_path)
# QtWidgets.QMessageBox.critical(mainWindow,'警告','视频解码器未正常安装',QtWidgets.QMessageBox.Yes)
# else:
# QCoreApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
# QCoreApplication.setAttribute(Qt.AA_UseHighDpiPixmaps)
# currentExitCode = MainWindow.EXIT_CODE_REBOOT
if
not
os
.
path
.
exists
(
"C:
\
LavFilters"
)
and
not
os
.
path
.
exists
(
"C:
\
Program Files (x86)
\
LAV Filters"
):
QCoreApplication
.
setAttribute
(
Qt
.
AA_EnableHighDpiScaling
)
QCoreApplication
.
setAttribute
(
Qt
.
AA_UseHighDpiPixmaps
)
app
=
QApplication
(
sys
.
argv
)
...
...
start.spec
View file @
db7d6ee9
...
...
@@ -3,7 +3,7 @@
block_cipher
=
None
env_dir
=
'C:/Users/AIA/.conda/envs/testmovie/Lib/site-packages/'
missingPkgs
=
[
'Microsoft.CognitiveServices.Speech.core.dll'
,
'decorator.py'
,
'google'
,
'paddle
'
,
'paddle
ocr'
,
'PIL'
,
'requests'
,
'urllib3'
,
'http'
,
'idna'
,
'certifi'
,
'setuptools'
,
'astor'
,
'charset_normalizer'
]
missingPkgs
=
[
'Microsoft.CognitiveServices.Speech.core.dll'
,
'decorator.py'
,
'google'
,
'paddleocr'
,
'PIL'
,
'requests'
,
'urllib3'
,
'http'
,
'idna'
,
'certifi'
,
'setuptools'
,
'astor'
,
'charset_normalizer'
]
def
add_missing_packages
(
lst
):
pkgs
=
[]
...
...
utils.py
View file @
db7d6ee9
...
...
@@ -38,6 +38,29 @@ def validate_and_get_filepath(file_info) -> Tuple[str, bool]:
return
""
,
False
return
file_info
[
0
][
0
],
True
def
get_seconds
(
time_str
:
str
):
try
:
# print(">>>>>>>>>>reverse time")
# print(time_str)
if
time_str
is
None
or
time_str
==
""
:
return
time_str
parts
=
time_str
.
split
(
":"
)
if
len
(
parts
)
!=
3
:
return
time_str
hour
=
int
(
parts
[
0
])
minutes
=
int
(
parts
[
1
])
seconds_parts
=
parts
[
2
]
.
split
(
"."
)
if
len
(
seconds_parts
)
!=
2
:
return
time_str
seconds
=
int
(
seconds_parts
[
0
])
milliseconds
=
int
(
seconds_parts
[
1
])
total_seconds
=
hour
*
3600
+
minutes
*
60
+
seconds
+
milliseconds
/
1000
return
str
(
total_seconds
)
except
Exception
as
e
:
print
(
e
)
return
time_str
def
trans_to_seconds
(
timePoint
:
str
)
->
float
:
"""将用户输入的时间字符串转换为秒数
...
...
@@ -47,17 +70,43 @@ def trans_to_seconds(timePoint: str) -> float:
Returns:
float: 时间字符串对应秒数
"""
time_in_seconds
=
0
timePoints
=
timePoint
.
split
(
':'
)
units
=
1
for
i
in
range
(
len
(
timePoints
)
-
1
,
-
1
,
-
1
):
time_in_seconds
+=
units
*
float
(
timePoints
[
i
])
units
*=
60
return
time_in_seconds
# time_in_seconds = 0
# timePoints = timePoint.split(':')
# units = 1
# for i in range(len(timePoints) - 1, -1, -1):
# time_in_seconds += units * float(timePoints[i])
# units *= 60
# return time_in_seconds
try
:
return
float
(
reverse_time_to_seconds
(
timePoint
))
except
Exception
as
e
:
return
None
def
transfer_second_to_time
(
sec
:
str
)
->
str
:
"""将秒数转换为"hh:mm:ss.xxx"格式的时间字符串
Args:
sec (str): 待转换的描述
Returns:
str: "hh:mm:ss.xxx"格式的时间字符串
"""
try
:
duration
=
int
(
float
(
sec
))
hour
=
int
(
duration
/
3600
)
minutes
=
int
((
duration
%
3600
)
/
60
)
seconds
=
int
(
duration
%
60
)
# msec = round((float(sec) - hour * 3600 - minutes * 60 - seconds) * 1000)
# time = "%02d:%02d:%02d.%03d" % (hour, minutes, seconds, msec)
time
=
"
%02
d:
%02
d:
%02
d"
%
(
hour
,
minutes
,
seconds
)
return
time
except
Exception
as
e
:
print
(
e
)
return
sec
def
transfer_second_to_all_time
(
sec
:
str
)
->
str
:
"""将秒数转换为"hh:mm:ss.xxx"格式的时间字符串
Args:
sec (str): 待转换的描述
...
...
@@ -71,6 +120,7 @@ def transfer_second_to_time(sec: str) -> str:
seconds
=
int
(
duration
%
60
)
msec
=
round
((
float
(
sec
)
-
hour
*
3600
-
minutes
*
60
-
seconds
)
*
1000
)
time
=
"
%02
d:
%02
d:
%02
d.
%03
d"
%
(
hour
,
minutes
,
seconds
,
msec
)
# time = "%02d:%02d:%02d" % (hour, minutes, seconds)
return
time
except
Exception
as
e
:
print
(
e
)
...
...
@@ -97,13 +147,13 @@ def reverse_time_to_seconds(time_str: str) -> str:
hour
=
int
(
parts
[
0
])
minutes
=
int
(
parts
[
1
])
seconds_parts
=
parts
[
2
]
.
split
(
"."
)
if
len
(
seconds_parts
)
!=
2
:
return
time_str
# if len(seconds_parts) != 2:
# return time_str
seconds
=
int
(
seconds_parts
[
0
])
milliseconds
=
int
(
seconds_parts
[
1
])
#
milliseconds = int(seconds_parts[1])
total_seconds
=
hour
*
3600
+
minutes
*
60
+
seconds
+
milliseconds
/
1000
# total_seconds = hour * 3600 + minutes * 60 + seconds + milliseconds / 1000
total_seconds
=
hour
*
3600
+
minutes
*
60
+
seconds
print
(
str
(
total_seconds
))
return
str
(
total_seconds
)
except
Exception
as
e
:
...
...
@@ -189,7 +239,8 @@ def get_progress_with_cmd(cmd: str, state=None):
if
result
is
not
None
:
elapsed_time
=
result
.
groupdict
()[
'time'
]
# 此处可能会出现进度超过100%,未对数值进行纠正
progress
=
trans_to_seconds
(
elapsed_time
)
/
trans_to_seconds
(
duration
)
# progress = trans_to_seconds(elapsed_time) / trans_to_seconds(duration)
progress
=
float
(
get_seconds
(
elapsed_time
))
/
float
(
get_seconds
(
duration
))
state
[
0
]
=
pre
+
progress
*
0.2
print
(
elapsed_time
)
print
(
progress
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment