Commit b3f15065 authored by xuanweiace

Merge branch 'feat_1' of http://gitlab.uiiai.com/xuanweiace/accessibility_movie_2 into feat_1

Conflicts: main_window.py
parents d2fb7787 a5f2f081
......@@ -17,11 +17,12 @@ class Detect_Dialog(QDialog, Ui_Dialog):
self.setupUi(self)
self.setWindowTitle("检测")
self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).setText("开始检测")
self.buttonBox.button(QDialogButtonBox.StandardButton.Cancel).setText("取消")
self.pushButton.clicked.connect(self.openFile)
self.pushButton_2.clicked.connect(self.openTableFile)
self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(self.start_detect)
def openFile(self):
    """Let the user pick a video file and show its path in ``lineEdit``.

    The raw dialog result is handed to ``validate_and_get_filepath``; the
    line edit is only updated when that helper reports success.
    """
    # Each glob needs its own "*." prefix. The previous "*mkv" / "*avi"
    # patterns matched any file name that merely ends in those letters.
    file_info = QFileDialog.getOpenFileNames(
        self, '选择视频', os.getcwd(),
        "Video Files(*.mp4 *.rmvb *.mkv *.avi)")
    file_name, ok = validate_and_get_filepath(file_info)
    if ok:
        self.lineEdit.setText(file_name)
......
......@@ -67,7 +67,7 @@ def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
# cv2.imshow('img', gray)
# cv2.imshow(img)
cnt += 1
if img is None or cnt > 1000:
if img is None or cnt > 10000:
break
if cnt % int(fps / 3) != 0:
continue
......@@ -238,16 +238,18 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
start_time = 0
end_time = 0
video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
pre_state = state[0]
while True:
_, frame = video.read()
if frame is None:
break
cnt += 1
cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
# 判断当前帧是否已超限制
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
if cur_time > end:
if cur_time - end_time > 1:
print('--------------------------------------------------')
recommend_lens = int((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time) * normal_speed)
recommend_lens = int((cur_time - end_time) * normal_speed)
# write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
......@@ -258,17 +260,23 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
break
# 每秒取4帧画面左右
if cnt % int(fps / 4) == 0:
state[0] = float((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - begin) / (end - begin)) \
if state[0] is None or state[0] < 0.99 else 0.99
# 更新当前工程的检测进度
if pre_state is None:
state[0] = float((cur_time - begin) / (end - begin))
else:
state[0] = min(0.9999, pre_state + float((cur_time - begin) / (end - begin)))
mainWindow.projectContext.nd_process = state[0]
mainWindow.projectContext.last_time = cur_time
subTitle = detect_subtitle(frame)
if subTitle is not None:
subTitle = normalize(subTitle)
# 第一次找到字幕
if lastSubTitle is None and subTitle is not None:
start_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
start_time = cur_time
# 字幕消失
elif lastSubTitle is not None and subTitle is None:
end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
end_time = cur_time
res.append([start_time, end_time, lastSubTitle])
if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
print('--------------------------------------------------')
......@@ -279,11 +287,11 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
print(start_time, end_time, lastSubTitle)
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow,"字幕", [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
# 两句话连在一起,但是两句话不一样
add_to_list(mainWindow, "字幕", [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
elif lastSubTitle is not None and subTitle is not None:
# 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.7:
end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
end_time = cur_time
res.append([start_time, end_time, lastSubTitle])
if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
print('--------------------------------------------------')
......@@ -343,12 +351,17 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
book_name_xlsx = book_path
sheet_name_xlsx = "旁白插入位置建议"
context = mainWindow.projectContext
# 获取字幕在画面中的上下边界,方便在后续视频遍历过程中直接对字幕对应区域进行分析
global up_b, down_b
# print("get the bounding of the narratage at time: ", datetime.datetime.now())
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b, down_b = get_position(video_path, start_time +300)
if context.detected:
up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
else:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b, down_b = get_position(video_path, start_time +300)
context.caption_boundings = [up_b, down_b]
context.detected = True
# 获取并构建输出信息
table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
# print("create sheet at time: ", datetime.datetime.now())
......
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="20"
height="20"
viewBox="0 0 5.2916664 5.2916664"
version="1.1"
id="svg8"
inkscape:version="0.92.4 5da689c313, 2019-01-14"
sodipodi:docname="slider.svg"
inkscape:export-filename="/home/yeison/Development/piton/art/icon_lite.png"
inkscape:export-xdpi="96"
inkscape:export-ydpi="96">
<defs
id="defs2" />
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="28.704913"
inkscape:cx="8.5671075"
inkscape:cy="8.8021939"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:window-width="1920"
inkscape:window-height="1015"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:showpageshadow="false"
units="px"
inkscape:pagecheckerboard="false"
showguides="true"
inkscape:snap-bbox="true"
inkscape:bbox-paths="true"
inkscape:bbox-nodes="true"
inkscape:snap-bbox-edge-midpoints="true"
inkscape:snap-bbox-midpoints="true"
inkscape:snap-nodes="true"
inkscape:object-paths="true"
inkscape:snap-intersection-paths="true"
inkscape:snap-smooth-nodes="true"
inkscape:snap-midpoints="true"
inkscape:snap-global="true"
fit-margin-top="0"
fit-margin-left="0"
fit-margin-right="0"
fit-margin-bottom="0"
inkscape:guide-bbox="true">
<inkscape:grid
type="xygrid"
id="grid974"
empspacing="8"
spacingx="0.26458332"
spacingy="0.26458332"
dotted="false"
visible="true"
enabled="true"
snapvisiblegridlinesonly="true"
originx="0"
originy="0" />
</sodipodi:namedview>
<metadata
id="metadata5">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1"
transform="translate(0,-291.70835)">
<g
id="g847"
transform="matrix(0.05207439,0,0,0.05207453,-0.90125164,282.41203)">
<g
id="g851">
<g
id="g1059"
transform="matrix(1.9986219,0,0,1.9986185,17.324484,-313.52314)">
<path
inkscape:transform-center-y="3.175"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
d="M 25.399999,271.60002 -8.0000008e-7,246.20002 H 50.799999 Z"
id="path883"
inkscape:connector-curvature="0"
sodipodi:nodetypes="cccc" />
<path
sodipodi:nodetypes="cccc"
inkscape:connector-curvature="0"
id="path880"
d="m 25.399999,271.60002 25.399999,25.4 H 0 Z"
inkscape:transform-center-y="-3.1749995"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
<rect
ry="5.0534658"
y="253.84885"
x="7.6487389"
height="35.528759"
width="35.528786"
id="rect870"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.06184419;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
<circle
r="25.396828"
cy="271.60001"
cx="25.4"
id="path872"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07635882;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
<circle
transform="rotate(-45)"
cx="-174.08969"
cy="210.01071"
r="12.656071"
id="path876"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07399406;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
<path
inkscape:transform-center-x="-3.1749999"
sodipodi:nodetypes="cccc"
inkscape:connector-curvature="0"
id="path904"
d="m 25.4,271.60002 -25.40000040000004,25.4 v -50.8 z"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
<path
inkscape:transform-center-x="3.175"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
d="m 25.399999,271.60002 25.4,-25.4 v 50.8 z"
id="path906"
inkscape:connector-curvature="0"
sodipodi:nodetypes="cccc" />
<rect
ry="5.0514922"
y="256.39301"
x="2.5663135"
height="30.440479"
width="45.693634"
id="rect837"
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.0657438;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
<rect
style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.0657438;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
id="rect831"
width="45.693588"
height="30.44051"
x="248.76645"
y="-40.633385"
ry="5.051497"
transform="rotate(90)" />
</g>
</g>
</g>
<path
style="opacity:1;fill:#ffc107;fill-opacity:1;stroke:none;stroke-width:0.38596651;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 50.206421,401.67683 c 110.217209,0.71279 55.108609,0.3564 0,0 z"
id="rect997"
inkscape:connector-curvature="0" />
<path
style="opacity:1;fill:#0000ff;fill-opacity:1;stroke:none;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
d="M 10,0.01367188 C 4.4846749,0.01360343 0.01360343,4.4846749 0.01367188,10 0.0136035,15.515325 4.484675,19.986396 10,19.986328 15.515325,19.986396 19.986396,15.515325 19.986328,10 19.986396,4.484675 15.515325,0.0136035 10,0.01367188 Z"
transform="matrix(0.26458332,0,0,0.26458332,0,291.70835)"
id="path826"
inkscape:connector-curvature="0"
sodipodi:nodetypes="ccccc" />
</g>
</svg>
This diff is collapsed.
......@@ -112,6 +112,28 @@
<property name="autoFillBackground">
<bool>true</bool>
</property>
<widget class="QSlider" name="verticalSlider">
<property name="geometry">
<rect>
<x>560</x>
<y>310</y>
<width>16</width>
<height>161</height>
</rect>
</property>
<property name="maximum">
<number>100</number>
</property>
<property name="singleStep">
<number>5</number>
</property>
<property name="value">
<number>100</number>
</property>
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
</widget>
</widget>
</item>
<item>
......@@ -119,17 +141,6 @@
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<widget class="QPushButton" name="btn_open">
<property name="maximumSize">
<size>
<width>100</width>
<height>25</height>
</size>
</property>
<property name="text">
<string>打开表格文件</string>
</property>
</widget>
<widget class="QPushButton" name="btn_play">
<property name="minimumSize">
<size>
......@@ -175,85 +186,20 @@ QPushButton:pressed {
<string>播放</string>
</property>
</widget>
<widget class="QPushButton" name="btn_stop">
<property name="minimumSize">
<size>
<width>50</width>
<height>50</height>
</size>
</property>
<property name="maximumSize">
<size>
<width>50</width>
<height>50</height>
</size>
</property>
<property name="styleSheet">
<string notr="true">QPushButton {
color: #333;
border: 2px groove gray;
border-radius: 25px;
border-style: outset;
background: qradialgradient(
cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,
radius: 1.35, stop: 0 #fff, stop: 1 #888
);
padding: 5px;
}
QPushButton:hover {
background: qradialgradient(
cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,
radius: 1.35, stop: 0 #fff, stop: 1 #bbb
);
}
QPushButton:pressed {
border-style: inset;
background: qradialgradient(
cx: 0.4, cy: -0.1, fx: 0.4, fy: -0.1,
radius: 1.35, stop: 0 #fff, stop: 1 #ddd
);
}</string>
</property>
<widget class="QLabel" name="label_2">
<property name="text">
<string>暂停</string>
<string>00:00/00:00</string>
</property>
</widget>
<widget class="QSlider" name="sld_audio">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Fixed">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>50</width>
<height>25</height>
</size>
</property>
<widget class="QPushButton" name="pushButton">
<property name="maximumSize">
<size>
<width>150</width>
<height>25</height>
<width>50</width>
<height>16777215</height>
</size>
</property>
<property name="value">
<number>99</number>
</property>
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
<widget class="QLabel" name="lab_audio">
<property name="text">
<string>volume:100%</string>
</property>
</widget>
<widget class="QLabel" name="label_2">
<property name="text">
<string>00:00/12:34</string>
<string>音量</string>
</property>
</widget>
</widget>
......@@ -279,7 +225,7 @@ QPushButton:pressed {
<enum>QTabWidget::Triangular</enum>
</property>
<property name="currentIndex">
<number>2</number>
<number>1</number>
</property>
<property name="iconSize">
<size>
......@@ -503,7 +449,7 @@ QPushButton:pressed {
<x>0</x>
<y>0</y>
<width>939</width>
<height>26</height>
<height>22</height>
</rect>
</property>
<widget class="QMenu" name="menu">
......
......@@ -7,6 +7,8 @@
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
from PaintQSlider import PaintQSlider
import qtawesome
class Ui_MainWindow(object):
......@@ -61,16 +63,19 @@ class Ui_MainWindow(object):
self.splitter = QtWidgets.QSplitter(self.verticalWidget_3)
self.splitter.setOrientation(QtCore.Qt.Horizontal)
self.splitter.setObjectName("splitter")
self.btn_open = QtWidgets.QPushButton(self.splitter)
self.btn_open.setMaximumSize(QtCore.QSize(100, 25))
self.btn_open.setObjectName("btn_open")
self.btn_play = QtWidgets.QPushButton(self.splitter)
self.btn_play.setMinimumSize(QtCore.QSize(50, 50))
self.btn_play.setMaximumSize(QtCore.QSize(50, 50))
self.label_2 = QtWidgets.QLabel(self.splitter)
self.label_2.setObjectName("label_2")
# self.btn_open = QtWidgets.QPushButton(self.splitter)
# self.btn_open.setMaximumSize(QtCore.QSize(100, 25))
# self.btn_open.setObjectName("btn_open")
self.btn_play = QtWidgets.QPushButton(qtawesome.icon('fa.play-circle', color='#FFFFFF', font=50), "", self.splitter)
self.btn_play.setIconSize(QtCore.QSize(30, 30))
self.btn_play.setMinimumSize(QtCore.QSize(30, 30))
self.btn_play.setMaximumSize(QtCore.QSize(30, 30))
self.btn_play.setStyleSheet("QPushButton {\n"
" color: #333;\n"
" border: 2px groove gray;\n"
" border-radius: 25px;\n"
" border-radius: 15px;\n"
" border-style: outset;\n"
" background: qradialgradient(\n"
" cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,\n"
......@@ -94,36 +99,6 @@ class Ui_MainWindow(object):
" );\n"
" }")
self.btn_play.setObjectName("btn_play")
self.btn_stop = QtWidgets.QPushButton(self.splitter)
self.btn_stop.setMinimumSize(QtCore.QSize(50, 50))
self.btn_stop.setMaximumSize(QtCore.QSize(50, 50))
self.btn_stop.setStyleSheet("QPushButton {\n"
" color: #333;\n"
" border: 2px groove gray;\n"
" border-radius: 25px;\n"
" border-style: outset;\n"
" background: qradialgradient(\n"
" cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,\n"
" radius: 1.35, stop: 0 #fff, stop: 1 #888\n"
" );\n"
" padding: 5px;\n"
" }\n"
"\n"
"QPushButton:hover {\n"
" background: qradialgradient(\n"
" cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,\n"
" radius: 1.35, stop: 0 #fff, stop: 1 #bbb\n"
" );\n"
" }\n"
"\n"
"QPushButton:pressed {\n"
" border-style: inset;\n"
" background: qradialgradient(\n"
" cx: 0.4, cy: -0.1, fx: 0.4, fy: -0.1,\n"
" radius: 1.35, stop: 0 #fff, stop: 1 #ddd\n"
" );\n"
" }")
self.btn_stop.setObjectName("btn_stop")
self.sld_audio = QtWidgets.QSlider(self.splitter)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
sizePolicy.setHorizontalStretch(0)
......@@ -132,13 +107,61 @@ class Ui_MainWindow(object):
self.sld_audio.setSizePolicy(sizePolicy)
self.sld_audio.setMinimumSize(QtCore.QSize(50, 25))
self.sld_audio.setMaximumSize(QtCore.QSize(150, 25))
self.sld_audio.setProperty("value", 99)
self.sld_audio.setSingleStep(5)
# self.sld_audio.setTickPosition(QtWidgets.QSlider.TicksBelow)
# self.sld_audio.setTickInterval(5)
self.sld_audio.setProperty("value", 100)
self.sld_audio.setOrientation(QtCore.Qt.Horizontal)
self.sld_audio.setObjectName("sld_audio")
self.sld_audio.setStyleSheet('''
QSlider:horizontal {
min-height: 24px;
max-height: 24px;
}
QSlider:vertical {
min-width: 24px;
max-width: 24px;
}
QSlider::groove:horizontal {
height: 4px;
background: #393939;
margin: 0 12px;
}
QSlider::groove:vertical {
width: 4px;
background: #393939;
margin: 12px 0;
border-radius: 24px;
}
QSlider::handle:horizontal {
image: url(images/slider.svg);
width: 12px;
height: 12px;
margin: -24px -12px;
}
QSlider::handle:vertical {
image: url(images/slider.svg);
border-radius: 24px;
width: 12px;
height: 12px;
margin: -12px -24px;
}
QSlider::add-page {
background: #232629;
}
QSlider::sub-page {
background: #ffd740;
}
''')
self.lab_audio = QtWidgets.QLabel(self.splitter)
self.lab_audio.setObjectName("lab_audio")
self.label_2 = QtWidgets.QLabel(self.splitter)
self.label_2.setObjectName("label_2")
self.verticalLayout_3.addWidget(self.splitter)
self.verticalLayout_3.setStretch(0, 8)
self.shuiping.addWidget(self.verticalWidget_3)
......@@ -208,10 +231,10 @@ class Ui_MainWindow(object):
self.scrollArea.setWidgetResizable(False)
self.scrollArea.setObjectName("scrollArea")
self.scrollAreaWidgetContents = myWidgetContents()
self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 827, 64))
self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 800, 40))
self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents")
self.sld_video = myVideoSlider(self.scrollAreaWidgetContents)
self.sld_video.setGeometry(QtCore.QRect(10, 30, 811, 20))
self.sld_video.setGeometry(QtCore.QRect(10, 20, 790, 30))
self.sld_video.setMinimumSize(QtCore.QSize(410, 0))
self.sld_video.setMaximumSize(QtCore.QSize(16777215, 20))
self.sld_video.setMaximum(100)
......@@ -313,7 +336,7 @@ class Ui_MainWindow(object):
self.menu.addAction(self.action_save)
self.menu_2.addAction(self.action_undo)
self.menu_2.addAction(self.action_redo)
self.menu_2.addAction(self.action_view_history)
# self.menu_2.addAction(self.action_view_history)
self.menu_2.addSeparator()
self.menu_2.addAction(self.action_insert_aside_from_now)
self.menu_2.addAction(self.action_operate)
......@@ -326,17 +349,14 @@ class Ui_MainWindow(object):
self.menubar.addAction(self.menu_3.menuAction())
self.retranslateUi(MainWindow)
self.tabWidget.setCurrentIndex(2)
self.tabWidget.setCurrentIndex(0)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.btn_open.setText(_translate("MainWindow", "打开表格文件"))
self.btn_play.setText(_translate("MainWindow", "播放"))
self.btn_stop.setText(_translate("MainWindow", "暂停"))
self.lab_audio.setText(_translate("MainWindow", "volume:100%"))
self.label_2.setText(_translate("MainWindow", "00:00/12:34"))
self.label_2.setText(_translate("MainWindow", "00:00/00:00"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
self.tabWidget.setTabText(self.tabWidget.indexOf(self.pb_tab), _translate("MainWindow", "旁白"))
......
......@@ -118,14 +118,12 @@ class ProjectContext:
self.project_base_dir = None
self.video_path = None
self.excel_path = None
self.conf_path = 'conf.ini'
self.subtitle_list = []
self.aside_list = []
self.all_elements = []
f = open("./conf.ini", "r", encoding='utf-8')
rl = f.readlines()
f.close()
self.speaker_info = rl[0].strip()
self.speaker_speed = rl[1].strip()
self.speaker_info = None
self.speaker_speed = None
# 一些常量
self.header = ["起始时间", "终止时间", "字幕", '建议', '解说脚本', "语速"]
self.aside_header = ["起始时间", "终止时间", '建议', '解说脚本',"语速"]
......@@ -140,7 +138,16 @@ class ProjectContext:
self.speakers = []
self.init_speakers()
# 字幕检测进度,主要是待检测视频的初始时间
self.detected = False
self.nd_process = 0.00
self.last_time = 0.00
self.caption_boundings = []
self.has_subtitle = True
# 第一时间加载配置(这里主要是说话人的相关配置)
self.load_conf()
def clear(self):
self.subtitle_list = []
......@@ -148,33 +155,70 @@ class ProjectContext:
self.all_elements = []
self.history_records = []
self.records_pos = 0
def Init(self, project_dir, video_name):
    """Bind this context to a project directory and its video file.

    Sets ``project_base_dir``, ``video_path`` and ``excel_path`` (the video
    path with its suffix replaced by ``.xlsx``) and then reloads the
    configuration through ``load_conf``.

    Args:
        project_dir: Directory of the project. ``None`` or an empty string
            leaves the context untouched.
        video_name: File name of the video inside ``project_dir``.
    """
    # Test falsiness rather than calling len() first: len(None) raises
    # TypeError before an ``is None`` check placed after it can run.
    if not project_dir:
        return
    # The directory sometimes arrives as '/F:/out1/test.xlsx' and sometimes
    # as 'F:/out1/test.xlsx'; normalise by dropping the leading slash.
    # NOTE(review): this also strips the leading slash of a POSIX absolute
    # path - confirm the application only targets Windows-style paths.
    if project_dir[0] == '/':
        project_dir = project_dir[1:]
    self.project_base_dir = project_dir
    self.video_path = project_dir + "/" + video_name
    print("video_path", self.video_path)
    self.excel_path = replace_path_suffix(self.video_path, ".xlsx")
    self.load_conf()
def load_conf(self):
    """Load speaker and detection settings from a JSON ``conf.ini``.

    If the project directory already contains its own ``conf.ini`` that file
    becomes ``conf_path``; otherwise the path already stored in
    ``conf_path`` is read. Speaker settings are always applied. Detection
    progress is only restored when the stored video and Excel paths equal
    the ones held by this context.

    A missing file, or one that does not contain valid JSON (for example a
    ``conf.ini`` still in the older two-line text format), is ignored and
    leaves the current attribute values unchanged.
    """
    if self.project_base_dir is not None:
        project_conf = os.path.join(self.project_base_dir, 'conf.ini')
        if os.path.exists(project_conf):
            self.conf_path = project_conf
    if not os.path.exists(self.conf_path):
        return
    try:
        with open(self.conf_path, 'r', encoding='utf8') as f:
            info = json.load(f)
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        print("load_conf: ignoring unreadable config", self.conf_path, e)
        return
    video_path = info["video_path"]
    excel_path = info["excel_path"]
    speaker = info["speaker_info"]
    self.speaker_info = speaker["speaker_id"]
    self.speaker_speed = speaker["speaker_speed"]
    # Detection progress belongs to one video/sheet pair; do not reuse it
    # for a different project that happens to read the same file.
    if video_path == self.video_path and excel_path == self.excel_path:
        detection = info["detection_info"]
        self.detected = detection["detected"]
        self.nd_process = detection["nd_process"]
        self.last_time = detection["last_time"]
        self.caption_boundings = detection["caption_boundings"]
        self.has_subtitle = detection["has_subtitle"]
# def Init(self, project_dir, video_path, excel_path):
# self.project_base_dir = project_dir
# self.video_path = video_path
# self.excel_path = excel_path
def save_conf(self):
    """Persist the context to ``conf_path`` as a single JSON document.

    The document stores the video and Excel paths, the detection progress
    (``detected``, ``nd_process``, ``last_time``, ``caption_boundings``,
    ``has_subtitle``) and the speaker settings, using the same keys that
    ``load_conf`` reads back.
    """
    info = {
        "video_path": self.video_path,
        "excel_path": self.excel_path,
        "detection_info": {
            "detected": self.detected,
            "nd_process": self.nd_process,
            "last_time": self.last_time,
            "caption_boundings": self.caption_boundings,
            "has_subtitle": self.has_subtitle
        },
        "speaker_info": {
            "speaker_id": self.speaker_info,
            "speaker_speed": self.speaker_speed
        }
    }
    # Serialize before opening the file: opening with 'w' truncates it, so a
    # serialization error must not be able to leave an empty config behind.
    payload = json.dumps(info)
    with open(self.conf_path, 'w', encoding='utf-8') as f:
        f.write(payload)
# Setter: stores the given value on the context as ``video_path``.
def setVideoPath(self, video_path):
self.video_path = video_path
# Setter: stores the given value on the context as ``excel_path``.
def setExcelPath(self, excel_path):
self.excel_path = excel_path
# 目前只是把excel保存到文件中
# 先备份文件,再覆盖主文件,可选是否需要备份,默认需要备份
# 20221030:添加旁白检测的进度
def save_project(self, need_save_new: bool=False) -> str:
self.save_conf()
# all_element = sorted(all_element, key=lambda x: float(x.st_time_sec))
print("current excel_path:", self.excel_path)
if self.excel_path == None:
......@@ -239,13 +283,17 @@ class ProjectContext:
self.all_elements.append(self.aside_list[-1])
# print("[load_excel_from_path] ", end='')
# self.all_elements[-1].print_self()
# Only modification operations are recorded for now.
def history_push(self, row, old, new):
    """Store a Modify record for ``row`` at the current history position.

    When ``records_pos`` points one past the last stored record the new
    record is appended; otherwise the record at ``records_pos`` is
    overwritten. In both cases ``records_pos`` then advances by one.

    Args:
        row: Row the modification applies to.
        old: Value before the modification.
        new: Value after the modification.
    """
    print(old, new)
    position = self.records_pos
    at_end = position == len(self.history_records)
    record = OperateRecord(row, Operation.Modify, old, new)
    if at_end:
        self.history_records.append(record)
    else:
        self.history_records[position] = record
    self.records_pos = position + 1
def history_pop(self)-> OperateRecord:
if len(self.history_records) == 0:
return None
......@@ -279,6 +327,8 @@ class ProjectContext:
for speaker in content["speaker_details"]:
speaker_name.append(
",".join([speaker["name"], speaker["gender"], speaker["age_group"]]))
if self.speaker_info is None:
self.speaker_info = speaker_name[0]
return tuple(speaker_name)
def init_speakers(self):
......
......@@ -24,30 +24,44 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
state (optional): 任务进行状态. Defaults to None.
subtitle (int, optional): 视频是否有字幕,共三种情况(0:未知,1:有字幕,2:无字幕). Defaults to 0.
"""
print("开始检测")
print("start_time", start_time)
print("end_time", end_time)
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else:
book_path = book_path
context = mainWindow.projectContext
# 未检测过
if not context.detected:
print("开始检测")
print("start_time", start_time)
print("end_time", end_time)
if book_path is None:
book_path = os.path.basename(video_path).split('.')[0] + ".xlsx"
else:
book_path = book_path
# 根据用户的选择来确定电影是否有字幕,如果“未知”,则自动检测
if subtitle == 0:
from judge_subtitle import detect_movie
# print("detect if there is narratage at time: ", datetime.datetime.now())
has_subtitle = detect_movie(video_path, start_time, end_time, 180)
elif subtitle == 1:
has_subtitle = True
else:
has_subtitle = False
# 根据用户的选择来确定电影是否有字幕,如果“未知”,则自动检测
if subtitle == 0:
from judge_subtitle import detect_movie
# print("detect if there is narratage at time: ", datetime.datetime.now())
has_subtitle = detect_movie(video_path, start_time, end_time, 180)
elif subtitle == 1:
has_subtitle = True
else:
has_subtitle = False
context.has_subtitle = has_subtitle
if has_subtitle:
from detect_with_ocr import detect_with_ocr
detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
# else:
# from detect_with_asr import detect_with_asr
# detect_with_asr(video_path, book_path, start_time, end_time, state)
if has_subtitle:
from detect_with_ocr import detect_with_ocr
detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
# else:
# from detect_with_asr import detect_with_asr
# detect_with_asr(video_path, book_path, start_time, end_time, state, mainWindow)
else:
# 之前检测过
has_subtitle = context.has_subtitle
start_time = context.last_time
if has_subtitle:
# 更新当前进度
state[0] = context.nd_process
from detect_with_ocr import detect_with_ocr
detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
if __name__ == '__main__':
......
import sys
import os
from PyQt5.QtCore import *;
from PyQt5.QtGui import *;
from PyQt5.QtWidgets import *;
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
import utils
from operation_dialog_ui import Ui_Dialog
#todo 注意,删除行,添加行,暂不支持【撤销与重做】功能!!!
# todo 注意,删除行,添加行,暂不支持【撤销与重做】功能!!!
class Operation_Dialog(QDialog, Ui_Dialog):
#开始检测信号,传参分别是movie路径和输出表格路径
......@@ -26,7 +28,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
self.pushButton_3.clicked.connect(self.fill_row_info_slot)
self.buttonBox.setEnabled(False)
self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(self.start_operation_slot)
self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(
self.start_operation_slot)
# 字幕/旁白 选择框
self.comboBox.currentIndexChanged.connect(self.zmpb_change_slot)
# 增加一行/删除一行 选择框
......@@ -50,13 +53,14 @@ class Operation_Dialog(QDialog, Ui_Dialog):
else:
self.lineEdit_3.setEnabled(False)
self.lineEdit_4.setEnabled(False)
self.lineEdit_5.setEnabled(False)
# When deleting a row, only the row number is needed.
def adddel_change_slot(self):
    """Adjust which input fields are enabled for the selected operation.

    For "增加一行" (add a row) and "修改一行" (modify a row) the field state
    is delegated to ``zmpb_change_slot``. For any other selection every
    line edit except the first one (presumably the row-number field -
    confirm against how ``lineEdits`` is built) is disabled.
    """
    if self.comboBox_2.currentText() in ("增加一行", "修改一行"):
        self.zmpb_change_slot()
        return
    # Iterate the widgets directly instead of indexing through range(len()).
    for line_edit in self.lineEdits[1:]:
        line_edit.setEnabled(False)
# 修改完后需要重新检测
......@@ -71,7 +75,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
row_number = int(self.lineEdit.text())
assert 1 <= row_number <= rowCount
except Exception as e:
self.mainWindow.prompt_dialog.show_with_msg("校验失败!总行数为[%d],你的输入为[%s]!!"%(rowCount, self.lineEdit.text()))
self.mainWindow.prompt_dialog.show_with_msg(
"校验失败!总行数为[%d],你的输入为[%s]!!" % (rowCount, self.lineEdit.text()))
return False
# 校验时间填写是否是hh:mm:ss格式的
try:
......@@ -91,7 +96,7 @@ class Operation_Dialog(QDialog, Ui_Dialog):
# 这些是只有【add】才需要检测的
if self.comboBox_2.currentText() == "增加一行":
#校验起始时间、结束时间
# 校验起始时间、结束时间
start_time_f, end_time_f = 0.0, 0.0
try:
start_time_f = float(utils.trans_to_seconds(self.lineEdit_2.text()))
......@@ -99,17 +104,19 @@ class Operation_Dialog(QDialog, Ui_Dialog):
end_time_f = float(utils.trans_to_seconds(self.lineEdit_3.text()))
assert start_time_f < end_time_f
except Exception as e:
self.mainWindow.prompt_dialog.show_with_msg("校验失败!起始时间或结束时间输入有误!!" )
self.mainWindow.prompt_dialog.show_with_msg(
"校验失败!起始时间或结束时间输入有误!!")
return False
# 校验推荐字数
if self.comboBox.currentText() == "旁白":
try:
suggest_words_count = int(self.lineEdit_5.text())
assert suggest_words_count <= 100
except Exception as e:
self.mainWindow.prompt_dialog.show_with_msg("校验失败!推荐字数填入有误!!")
return False
# if self.comboBox.currentText() == "旁白":
# try:
# suggest_words_count = int(self.lineEdit_5.text())
# assert suggest_words_count <= 100
# except Exception as e:
# self.mainWindow.prompt_dialog.show_with_msg(
# "校验失败!推荐字数填入有误!!")
# return False
# 这些是只有【modify】才需要检测的
if self.comboBox_2.currentText() == "修改一行":
try:
......@@ -121,15 +128,15 @@ class Operation_Dialog(QDialog, Ui_Dialog):
else:
assert self.comboBox.currentText() == "字幕"
except Exception as e:
self.mainWindow.prompt_dialog.show_with_msg("校验失败!待修改的行不是[%s]"%(self.comboBox.currentText()))
return False
self.mainWindow.prompt_dialog.show_with_msg(
"校验失败!待修改的行不是[%s]" % (self.comboBox.currentText()))
return False
# 检测通过
self.mainWindow.prompt_dialog.show_with_msg("校验成功!!")
self.buttonBox.setEnabled(True)
self.set_all_user_component_status(False)
def set_all_user_component_status(self, status: bool):
for lineEdit in self.lineEdits:
lineEdit.setEnabled(status)
......@@ -149,7 +156,7 @@ class Operation_Dialog(QDialog, Ui_Dialog):
if self.comboBox.currentText() == "字幕":
suggest = ""
aside = ""
else: # 如果是旁白
else: # 如果是旁白
end_time = ""
subtitle = ""
suggest = "插入旁白,推荐字数为" + suggest
......@@ -179,7 +186,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
self.lineEdit_2.setText(str(utils.transfer_second_to_time(elem.st_time_sec)))
self.lineEdit_3.setText(str(utils.transfer_second_to_time(elem.ed_time_sec)))
self.lineEdit_4.setText(elem.subtitle)
self.lineEdit_5.setText(elem.suggest[elem.suggest.index("推荐字数为") + 5:])
self.lineEdit_5.setText(
elem.suggest[elem.suggest.index("推荐字数为") + 5:])
self.lineEdit_6.setText(elem.aside)
# 如果是旁白的话
......@@ -190,9 +198,10 @@ class Operation_Dialog(QDialog, Ui_Dialog):
print("exception:", e)
pass
if __name__ == '__main__':
app = QApplication(sys.argv)
app.setWindowIcon(QIcon("./images/eagle_2.ico"))
dialog = Operation_Dialog()
dialog.show()
sys.exit(app.exec_())
\ No newline at end of file
sys.exit(app.exec_())
......@@ -90,9 +90,6 @@ class Ui_Dialog(object):
self.label_11 = QtWidgets.QLabel(Dialog)
self.label_11.setGeometry(QtCore.QRect(380, 310, 81, 20))
self.label_11.setObjectName("label_11")
self.label_12 = QtWidgets.QLabel(Dialog)
self.label_12.setGeometry(QtCore.QRect(250, 270, 251, 20))
self.label_12.setObjectName("label_12")
self.pushButton = QtWidgets.QPushButton(Dialog)
self.pushButton.setGeometry(QtCore.QRect(350, 380, 93, 28))
self.pushButton.setObjectName("pushButton")
......@@ -141,7 +138,6 @@ class Ui_Dialog(object):
self.label_9.setText(_translate("Dialog", "*请填数字,最多保留两位小数"))
self.label_10.setText(_translate("Dialog", "*请填文字"))
self.label_11.setText(_translate("Dialog", "*请填文字"))
self.label_12.setText(_translate("Dialog", "*请填数字,必须是不超过100的正整数"))
self.pushButton.setText(_translate("Dialog", "检测"))
self.pushButton_2.setText(_translate("Dialog", "修改"))
self.label_13.setText(_translate("Dialog", "*需要填在【字幕旁白】页面中的行数"))
......
'''
用于渲染最终成果,将之前临时生成的音频插入到原音频中|生成一条纯旁白的音频|插入到原视频中
'''
import librosa
import numpy as np
import os
import soundfile
import subprocess
import time
from PyQt5.QtCore import *;
from PyQt5.QtGui import *;
from PyQt5.QtWidgets import *;
from management import RunThread
from speech_synthesis import ffmpeg_path
class ExportProcessor(QWidget):
    """Qt-side driver that runs the final export on a background thread.

    The export itself is performed by ``start_export``, which mixes the
    narration audio into the original track (``aggrevate_audios``) and then
    muxes the mixed track back into the video (``export_video``).
    """

    # Carries a message string; this class declares it but never emits it.
    show_warning_signal = pyqtSignal(str)
    # Emitted with (list of worker threads, shared progress list) right after
    # a new export worker has been started.
    export_callback_signal = pyqtSignal(list, list)

    def __init__(self):
        super(ExportProcessor, self).__init__()
        # One-element list used as a mutable progress cell: the export
        # functions write their progress into state[0].
        self.state = [None]
        # Every worker ever created by export_slot, oldest first.
        self.threads = []

    def export_slot(self, video_path, output_dir):
        """Start exporting ``video_path`` into ``output_dir`` on a worker thread.

        Args:
            video_path: path of the source video.
            output_dir: directory holding the narration clips and receiving
                the exported files.
        """
        worker = RunThread(funcName=self.start_export,
                           args=(video_path, output_dir),
                           name="export")
        worker.setDaemon(True)
        self.threads.append(worker)
        # Start only the worker created for this request. The previous code
        # looped over self.threads and called start() on every entry, so a
        # second export tried to restart the already-started worker.
        # NOTE(review): RunThread exposes setDaemon()/start() like
        # threading.Thread, whose start() raises RuntimeError when called
        # twice - confirm RunThread is a Thread subclass.
        worker.start()
        print("===子线程已经开启 in export===")
        self.export_callback_signal.emit(self.threads, self.state)

    def start_export(self, video_path, output_dir):
        """Worker body: build the mixed audio, then mux it into the video."""
        mixed_audio_path = aggrevate_audios(video_path, output_dir, self.state)
        export_video(video_path, mixed_audio_path, output_dir, self.state)
# 生成一条无声的音频,然后把旁白音频逐个按照时间位置放进去,得到仅含旁白的音频和旁白+原声的音频
def aggrevate_audios(video_path: str, output_dir: str, state=None):
    """Build the narration-only track and the narration + original mix.

    The original audio is extracted from the video, a silent track of the
    same length is created, and every file in ``output_dir`` whose name
    (without extension) parses as a number is treated as a narration clip
    starting at that many seconds. Each clip is copied into the silent track
    and the original audio is attenuated to 30% over the same span.

    Two files are written into ``output_dir``: ``narratage.wav`` (narration
    only) and ``mixed.wav`` (attenuated original + narration).

    Args:
        video_path: path of the source video.
        output_dir: directory containing the narration clips; also receives
            the generated files.
        state: optional one-element list; ``state[0]`` receives the progress
            of this stage, which ends at 0.8.

    Returns:
        Path of the mixed audio file.
    """
    # This stage accounts for at most 80% of the overall progress.
    if state is None:
        state = [None]

    # Extract the original audio and create a silent track of equal length.
    from split_wav import extract_audio
    origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
    origin_wav, freq = librosa.load(origin_wav_path)
    blank_audio = np.zeros_like(origin_wav)
    total_len = len(origin_wav)

    # Drop each narration clip into the silent track and lower the original
    # audio to 30% over the same span.
    files = os.listdir(output_dir)
    for i, f in enumerate(files):
        # Only files named after their start time are narration clips;
        # anything else in the directory is skipped.
        fname = f.rpartition('.')[0]
        try:
            st_index = int(float(fname) * freq)
        except (ValueError, OverflowError):
            continue
        if not 0 <= st_index < total_len:
            # The clip would start outside the original audio.
            continue
        try:
            cur_audio, _ = librosa.load(os.path.join(output_dir, f))
        except Exception as e:
            # Best effort: a numerically named file that is not readable
            # audio must not abort the whole export, but say so.
            print("skip unreadable audio file:", f, e)
            continue
        # Truncate a clip that runs past the end of the original audio.
        # Without this the slice assignment below fails on a length
        # mismatch and the whole clip is lost.
        cur_audio = cur_audio[:total_len - st_index]
        ed_index = st_index + len(cur_audio)
        blank_audio[st_index:ed_index] = cur_audio
        origin_wav[st_index:ed_index] *= 0.3
        state[0] = float((i + 1) / len(files)) * 0.7

    narratage_only_path = os.path.join(output_dir, "narratage.wav")
    soundfile.write(narratage_only_path, blank_audio, freq)
    state[0] = 0.75

    # Sum the attenuated original and the narration to get the final mix.
    mixed_audio = origin_wav + blank_audio
    mixed_audio_path = os.path.join(output_dir, "mixed.wav")
    soundfile.write(mixed_audio_path, mixed_audio, freq)
    state[0] = 0.8
    return mixed_audio_path
def export_video(video_path: str, mixed_audio_path: str, output_dir: str, state=None):
    """Mux the mixed audio track into the original video with ffmpeg.

    The result is written to ``output_dir`` as ``new_<original file name>``.
    An ``.rmvb`` source is re-encoded to H.264 in an ``.mp4`` file; any other
    source keeps its container and its video stream is copied unchanged.

    Args:
        video_path: path of the source video (its first video stream is used).
        mixed_audio_path: path of the audio file whose first audio stream
            replaces the original sound.
        output_dir: directory receiving the new video.
        state: optional one-element list; ``state[0]`` is set to 1.0 when
            ffmpeg has returned.
    """
    if state is None:
        state = [None]

    video_file = os.path.basename(video_path)
    # splitext keeps dotted names intact ("a.b.rmvb" -> "a.b"), and the
    # extension is compared case-insensitively so ".RMVB" is recognised too.
    stem, ext = os.path.splitext(video_file)
    if ext.lower() == '.rmvb':
        # NOTE(review): presumably re-encoded because ffmpeg cannot write
        # the source container - confirm.
        mixed_movie_path = os.path.join(output_dir, "new_" + stem + ".mp4")
        video_codec = 'h264'
    else:
        mixed_movie_path = os.path.join(output_dir, "new_" + video_file)
        video_codec = 'copy'

    # Pass the arguments as a list so that paths containing spaces survive
    # and the call does not depend on platform-specific string splitting.
    # NOTE(review): assumes ffmpeg_path is a bare executable path without
    # embedded quotes or extra arguments - confirm.
    command = [
        ffmpeg_path,
        '-i', video_path,
        '-i', mixed_audio_path,
        '-map', '0:v:0',
        '-map', '1:a:0',
        '-vcodec', video_codec,
        mixed_movie_path,
        '-y',
    ]
    return_code = subprocess.call(command)
    if return_code != 0:
        print("ffmpeg exited with code", return_code)
    # Progress is marked complete even on failure so a caller waiting on it
    # is not left hanging, as before.
    state[0] = 1.00
if __name__ == '__main__':
pass
# start_time = time.time()
# video_path = r'D:/Downloads/zhanlang/zhanlang.rmvb'
# output_dir = r'D:/AddCaption/last_version/accessibility_movie/zhanlang'
# mixed_audio_path = aggrevate_audios(video_path, output_dir)
# export_video(video_path, mixed_audio_path, output_dir)
# print(time.time() - start_time)
\ No newline at end of file
......@@ -20,25 +20,41 @@ class Setting_Dialog(QDialog, Ui_Dialog):
self.setWindowTitle("设置")
self.projectContext = projectContext
# todo 把所有说话人都加上来
li = self.projectContext.get_all_speaker_info()
for i in li:
self.speaker_li = self.projectContext.get_all_speaker_info()
for i in self.speaker_li:
self.comboBox.addItem(i)
li_2 = ["1.00(4字/秒)", "1.10(4.5字/秒)", "1.25(5字/秒)", "1.50(6字/秒)", "1.75(7字/秒)", "2.00(8字/秒)", "2.50(10字/秒)"]
self.comboBox_2.addItems(li_2)
self.speed_li_2 = ["1.00(4字/秒)", "1.10(4.5字/秒)", "1.25(5字/秒)", "1.50(6字/秒)", "1.75(7字/秒)", "2.00(8字/秒)", "2.50(10字/秒)"]
self.comboBox_2.addItems(self.speed_li_2)
self.comboBox.setCurrentIndex(li.index(self.projectContext.speaker_info))
self.comboBox_2.setCurrentIndex(li_2.index(self.projectContext.speaker_speed))
if self.projectContext.speaker_info is None:
self.comboBox.setCurrentIndex(0)
else:
self.comboBox.setCurrentIndex(self.speaker_li.index(self.projectContext.speaker_info))
if self.projectContext.speaker_speed is None:
self.comboBox_2.setCurrentIndex(0)
else:
self.comboBox_2.setCurrentIndex(self.speed_li_2.index(self.projectContext.speaker_speed))
self.comboBox.currentIndexChanged.connect(self.speaker_change_slot)
self.comboBox_2.currentIndexChanged.connect(self.speed_change_slot)
self.pushButton.clicked.connect(self.play_audio_slot)
def content_fresh(self):
    """Re-sync both combo boxes with the values stored in the project context.

    The speaker combo box follows ``projectContext.speaker_info`` and the
    speed combo box follows ``projectContext.speaker_speed``. When a stored
    value is ``None`` or is not among the offered options, the first entry
    is selected.
    """
    def _position(options, stored):
        # None means nothing has been chosen yet. A value that is missing
        # from the options used to raise ValueError out of this method;
        # fall back to the first entry in both cases.
        if stored is None:
            return 0
        try:
            return options.index(stored)
        except ValueError:
            return 0

    self.comboBox.setCurrentIndex(
        _position(self.speaker_li, self.projectContext.speaker_info))
    self.comboBox_2.setCurrentIndex(
        _position(self.speed_li_2, self.projectContext.speaker_speed))
def speaker_change_slot(self):
    """Store the speaker selected in the combo box and save the configuration."""
    selected = self.comboBox.currentText()
    self.projectContext.speaker_info = selected
    self.projectContext.save_conf()
def speed_change_slot(self):
self.projectContext.speaker_speed = self.comboBox_2.currentText()
self.projectContext.save_conf()
......@@ -64,6 +80,12 @@ class Setting_Dialog(QDialog, Ui_Dialog):
global audioPlayed
winsound.PlaySound(audioPlayed, winsound.SND_PURGE)
event.accept()
def showDialog(self):
    """Refresh the dialog content via ``content_fresh`` and then show the dialog."""
    refresh, reveal = self.content_fresh, self.show
    refresh()
    reveal()
def thread_it(func, *args, name):
"""创建守护线程
......@@ -79,6 +101,7 @@ def thread_it(func, *args, name):
t.setDaemon(True)
# 启动
t.start()
if __name__ == '__main__':
app = QApplication(sys.argv)
app.setWindowIcon(QIcon("./images/eagle_2.ico"))
......
......@@ -22,7 +22,7 @@ from typing import Tuple
import datetime
import numpy as np
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason, AudioDataStream
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl
......@@ -91,7 +91,9 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
"""
audio_path = tmp_file
speech_config = SpeechConfig(
subscription="db34d38d2d3447d482e0f977c66bd624", region="eastus")
subscription="db34d38d2d3447d482e0f977c66bd624",
region="eastus"
)
speech_config.speech_synthesis_language = "zh-CN"
speech_config.speech_synthesis_voice_name = speaker.speaker_code
......@@ -101,25 +103,33 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
print("output_file路径不存在,创建:", os.path.dirname(output_file))
os.makedirs(os.path.dirname(output_file))
audio_config = AudioOutputConfig(filename=audio_path)
synthesizer = SpeechSynthesizer(
speech_config=speech_config, audio_config=audio_config)
result = synthesizer.speak_text(text)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
ssml_string = f"""
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
<voice name="{speaker.speaker_code}">
<prosody rate="{round((speed - 1.0) * 100, 2)}%">
{text}
</prosody>
</voice>
</speak>"""
result = synthesizer.speak_ssml_async(ssml_string).get()
stream = AudioDataStream(result)
stream.save_to_wav_file(output_file)
print(result.reason)
while result.reason == ResultReason.Canceled:
cancellation_details = result.cancellation_details
print("取消的原因", cancellation_details.reason)
print("取消的原因", cancellation_details.reason, cancellation_details.error_details)
time.sleep(1)
synthesizer.stop_speaking()
del synthesizer
synthesizer = SpeechSynthesizer(
speech_config=speech_config, audio_config=audio_config)
result = synthesizer.speak_text(text)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
result = synthesizer.speak_ssml_async(ssml_string).get()
stream = AudioDataStream(result)
stream.save_to_wav_file(output_file)
print(result.reason)
change_speed_and_volume(output_file, speed)
# detached
def change_speed_and_volume(wav_path: str, speed: float = 1.0):
"""调整语速,顺便把音量调大,语音合成的声音太小了
......@@ -248,40 +258,6 @@ def export_caption(sheet_content: dict, caption_file: str):
f.write(x + "\n\n")
def adjust_volume(origin: str, start_timestamp: list, end_timestamp: list):
    """Lower the original audio to 30% wherever a narration will be inserted.

    The timestamps are processed in groups of ``part_len`` entries, one
    ffmpeg invocation per group, each reading the output of the previous
    one. Intermediate files are named ``adjust<k>.wav`` next to ``origin``
    and are removed once consumed; the final result is written to the
    module-level ``adjusted_wav_path``, which this function rebinds to a
    path inside the directory of ``origin``.

    Args:
        origin (str): path of the original audio file.
        start_timestamp (list): narration start times.
        end_timestamp (list): narration end times, parallel to
            ``start_timestamp``.
    """
    global adjusted_wav_path
    work_dir = os.path.dirname(origin)
    adjusted_wav_path = os.path.join(work_dir, adjusted_wav_path)
    n = len(start_timestamp)
    groups = int(np.ceil(n / part_len))
    middle_wav, res_wav = origin, os.path.join(work_dir, "adjust0.wav")
    for x in range(groups):
        # The last group writes straight to the final output path.
        if x == groups - 1:
            res_wav = adjusted_wav_path
        start = x * part_len
        # Slices clamp at the end of the list, so the last (possibly short)
        # group needs no special case. The end times must come from
        # end_timestamp: the previous code took them from start_timestamp
        # for the final group, producing zero-length attenuation windows.
        st = start_timestamp[start: start + part_len]
        et = end_timestamp[start: start + part_len]
        filters = ",".join(
            "volume=enable='between(t,{},{})':volume=0.3".format(s, e)
            for s, e in zip(st, et)
        )
        command_line = "{} -i {} -af \"{}\" -y {}".format(
            ffmpeg_path, middle_wav, filters, res_wav)
        print(command_line)
        os.system(command_line)
        # The input of every pass but the first is a temporary file.
        if x != 0:
            os.remove(middle_wav)
        middle_wav = res_wav
        res_wav = os.path.join(work_dir, "adjust{}.wav".format(x + 1))
def mix_speech(origin: str, narratage_paths: list, start_timestamps: list):
"""将合成音频与原音频混合
......@@ -372,26 +348,6 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
if state is not None:
state[0] = float((i + 1) / len(narratages)) * 0.97
# 合成总音频,并入原视频音频中
# 提取原音频
print("mix the final wav at time: ", datetime.datetime.now())
from split_wav import extract_audio
origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
start = 0
n = len(start_timestamp)
part_len = 50
x = int(np.ceil(n / part_len))
start_timestamp = list(reversed(start_timestamp))
end_timestamp = list(reversed(end_timestamp))
narratage_paths = list(reversed(narratage_paths))
# 调整原音频中旁白对应位置的音量
adjust_volume(origin_wav_path, start_timestamp, end_timestamp)
print("--------------------------ok-----------------------------------")
# 将旁白混入原音频
mix_speech(adjusted_wav_path, narratage_paths, start_timestamp)
# print(middle_wav)
start += part_len
# 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
if os.path.exists(tmp_file):
time.sleep(1)
......@@ -403,13 +359,17 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
if __name__ == '__main__':
# video_path = r'D:/Downloads/zhanlang.rmvb'
# sheet_path = r'D:/Downloads/战狼.xlsx'
# output_dir = r'D:/AddCaption/last_version/accessibility_movie/zhanlang'
# speed = 1.25
# caption_file = './zhanlang/zhanlang.srt'
# speaker_name = '晓秋'
# ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, speaker_name)
import pprint
d = read_sheet("./test37second.xlsx")
pprint.pprint(d)
\ No newline at end of file
video_path = r'D:/Downloads/zhanlang.rmvb'
sheet_path = r'D:/Downloads/zhanlang/战狼.xlsx'
output_dir = r'D:/AddCaption/last_version/accessibility_movie/zhanlang'
speed = 1.25
caption_file = os.path.join(output_dir, os.path.basename(video_path) + ".srt")
speaker_name = '晓秋'
ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, speaker_name)
# import pprint
# d = read_sheet("./test37second.xlsx")
# pprint.pprint(d)
# init_speakers()
# speaker_name = "晓秋"
# speaker = choose_speaker(speaker_name)
# speech_synthesis("今天我们讲解的电影是何以笙箫默,它讲述了", r"D:\AddCaption\cur_version\accessibility_movie_2\test.wav", speaker, 0.5)
\ No newline at end of file
......@@ -6,7 +6,10 @@ from PyQt5.QtGui import *;
from PyQt5.QtWidgets import *;
from main_window import MainWindow
from qt_material import apply_stylesheet
import qdarkstyle
import os
os.environ['PYQTGRAPH_QT_LIB'] = 'PyQt5'
if __name__ == '__main__':
try:
......@@ -14,6 +17,8 @@ if __name__ == '__main__':
app = QApplication(sys.argv)
app.setWindowIcon(QIcon("./images/eagle_2.ico"))
mainWindow = MainWindow()
# apply_stylesheet(app, theme='dark_amber.xml')
# app.setStyleSheet(qdarkstyle.load_stylesheet(qt_api=os.environ['PYQTGRAPH_QT_LIB']))
mainWindow.show()
sys.exit(app.exec_())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment