Merge branch 'feat_1' of http://gitlab.uiiai.com/xuanweiace/accessibility_movie_2 into feat_1

Conflicts: main_window.py

Merge branch 'feat_1' of http://gitlab.uiiai.com/xuanweiace/accessibility_movie_2 into feat_1
b3f15065 · xuanweiace · d2fb7787 · a5f2f081 · b3f15065 · b3f15065
Commit b3f15065 authored Nov 05, 2022 by xuanweiace
14 changed files
--- a/detect_dialog.py
+++ b/detect_dialog.py
@@ -17,11 +17,12 @@ class Detect_Dialog(QDialog, Ui_Dialog):
        self.setupUi(self)
        self.setWindowTitle("检测")
        self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).setText("开始检测")
+        self.buttonBox.button(QDialogButtonBox.StandardButton.Cancel).setText("取消")
        self.pushButton.clicked.connect(self.openFile)
        self.pushButton_2.clicked.connect(self.openTableFile)
        self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(self.start_detect)
    def openFile(self):
-        file_info = QFileDialog.getOpenFileNames(self, '选择视频', os.getcwd(), "MP4(*.mp4);;Text Files(*.txt)")
+        """Ask the user for a video file and put the validated path into ``lineEdit``."""
+        # Each pattern needs the "*." prefix: the bare "*mkv" / "*avi" patterns matched any
+        # file whose name merely ends in those letters, not just that extension.
+        file_info = QFileDialog.getOpenFileNames(self, '选择视频', os.getcwd(), "Video Files(*.mp4 *.rmvb *.mkv *.avi)")
        file_name, ok = validate_and_get_filepath(file_info)
        if ok:
            self.lineEdit.setText(file_name)

--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -67,7 +67,7 @@ def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
        # cv2.imshow('img', gray)
        # cv2.imshow(img)
        cnt += 1
-        if img is None or cnt > 1000:
+        if img is None or cnt > 10000:
            break
        if cnt % int(fps / 3) != 0:
            continue
@@ -238,16 +238,18 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
    start_time = 0
    end_time = 0
    video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
+    pre_state = state[0]
    while True:
        _, frame = video.read()
        if frame is None:
            break
        cnt += 1
+        cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
        # 判断当前帧是否已超限制
-        if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 > end:
+        if cur_time > end:
-            if video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time > 1:
+            if cur_time - end_time > 1:
                print('--------------------------------------------------')
-                recommend_lens = int((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - end_time) * normal_speed)
+                recommend_lens = int((cur_time - end_time) * normal_speed)
                # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
                add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
@@ -258,17 +260,23 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
            break
        # 每秒取4帧画面左右
        if cnt % int(fps / 4) == 0:
-            state[0] = float((video.get(cv2.CAP_PROP_POS_MSEC) / 1000 - begin) / (end - begin)) \
+            # 更新当前工程的检测进度
-                if state[0] is None or state[0] < 0.99 else 0.99
+            if pre_state is None:
+                state[0] = float((cur_time - begin) / (end - begin))
+            else:
+                state[0] = min(0.9999, pre_state + float((cur_time - begin) / (end - begin)))
+            mainWindow.projectContext.nd_process = state[0]
+            mainWindow.projectContext.last_time = cur_time
            subTitle = detect_subtitle(frame)
            if subTitle is not None:
                subTitle = normalize(subTitle)
            # 第一次找到字幕
            if lastSubTitle is None and subTitle is not None:
-                start_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
+                start_time = cur_time
            # 字幕消失
            elif lastSubTitle is not None and subTitle is None:
-                end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
+                end_time = cur_time
                res.append([start_time, end_time, lastSubTitle])
                if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
                    print('--------------------------------------------------')
@@ -279,11 +287,11 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                print(start_time, end_time, lastSubTitle)
                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
-                add_to_list(mainWindow,"字幕", [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
+                add_to_list(mainWindow, "字幕", [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
-                # 两句话连在一起，但是两句话不一样
            elif lastSubTitle is not None and subTitle is not None:
+                # 两句话连在一起，但是两句话不一样
                if string_similar(lastSubTitle, subTitle) < 0.7:
-                    end_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
+                    end_time = cur_time
                    res.append([start_time, end_time, lastSubTitle])
                    if len(res) == 1 or res[-1][0] - res[-2][1] >= 1:
                        print('--------------------------------------------------')
@@ -343,12 +351,17 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
    book_name_xlsx = book_path
    sheet_name_xlsx = "旁白插入位置建议"
+    context = mainWindow.projectContext
    # 获取字幕在画面中的上下边界，方便在后续视频遍历过程中直接对字幕对应区域进行分析
    global up_b, down_b
-    # print("get the bounding of the narratage at time: ", datetime.datetime.now())
+    if context.detected:
+        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
+    else:
        # 此处start_time + 300是为了节省用户调整视频开始时间的功夫（强行跳过前5分钟）
        up_b, down_b = get_position(video_path, start_time +300)
+        context.caption_boundings = [up_b, down_b]
+    context.detected = True
    # 获取并构建输出信息
    table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
    # print("create sheet at time: ", datetime.datetime.now())

--- a/images/slider.svg
+++ b/images/slider.svg
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="20"
+   height="20"
+   viewBox="0 0 5.2916664 5.2916664"
+   version="1.1"
+   id="svg8"
+   inkscape:version="0.92.4 5da689c313, 2019-01-14"
+   sodipodi:docname="slider.svg"
+   inkscape:export-filename="/home/yeison/Development/piton/art/icon_lite.png"
+   inkscape:export-xdpi="96"
+   inkscape:export-ydpi="96">
+  <defs
+     id="defs2" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="28.704913"
+     inkscape:cx="8.5671075"
+     inkscape:cy="8.8021939"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="true"
+     inkscape:window-width="1920"
+     inkscape:window-height="1015"
+     inkscape:window-x="0"
+     inkscape:window-y="0"
+     inkscape:window-maximized="1"
+     inkscape:showpageshadow="false"
+     units="px"
+     inkscape:pagecheckerboard="false"
+     showguides="true"
+     inkscape:snap-bbox="true"
+     inkscape:bbox-paths="true"
+     inkscape:bbox-nodes="true"
+     inkscape:snap-bbox-edge-midpoints="true"
+     inkscape:snap-bbox-midpoints="true"
+     inkscape:snap-nodes="true"
+     inkscape:object-paths="true"
+     inkscape:snap-intersection-paths="true"
+     inkscape:snap-smooth-nodes="true"
+     inkscape:snap-midpoints="true"
+     inkscape:snap-global="true"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0"
+     inkscape:guide-bbox="true">
+    <inkscape:grid
+       type="xygrid"
+       id="grid974"
+       empspacing="8"
+       spacingx="0.26458332"
+       spacingy="0.26458332"
+       dotted="false"
+       visible="true"
+       enabled="true"
+       snapvisiblegridlinesonly="true"
+       originx="0"
+       originy="0" />
+  </sodipodi:namedview>
+  <metadata
+     id="metadata5">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(0,-291.70835)">
+    <g
+       id="g847"
+       transform="matrix(0.05207439,0,0,0.05207453,-0.90125164,282.41203)">
+      <g
+         id="g851">
+        <g
+           id="g1059"
+           transform="matrix(1.9986219,0,0,1.9986185,17.324484,-313.52314)">
+          <path
+             inkscape:transform-center-y="3.175"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
+             d="M 25.399999,271.60002 -8.0000008e-7,246.20002 H 50.799999 Z"
+             id="path883"
+             inkscape:connector-curvature="0"
+             sodipodi:nodetypes="cccc" />
+          <path
+             sodipodi:nodetypes="cccc"
+             inkscape:connector-curvature="0"
+             id="path880"
+             d="m 25.399999,271.60002 25.399999,25.4 H 0 Z"
+             inkscape:transform-center-y="-3.1749995"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
+          <rect
+             ry="5.0534658"
+             y="253.84885"
+             x="7.6487389"
+             height="35.528759"
+             width="35.528786"
+             id="rect870"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.06184419;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
+          <circle
+             r="25.396828"
+             cy="271.60001"
+             cx="25.4"
+             id="path872"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07635882;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
+          <circle
+             transform="rotate(-45)"
+             cx="-174.08969"
+             cy="210.01071"
+             r="12.656071"
+             id="path876"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07399406;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
+          <path
+             inkscape:transform-center-x="-3.1749999"
+             sodipodi:nodetypes="cccc"
+             inkscape:connector-curvature="0"
+             id="path904"
+             d="m 25.4,271.60002 -25.40000040000004,25.4 v -50.8 z"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
+          <path
+             inkscape:transform-center-x="3.175"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.07000433;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
+             d="m 25.399999,271.60002 25.4,-25.4 v 50.8 z"
+             id="path906"
+             inkscape:connector-curvature="0"
+             sodipodi:nodetypes="cccc" />
+          <rect
+             ry="5.0514922"
+             y="256.39301"
+             x="2.5663135"
+             height="30.440479"
+             width="45.693634"
+             id="rect837"
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.0657438;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers" />
+          <rect
+             style="opacity:1;fill:none;fill-opacity:0.49382719;stroke:#000000;stroke-width:0.0657438;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
+             id="rect831"
+             width="45.693588"
+             height="30.44051"
+             x="248.76645"
+             y="-40.633385"
+             ry="5.051497"
+             transform="rotate(90)" />
+        </g>
+      </g>
+    </g>
+    <path
+       style="opacity:1;fill:#ffc107;fill-opacity:1;stroke:none;stroke-width:0.38596651;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
+       d="m 50.206421,401.67683 c 110.217209,0.71279 55.108609,0.3564 0,0 z"
+       id="rect997"
+       inkscape:connector-curvature="0" />
+    <path
+       style="opacity:1;fill:#0000ff;fill-opacity:1;stroke:none;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;paint-order:stroke fill markers"
+       d="M 10,0.01367188 C 4.4846749,0.01360343 0.01360343,4.4846749 0.01367188,10 0.0136035,15.515325 4.484675,19.986396 10,19.986328 15.515325,19.986396 19.986396,15.515325 19.986328,10 19.986396,4.484675 15.515325,0.0136035 10,0.01367188 Z"
+       transform="matrix(0.26458332,0,0,0.26458332,0,291.70835)"
+       id="path826"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="ccccc" />
+  </g>
+</svg>
--- a/main_window.py
+++ b/main_window.py
--- a/main_window.ui
+++ b/main_window.ui
@@ -112,6 +112,28 @@
             <property name="autoFillBackground">
              <bool>true</bool>
             </property>
+             <widget class="QSlider" name="verticalSlider">
+              <property name="geometry">
+               <rect>
+                <x>560</x>
+                <y>310</y>
+                <width>16</width>
+                <height>161</height>
+               </rect>
+              </property>
+              <property name="maximum">
+               <number>100</number>
+              </property>
+              <property name="singleStep">
+               <number>5</number>
+              </property>
+              <property name="value">
+               <number>100</number>
+              </property>
+              <property name="orientation">
+               <enum>Qt::Vertical</enum>
+              </property>
+             </widget>
            </widget>
           </item>
           <item>
@@ -119,17 +141,6 @@
             <property name="orientation">
              <enum>Qt::Horizontal</enum>
             </property>
-             <widget class="QPushButton" name="btn_open">
-              <property name="maximumSize">
-               <size>
-                <width>100</width>
-                <height>25</height>
-               </size>
-              </property>
-              <property name="text">
-               <string>打开表格文件</string>
-              </property>
-             </widget>
             <widget class="QPushButton" name="btn_play">
              <property name="minimumSize">
               <size>
@@ -175,85 +186,20 @@ QPushButton:pressed {
               <string>播放</string>
              </property>
             </widget>
-             <widget class="QPushButton" name="btn_stop">
+             <widget class="QLabel" name="label_2">
-              <property name="minimumSize">
-               <size>
-                <width>50</width>
-                <height>50</height>
-               </size>
-              </property>
-              <property name="maximumSize">
-               <size>
-                <width>50</width>
-                <height>50</height>
-               </size>
-              </property>
-              <property name="styleSheet">
-               <string notr="true">QPushButton {
-    color: #333;
-    border: 2px groove gray;
-    border-radius: 25px;
-    border-style: outset;
-        background: qradialgradient(
-        cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,
-        radius: 1.35, stop: 0 #fff, stop: 1 #888
-        );
-    padding: 5px;
-    }
-QPushButton:hover {
-    background: qradialgradient(
-        cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,
-        radius: 1.35, stop: 0 #fff, stop: 1 #bbb
-        );
-    }
-QPushButton:pressed {
-    border-style: inset;
-    background: qradialgradient(
-        cx: 0.4, cy: -0.1, fx: 0.4, fy: -0.1,
-        radius: 1.35, stop: 0 #fff, stop: 1 #ddd
-        );
-    }</string>
-              </property>
              <property name="text">
-               <string>暂停</string>
+               <string>00:00/00:00</string>
              </property>
             </widget>
-             <widget class="QSlider" name="sld_audio">
+             <widget class="QPushButton" name="pushButton">
-              <property name="sizePolicy">
-               <sizepolicy hsizetype="Expanding" vsizetype="Fixed">
-                <horstretch>0</horstretch>
-                <verstretch>0</verstretch>
-               </sizepolicy>
-              </property>
-              <property name="minimumSize">
-               <size>
-                <width>50</width>
-                <height>25</height>
-               </size>
-              </property>
              <property name="maximumSize">
               <size>
-                <width>150</width>
+                <width>50</width>
-                <height>25</height>
+                <height>16777215</height>
               </size>
              </property>
-              <property name="value">
-               <number>99</number>
-              </property>
-              <property name="orientation">
-               <enum>Qt::Horizontal</enum>
-              </property>
-             </widget>
-             <widget class="QLabel" name="lab_audio">
              <property name="text">
-               <string>volume:100%</string>
+               <string>音量</string>
-              </property>
-             </widget>
-             <widget class="QLabel" name="label_2">
-              <property name="text">
-               <string>00:00/12:34</string>
              </property>
             </widget>
            </widget>
@@ -279,7 +225,7 @@ QPushButton:pressed {
           <enum>QTabWidget::Triangular</enum>
          </property>
          <property name="currentIndex">
-           <number>2</number>
+           <number>1</number>
          </property>
          <property name="iconSize">
           <size>
@@ -503,7 +449,7 @@ QPushButton:pressed {
     <x>0</x>
     <y>0</y>
     <width>939</width>
-     <height>26</height>
+     <height>22</height>
    </rect>
   </property>
   <widget class="QMenu" name="menu">

--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -7,6 +7,8 @@
 # WARNING! All changes made in this file will be lost!
 from PyQt5 import QtCore, QtGui, QtWidgets
+from PaintQSlider import PaintQSlider
+import qtawesome
 class Ui_MainWindow(object):
@@ -61,16 +63,19 @@ class Ui_MainWindow(object):
        self.splitter = QtWidgets.QSplitter(self.verticalWidget_3)
        self.splitter.setOrientation(QtCore.Qt.Horizontal)
        self.splitter.setObjectName("splitter")
-        self.btn_open = QtWidgets.QPushButton(self.splitter)
+        self.label_2 = QtWidgets.QLabel(self.splitter)
-        self.btn_open.setMaximumSize(QtCore.QSize(100, 25))
+        self.label_2.setObjectName("label_2")
-        self.btn_open.setObjectName("btn_open")
+        # self.btn_open = QtWidgets.QPushButton(self.splitter)
-        self.btn_play = QtWidgets.QPushButton(self.splitter)
+        # self.btn_open.setMaximumSize(QtCore.QSize(100, 25))
-        self.btn_play.setMinimumSize(QtCore.QSize(50, 50))
+        # self.btn_open.setObjectName("btn_open")
-        self.btn_play.setMaximumSize(QtCore.QSize(50, 50))
+        self.btn_play = QtWidgets.QPushButton(qtawesome.icon('fa.play-circle', color='#FFFFFF', font=50), "", self.splitter)
+        self.btn_play.setIconSize(QtCore.QSize(30, 30))
+        self.btn_play.setMinimumSize(QtCore.QSize(30, 30))
+        self.btn_play.setMaximumSize(QtCore.QSize(30, 30))
        self.btn_play.setStyleSheet("QPushButton {\n"
 "    color: #333;\n"
 "    border: 2px groove gray;\n"
-"    border-radius: 25px;\n"
+"    border-radius: 15px;\n"
 "    border-style: outset;\n"
 "        background: qradialgradient(\n"
 "        cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,\n"
@@ -94,36 +99,6 @@ class Ui_MainWindow(object):
 "        );\n"
 "    }")
        self.btn_play.setObjectName("btn_play")
-        self.btn_stop = QtWidgets.QPushButton(self.splitter)
-        self.btn_stop.setMinimumSize(QtCore.QSize(50, 50))
-        self.btn_stop.setMaximumSize(QtCore.QSize(50, 50))
-        self.btn_stop.setStyleSheet("QPushButton {\n"
-"    color: #333;\n"
-"    border: 2px groove gray;\n"
-"    border-radius: 25px;\n"
-"    border-style: outset;\n"
-"        background: qradialgradient(\n"
-"        cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,\n"
-"        radius: 1.35, stop: 0 #fff, stop: 1 #888\n"
-"        );\n"
-"    padding: 5px;\n"
-"    }\n"
-"\n"
-"QPushButton:hover {\n"
-"    background: qradialgradient(\n"
-"        cx: 0.3, cy: -0.4, fx: 0.3, fy: -0.4,\n"
-"        radius: 1.35, stop: 0 #fff, stop: 1 #bbb\n"
-"        );\n"
-"    }\n"
-"\n"
-"QPushButton:pressed {\n"
-"    border-style: inset;\n"
-"    background: qradialgradient(\n"
-"        cx: 0.4, cy: -0.1, fx: 0.4, fy: -0.1,\n"
-"        radius: 1.35, stop: 0 #fff, stop: 1 #ddd\n"
-"        );\n"
-"    }")
-        self.btn_stop.setObjectName("btn_stop")
        self.sld_audio = QtWidgets.QSlider(self.splitter)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
@@ -132,13 +107,61 @@ class Ui_MainWindow(object):
        self.sld_audio.setSizePolicy(sizePolicy)
        self.sld_audio.setMinimumSize(QtCore.QSize(50, 25))
        self.sld_audio.setMaximumSize(QtCore.QSize(150, 25))
-        self.sld_audio.setProperty("value", 99)
+        self.sld_audio.setSingleStep(5)
+        # self.sld_audio.setTickPosition(QtWidgets.QSlider.TicksBelow)
+        # self.sld_audio.setTickInterval(5)
+        self.sld_audio.setProperty("value", 100)
        self.sld_audio.setOrientation(QtCore.Qt.Horizontal)
        self.sld_audio.setObjectName("sld_audio")
+        self.sld_audio.setStyleSheet('''
+        QSlider:horizontal {
+  min-height: 24px;
+  max-height: 24px;
+}
+QSlider:vertical {
+  min-width: 24px;
+  max-width: 24px;
+}
+QSlider::groove:horizontal {
+  height: 4px;
+  background: #393939;
+  margin: 0 12px;
+}
+QSlider::groove:vertical {
+  width: 4px;
+  background: #393939;
+  margin: 12px 0;
+  border-radius: 24px;
+}
+QSlider::handle:horizontal {
+  image: url(images/slider.svg);
+  width: 12px;
+  height: 12px;
+  margin: -24px -12px;
+}
+QSlider::handle:vertical {
+  image: url(images/slider.svg);
+  border-radius: 24px;
+  width: 12px;
+  height: 12px;
+  margin: -12px -24px;
+}
+QSlider::add-page {
+background: #232629;
+}
+QSlider::sub-page {
+background: #ffd740;
+}
+        ''')
        self.lab_audio = QtWidgets.QLabel(self.splitter)
        self.lab_audio.setObjectName("lab_audio")
-        self.label_2 = QtWidgets.QLabel(self.splitter)
-        self.label_2.setObjectName("label_2")
        self.verticalLayout_3.addWidget(self.splitter)
        self.verticalLayout_3.setStretch(0, 8)
        self.shuiping.addWidget(self.verticalWidget_3)
@@ -208,10 +231,10 @@ class Ui_MainWindow(object):
        self.scrollArea.setWidgetResizable(False)
        self.scrollArea.setObjectName("scrollArea")
        self.scrollAreaWidgetContents = myWidgetContents()
-        self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 827, 64))
+        self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 800, 40))
        self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents")
        self.sld_video = myVideoSlider(self.scrollAreaWidgetContents)
-        self.sld_video.setGeometry(QtCore.QRect(10, 30, 811, 20))
+        self.sld_video.setGeometry(QtCore.QRect(10, 20, 790, 30))
        self.sld_video.setMinimumSize(QtCore.QSize(410, 0))
        self.sld_video.setMaximumSize(QtCore.QSize(16777215, 20))
        self.sld_video.setMaximum(100)
@@ -313,7 +336,7 @@ class Ui_MainWindow(object):
        self.menu.addAction(self.action_save)
        self.menu_2.addAction(self.action_undo)
        self.menu_2.addAction(self.action_redo)
-        self.menu_2.addAction(self.action_view_history)
+        # self.menu_2.addAction(self.action_view_history)
        self.menu_2.addSeparator()
        self.menu_2.addAction(self.action_insert_aside_from_now)
        self.menu_2.addAction(self.action_operate)
@@ -326,17 +349,14 @@ class Ui_MainWindow(object):
        self.menubar.addAction(self.menu_3.menuAction())
        self.retranslateUi(MainWindow)
-        self.tabWidget.setCurrentIndex(2)
+        self.tabWidget.setCurrentIndex(0)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
-        self.btn_open.setText(_translate("MainWindow", "打开表格文件"))
-        self.btn_play.setText(_translate("MainWindow", "播放"))
-        self.btn_stop.setText(_translate("MainWindow", "暂停"))
        self.lab_audio.setText(_translate("MainWindow", "volume:100%"))
-        self.label_2.setText(_translate("MainWindow", "00:00/12:34"))
+        self.label_2.setText(_translate("MainWindow", "00:00/00:00"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.pb_tab), _translate("MainWindow", "旁白"))

--- a/management.py
+++ b/management.py
@@ -118,14 +118,12 @@ class ProjectContext:
        self.project_base_dir = None
        self.video_path = None
        self.excel_path = None
+        self.conf_path = 'conf.ini'
        self.subtitle_list = []
        self.aside_list = []
        self.all_elements = []
-        f = open("./conf.ini", "r", encoding='utf-8')
+        self.speaker_info = None
-        rl = f.readlines()
+        self.speaker_speed = None
-        f.close()
-        self.speaker_info = rl[0].strip()
-        self.speaker_speed = rl[1].strip()
        # 一些常量
        self.header = ["起始时间", "终止时间", "字幕", '建议', '解说脚本', "语速"]
        self.aside_header = ["起始时间", "终止时间", '建议', '解说脚本',"语速"]
@@ -141,6 +139,15 @@ class ProjectContext:
        self.speakers = []
        self.init_speakers()
+        # 字幕检测进度，主要是待检测视频的初始时间
+        self.detected = False
+        self.nd_process = 0.00
+        self.last_time = 0.00
+        self.caption_boundings = []
+        self.has_subtitle = True
+        # 第一时间加载配置（这里主要是说话人的相关配置）
+        self.load_conf()
    def clear(self):
        self.subtitle_list = []
@@ -148,33 +155,70 @@ class ProjectContext:
        self.all_elements = []
        self.history_records = []
        self.records_pos = 0
    def Init(self, project_dir, video_name):
+        """Bind this context to a project directory and video, then reload the config.
+
+        Sets ``project_base_dir``, ``video_path`` and ``excel_path`` (the video path
+        with its suffix replaced by ``.xlsx``) and calls ``load_conf()``.
+        Returns without changing anything when ``project_dir`` is None or empty.
+        """
+        # Test for None first: len(None) raises TypeError, so the reversed order
+        # crashed on exactly the input this guard is meant to reject.
+        if project_dir is None or len(project_dir) == 0:
+            return
        # The path sometimes arrives as '/F:/out1/test.xlsx' and sometimes as 'F:/out1/test.xlsx'
        if project_dir[0] == '/':
            project_dir = project_dir[1:]
        self.project_base_dir = project_dir
        # self.video_path = os.path.join(project_dir, video_name)
        self.video_path = project_dir + "/" + video_name
-        print("video_pathvideo_path: ", self.video_path)
+        print("video_path", self.video_path)
        self.excel_path = replace_path_suffix(self.video_path, ".xlsx")
+        self.load_conf()
+    def load_conf(self):
+        """Load speaker settings and, if they belong to this project, detection progress.
+
+        Reads a JSON document from ``conf.ini`` inside ``project_base_dir`` when that
+        file exists; otherwise from the path already held in ``self.conf_path``.
+        """
+        this_conf_path = os.path.join(self.project_base_dir, 'conf.ini') if self.project_base_dir is not None else self.conf_path
+        # If the current project has no config file of its own yet, initialise from the
+        # global config file; otherwise use the project's config file.
+        if os.path.exists(this_conf_path):
+            self.conf_path = this_conf_path
+        # NOTE(review): on fallback self.conf_path still names the global file, so
+        # save_conf() will write this project's data there - confirm this is intended.
+        with open(self.conf_path, 'r', encoding='utf8') as f:
+            info = json.load(f)
+            video_path = info["video_path"]
+            excel_path = info["excel_path"]
+            self.speaker_info = info["speaker_info"]["speaker_id"]
+            self.speaker_speed = info["speaker_info"]["speaker_speed"]
+            # Detection progress is restored only when the stored paths match this project.
+            # NOTE(review): on a mismatch the detection fields keep whatever values they
+            # held before this call - confirm that stale state cannot leak between projects.
+            if video_path == self.video_path and excel_path == self.excel_path:
+                self.detected = info["detection_info"]["detected"]
+                self.nd_process = info["detection_info"]["nd_process"]
+                self.last_time = info["detection_info"]["last_time"]
+                self.caption_boundings = info["detection_info"]["caption_boundings"]
+                self.has_subtitle = info["detection_info"]["has_subtitle"]
-    # def Init(self, project_dir, video_path, excel_path):
-    #     self.project_base_dir = project_dir
-    #     self.video_path = video_path
-    #     self.excel_path = excel_path
    def save_conf(self):
-        with open('./conf.ini', 'w', encoding='utf-8') as f:
-            f.writelines([self.speaker_info + '\n', self.speaker_speed])
+        """Write the context's persistent state to ``self.conf_path`` as JSON.
+
+        The document holds the video path, the excel path, the detection progress
+        and the speaker settings.
+        """
+        # Keep the information held by the context: detection progress, the
+        # intermediate detection product (excel), the video path and the speaker settings.
+        info = {
+            "video_path": self.video_path,
+            "excel_path": self.excel_path,
+            "detection_info": {
+                "detected": self.detected,
+                "nd_process": self.nd_process,
+                "last_time": self.last_time,
+                "caption_boundings": self.caption_boundings,
+                "has_subtitle": self.has_subtitle
+            },
+            "speaker_info": {
+                "speaker_id": self.speaker_info,
+                "speaker_speed": self.speaker_speed
+            }
+        }
+        # Serialize before opening: mode 'w' truncates the file immediately, so a
+        # failure inside json.dumps would otherwise leave an empty config behind.
+        payload = json.dumps(info)
+        with open(self.conf_path, 'w', encoding='utf-8') as f:
+            f.write(payload)
    def setVideoPath(self, video_path):
        self.video_path = video_path
    def setExcelPath(self, excel_path):
        self.excel_path = excel_path
    # 目前只是把excel保存到文件中
    # 先备份文件，再覆盖主文件，可选是否需要备份，默认需要备份
+    # 20221030：添加旁白检测的进度
    def save_project(self, need_save_new: bool=False) -> str:
+        self.save_conf()
        # all_element = sorted(all_element, key=lambda x: float(x.st_time_sec))
        print("current excel_path:", self.excel_path)
        if self.excel_path == None:
@@ -239,13 +283,17 @@ class ProjectContext:
                self.all_elements.append(self.aside_list[-1])
            # print("[load_excel_from_path] ", end='')
            # self.all_elements[-1].print_self()
    # 现在仅支持对修改操作的记录
    def history_push(self, row, old, new):
+        print(old, new)
        if self.records_pos == len(self.history_records):
            self.history_records.append(OperateRecord(row, Operation.Modify, old, new))
        else:
            self.history_records[self.records_pos] = OperateRecord(row, Operation.Modify, old, new)
        self.records_pos += 1
    def history_pop(self)-> OperateRecord:
        if len(self.history_records) == 0:
            return None
@@ -279,6 +327,8 @@ class ProjectContext:
        for speaker in content["speaker_details"]:
            speaker_name.append(
                "，".join([speaker["name"], speaker["gender"], speaker["age_group"]]))
+        if self.speaker_info is None:
+            self.speaker_info = speaker_name[0]
        return tuple(speaker_name)
    def init_speakers(self):

--- a/narratage_detection.py
+++ b/narratage_detection.py
@@ -24,6 +24,9 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
        state (optional): 任务进行状态. Defaults to None.
        subtitle (int, optional): 视频是否有字幕，共三种情况(0:未知，1:有字幕，2:无字幕). Defaults to 0.
    """
+    context = mainWindow.projectContext
+    # 未检测过
+    if not context.detected:
        print("开始检测")
        print("start_time", start_time)
        print("end_time", end_time)
@@ -42,12 +45,23 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
        else:
            has_subtitle = False
+        context.has_subtitle = has_subtitle
        if has_subtitle:
            from detect_with_ocr import detect_with_ocr
            detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
        # else:
        #     from detect_with_asr import detect_with_asr
-    #     detect_with_asr(video_path, book_path, start_time, end_time, state)
+        #     detect_with_asr(video_path, book_path, start_time, end_time, state, mainWindow)
+    else:
+        # 之前检测过
+        has_subtitle = context.has_subtitle
+        start_time = context.last_time
+        if has_subtitle:
+            # 更新当前进度
+            state[0] = context.nd_process
+            from detect_with_ocr import detect_with_ocr
+            detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)
 if __name__ == '__main__':

--- a/operation_dialog.py
+++ b/operation_dialog.py
 import sys
 import os
-from PyQt5.QtCore import *;
+from PyQt5.QtCore import *
-from PyQt5.QtGui import *;
+from PyQt5.QtGui import *
-from PyQt5.QtWidgets import *;
+from PyQt5.QtWidgets import *
 import utils
 from operation_dialog_ui import Ui_Dialog
-#todo 注意，删除行，添加行，暂不支持【撤销与重做】功能！！！
+# todo 注意，删除行，添加行，暂不支持【撤销与重做】功能！！！
 class Operation_Dialog(QDialog, Ui_Dialog):
    #开始检测信号，传参分别是movie路径和输出表格路径
@@ -26,7 +28,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
        self.pushButton_3.clicked.connect(self.fill_row_info_slot)
        self.buttonBox.setEnabled(False)
-        self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(self.start_operation_slot)
+        self.buttonBox.button(QDialogButtonBox.StandardButton.Ok).clicked.connect(
+            self.start_operation_slot)
        # 字幕/旁白 选择框
        self.comboBox.currentIndexChanged.connect(self.zmpb_change_slot)
        # 增加一行/删除一行 选择框
@@ -50,13 +53,14 @@ class Operation_Dialog(QDialog, Ui_Dialog):
        else:
            self.lineEdit_3.setEnabled(False)
            self.lineEdit_4.setEnabled(False)
+            self.lineEdit_5.setEnabled(False)
    # 如果是删除，则只需要【行数】即可
    def adddel_change_slot(self):
        if self.comboBox_2.currentText() in ["增加一行", "修改一行"]:
            self.zmpb_change_slot()
        else:
-            for i in range(1,len(self.lineEdits)):
+            for i in range(1, len(self.lineEdits)):
                self.lineEdits[i].setEnabled(False)
    # 修改完后需要重新检测
@@ -71,7 +75,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
            row_number = int(self.lineEdit.text())
            assert 1 <= row_number <= rowCount
        except Exception as e:
-            self.mainWindow.prompt_dialog.show_with_msg("校验失败！总行数为[%d],你的输入为[%s]!!"%(rowCount, self.lineEdit.text()))
+            self.mainWindow.prompt_dialog.show_with_msg(
+                "校验失败！总行数为[%d],你的输入为[%s]!!" % (rowCount, self.lineEdit.text()))
            return False
        # 校验时间填写是否是hh:mm:ss格式的
        try:
@@ -91,7 +96,7 @@ class Operation_Dialog(QDialog, Ui_Dialog):
        # 这些是只有【add】才需要检测的
        if self.comboBox_2.currentText() == "增加一行":
-            #校验起始时间、结束时间
+            # 校验起始时间、结束时间
            start_time_f, end_time_f = 0.0, 0.0
            try:
                start_time_f = float(utils.trans_to_seconds(self.lineEdit_2.text()))
@@ -99,17 +104,19 @@ class Operation_Dialog(QDialog, Ui_Dialog):
                    end_time_f = float(utils.trans_to_seconds(self.lineEdit_3.text()))
                    assert start_time_f < end_time_f
            except Exception as e:
-                self.mainWindow.prompt_dialog.show_with_msg("校验失败！起始时间或结束时间输入有误!!" )
+                self.mainWindow.prompt_dialog.show_with_msg(
+                    "校验失败！起始时间或结束时间输入有误!!")
                return False
            # 校验推荐字数
-            if self.comboBox.currentText() == "旁白":
+            # if self.comboBox.currentText() == "旁白":
-                try:
+            #     try:
-                    suggest_words_count = int(self.lineEdit_5.text())
+            #         suggest_words_count = int(self.lineEdit_5.text())
-                    assert suggest_words_count <= 100
+            #         assert suggest_words_count <= 100
-                except Exception as e:
+            #     except Exception as e:
-                    self.mainWindow.prompt_dialog.show_with_msg("校验失败！推荐字数填入有误!!")
+            #         self.mainWindow.prompt_dialog.show_with_msg(
-                    return False
+            #             "校验失败！推荐字数填入有误!!")
+            #         return False
        # 这些是只有【modify】才需要检测的
        if self.comboBox_2.currentText() == "修改一行":
            try:
@@ -121,7 +128,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
                else:
                    assert self.comboBox.currentText() == "字幕"
            except Exception as e:
-                    self.mainWindow.prompt_dialog.show_with_msg("校验失败！待修改的行不是[%s]"%(self.comboBox.currentText()))
+                self.mainWindow.prompt_dialog.show_with_msg(
+                    "校验失败！待修改的行不是[%s]" % (self.comboBox.currentText()))
                return False
        # 检测通过
@@ -129,7 +137,6 @@ class Operation_Dialog(QDialog, Ui_Dialog):
        self.buttonBox.setEnabled(True)
        self.set_all_user_component_status(False)
    def set_all_user_component_status(self, status: bool):
        for lineEdit in self.lineEdits:
            lineEdit.setEnabled(status)
@@ -179,7 +186,8 @@ class Operation_Dialog(QDialog, Ui_Dialog):
                self.lineEdit_2.setText(str(utils.transfer_second_to_time(elem.st_time_sec)))
                self.lineEdit_3.setText(str(utils.transfer_second_to_time(elem.ed_time_sec)))
                self.lineEdit_4.setText(elem.subtitle)
-                self.lineEdit_5.setText(elem.suggest[elem.suggest.index("推荐字数为") + 5:])
+                self.lineEdit_5.setText(
+                    elem.suggest[elem.suggest.index("推荐字数为") + 5:])
                self.lineEdit_6.setText(elem.aside)
                # 如果是旁白的话
@@ -190,6 +198,7 @@ class Operation_Dialog(QDialog, Ui_Dialog):
                print("exception:", e)
                pass
 if __name__ == '__main__':
    app = QApplication(sys.argv)
    app.setWindowIcon(QIcon("./images/eagle_2.ico"))

--- a/operation_dialog_ui.py
+++ b/operation_dialog_ui.py
@@ -90,9 +90,6 @@ class Ui_Dialog(object):
        self.label_11 = QtWidgets.QLabel(Dialog)
        self.label_11.setGeometry(QtCore.QRect(380, 310, 81, 20))
        self.label_11.setObjectName("label_11")
-        self.label_12 = QtWidgets.QLabel(Dialog)
-        self.label_12.setGeometry(QtCore.QRect(250, 270, 251, 20))
-        self.label_12.setObjectName("label_12")
        self.pushButton = QtWidgets.QPushButton(Dialog)
        self.pushButton.setGeometry(QtCore.QRect(350, 380, 93, 28))
        self.pushButton.setObjectName("pushButton")
@@ -141,7 +138,6 @@ class Ui_Dialog(object):
        self.label_9.setText(_translate("Dialog", "*请填数字，最多保留两位小数"))
        self.label_10.setText(_translate("Dialog", "*请填文字"))
        self.label_11.setText(_translate("Dialog", "*请填文字"))
-        self.label_12.setText(_translate("Dialog", "*请填数字，必须是不超过100的正整数"))
        self.pushButton.setText(_translate("Dialog", "检测"))
        self.pushButton_2.setText(_translate("Dialog", "修改"))
        self.label_13.setText(_translate("Dialog", "*需要填在【字幕旁白】页面中的行数"))

--- a/render.py
+++ b/render.py
+'''
+    用于渲染最终成果，将之前临时生成的音频插入到原音频中|生成一条纯旁白的音频|插入到原视频中
+'''
+import librosa
+import numpy as np
+import os
+import soundfile
+import subprocess
+import time
+from PyQt5.QtCore import *;
+from PyQt5.QtGui import *;
+from PyQt5.QtWidgets import *;
+from management import RunThread
+from speech_synthesis import ffmpeg_path
+class ExportProcessor(QWidget):
+    """Run the final export (audio mixing, then video muxing) on a worker thread.
+    ``state`` is a one-element list used as a mutable progress cell: the worker
+    functions write a fraction between 0 and 1 into ``state[0]``; it holds
+    ``None`` until the first progress update of an export.
+    """
+    show_warning_signal = pyqtSignal(str)
+    # Emitted once per export request with (list of worker threads, progress cell).
+    export_callback_signal = pyqtSignal(list, list)
+    def __init__(self):
+        super(ExportProcessor, self).__init__()
+        self.state = [None]
+        self.threads = []
+    def export_slot(self, video_path, output_dir):
+        """Start exporting ``video_path`` into ``output_dir`` in the background.
+        Args:
+            video_path: Path of the source video.
+            output_dir: Directory that holds the narration clips and receives
+                the exported files.
+        """
+        # Reset progress in place (same list object, so holders of a reference
+        # keep seeing updates); otherwise a second export would start out
+        # reporting the 1.0 left behind by the previous one.
+        self.state[0] = None
+        worker = RunThread(funcName=self.start_export,
+                           args=(video_path, output_dir),
+                           name="export")
+        worker.setDaemon(True)
+        self.threads.append(worker)
+        # Start only the new worker. Iterating over self.threads would call
+        # start() again on workers from earlier exports; presumably RunThread
+        # is a threading.Thread subclass (it exposes setDaemon), and a Thread
+        # raises RuntimeError when started twice.
+        worker.start()
+        print("===子线程已经开启 in export===")
+        self.export_callback_signal.emit(self.threads, self.state)
+    def start_export(self, video_path, output_dir):
+        """Worker body: mix the audio tracks, then mux the mix into the video."""
+        mixed_audio_path = aggrevate_audios(video_path, output_dir, self.state)
+        export_video(video_path, mixed_audio_path, output_dir, self.state)
+    # 生成一条无声的音频，然后把旁白音频逐个按照时间位置放进去，得到仅含旁白的音频和旁白+原声的音频
+def aggrevate_audios(video_path: str, output_dir: str, state=None):
+    """Build the narration-only track and the narration + original mix.
+    The original audio is extracted from ``video_path``. Every entry in
+    ``output_dir`` whose name, minus its last extension, parses as a float is
+    treated as a narration clip starting at that many seconds. Each clip is
+    copied into a silent track as long as the original audio, and the original
+    audio is scaled to 30% over the same span.
+    Args:
+        video_path: Path of the source video.
+        output_dir: Directory holding the narration clips; ``narratage.wav``
+            and ``mixed.wav`` are written here.
+        state: Optional one-element list; ``state[0]`` receives the progress,
+            which reaches 0.8 when this step is finished.
+    Returns:
+        Path of the mixed (original + narration) wav file.
+    """
+    # This step accounts for at most 80% of the overall export progress.
+    if state is None:
+        state = [None]
+    # Silent track with the same length as the original audio.
+    from split_wav import extract_audio
+    origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
+    origin_wav, freq = librosa.load(origin_wav_path)
+    blank_audio = np.zeros_like(origin_wav)
+    total_len = len(origin_wav)
+    files = os.listdir(output_dir)
+    for i, f in enumerate(files):
+        fname = f.rpartition('.')[0]
+        try:
+            st_index = int(float(fname) * freq)
+        except (ValueError, OverflowError):
+            # Name is not "<start seconds>.<ext>", so it is not a narration clip.
+            continue
+        # A negative index would wrap around to the end of the track and a
+        # start past the end has nowhere to go; skip both.
+        if not 0 <= st_index < total_len:
+            continue
+        try:
+            cur_audio, _ = librosa.load(os.path.join(output_dir, f))
+        except Exception as e:
+            # Keep the export best-effort, but report the clip instead of
+            # dropping it silently.
+            print("skip unreadable narration clip:", f, e)
+            continue
+        # Truncate a clip that runs past the end of the track; assigning the
+        # full clip to a shorter slice raises a shape-mismatch error, which
+        # used to make the whole clip disappear from the export.
+        end_index = min(st_index + len(cur_audio), total_len)
+        blank_audio[st_index:end_index] = cur_audio[:end_index - st_index]
+        origin_wav[st_index:end_index] *= 0.3
+        state[0] = float((i + 1) / len(files)) * 0.7
+    narratage_only_path = os.path.join(output_dir, "narratage.wav")
+    soundfile.write(narratage_only_path, blank_audio, freq)
+    state[0] = 0.75
+    # Sum the ducked original and the narration track into the final mix.
+    mixed_audio = origin_wav + blank_audio
+    mixed_audio_path = os.path.join(output_dir, "mixed.wav")
+    soundfile.write(mixed_audio_path, mixed_audio, freq)
+    state[0] = 0.8
+    return mixed_audio_path
+def export_video(video_path: str, mixed_audio_path: str, output_dir: str, state=None):
+    """Mux the mixed audio into the original video with ffmpeg.
+    The output is written to ``output_dir`` as ``new_<original file name>``.
+    ``.rmvb`` sources are re-encoded to h264 and saved as ``.mp4``; every other
+    source keeps its video stream as is (``-vcodec copy``).
+    Args:
+        video_path: Path of the source video (provides the video stream).
+        mixed_audio_path: Path of the mixed wav (provides the audio stream).
+        output_dir: Directory that receives the new video.
+        state: Optional one-element list; ``state[0]`` is set to 1.0 when
+            ffmpeg returns.
+    """
+    if state is None:
+        state = [None]
+    base_name = os.path.basename(video_path)
+    if base_name.split('.')[-1] == 'rmvb':
+        video_name = base_name.split('.')[0]
+        mixed_movie_path = os.path.join(output_dir, "new_" + video_name + ".mp4")
+        vcodec = 'h264'
+    else:
+        mixed_movie_path = os.path.join(output_dir, "new_" + base_name)
+        vcodec = 'copy'
+    # Pass the arguments as a list so paths containing spaces or shell
+    # metacharacters reach ffmpeg intact and no shell parsing is involved.
+    # NOTE(review): assumes ffmpeg_path is a bare executable path without
+    # embedded quotes or extra arguments; confirm in speech_synthesis.
+    command = [
+        ffmpeg_path,
+        '-i', video_path,
+        '-i', mixed_audio_path,
+        '-map', '0:v:0',
+        '-map', '1:a:0',
+        '-vcodec', vcodec,
+        mixed_movie_path,
+        '-y',
+    ]
+    return_code = subprocess.call(command)
+    if return_code != 0:
+        print("ffmpeg exited with code", return_code, "while writing", mixed_movie_path)
+    # Progress is marked complete even on failure so a progress poller is not
+    # left waiting forever.
+    state[0] = 1.00
+if __name__ == '__main__':
+    # Running this module directly does nothing. The commented-out lines below
+    # are a disabled manual run that times a full export (audio mix + video
+    # mux) on a local sample video.
+    pass
+    # start_time = time.time()
+    # video_path = r'D:/Downloads/zhanlang/zhanlang.rmvb'
+    # output_dir = r'D:/AddCaption/last_version/accessibility_movie/zhanlang'
+    # mixed_audio_path = aggrevate_audios(video_path, output_dir)
+    # export_video(video_path, mixed_audio_path, output_dir)
+    # print(time.time() - start_time)
\ No newline at end of file
--- a/setting_dialog.py
+++ b/setting_dialog.py
@@ -20,25 +20,41 @@ class Setting_Dialog(QDialog, Ui_Dialog):
        self.setWindowTitle("设置")
        self.projectContext = projectContext
        # todo 把所有说话人都加上来
-        li = self.projectContext.get_all_speaker_info()
+        self.speaker_li = self.projectContext.get_all_speaker_info()
-        for i in li:
+        for i in self.speaker_li:
            self.comboBox.addItem(i)
-        li_2 = ["1.00(4字/秒)", "1.10(4.5字/秒)", "1.25(5字/秒)", "1.50(6字/秒)", "1.75(7字/秒)", "2.00(8字/秒)", "2.50(10字/秒)"]
+        self.speed_li_2 = ["1.00(4字/秒)", "1.10(4.5字/秒)", "1.25(5字/秒)", "1.50(6字/秒)", "1.75(7字/秒)", "2.00(8字/秒)", "2.50(10字/秒)"]
-        self.comboBox_2.addItems(li_2)
+        self.comboBox_2.addItems(self.speed_li_2)
-        self.comboBox.setCurrentIndex(li.index(self.projectContext.speaker_info))
+        if self.projectContext.speaker_info is None:
-        self.comboBox_2.setCurrentIndex(li_2.index(self.projectContext.speaker_speed))
+            self.comboBox.setCurrentIndex(0)
+        else:
+            self.comboBox.setCurrentIndex(self.speaker_li.index(self.projectContext.speaker_info))
+        if self.projectContext.speaker_speed is None:
+            self.comboBox_2.setCurrentIndex(0)
+        else:    
+            self.comboBox_2.setCurrentIndex(self.speed_li_2.index(self.projectContext.speaker_speed))
        self.comboBox.currentIndexChanged.connect(self.speaker_change_slot)
        self.comboBox_2.currentIndexChanged.connect(self.speed_change_slot)
        self.pushButton.clicked.connect(self.play_audio_slot)
+    def content_fresh(self):
+        """Sync both combo boxes with the speaker settings held by the project context.
+        The first entry is selected when a setting is ``None`` or is not among
+        the available options (e.g. a value persisted by an older version), so
+        the dialog can always be shown instead of raising ``ValueError``.
+        """
+        speaker = self.projectContext.speaker_info
+        speaker_index = self.speaker_li.index(speaker) if speaker in self.speaker_li else 0
+        self.comboBox.setCurrentIndex(speaker_index)
+        # Read the speed only after the speaker combo box has been updated,
+        # keeping the original evaluation order.
+        speed = self.projectContext.speaker_speed
+        speed_index = self.speed_li_2.index(speed) if speed in self.speed_li_2 else 0
+        self.comboBox_2.setCurrentIndex(speed_index)
    def speaker_change_slot(self):
        self.projectContext.speaker_info = self.comboBox.currentText()
        self.projectContext.save_conf()
        # print("self.projectContext.speaker_info:", self.projectContext.speaker_info)
    def speed_change_slot(self):
        self.projectContext.speaker_speed = self.comboBox_2.currentText()
        self.projectContext.save_conf()
@@ -64,6 +80,12 @@ class Setting_Dialog(QDialog, Ui_Dialog):
        global audioPlayed
        winsound.PlaySound(audioPlayed, winsound.SND_PURGE)
        event.accept()
+    def showDialog(self):
+        """Refresh the combo boxes from the project context, then show the dialog."""
+        self.content_fresh()
+        self.show()
 def thread_it(func, *args, name):
    """创建守护线程
@@ -79,6 +101,7 @@ def thread_it(func, *args, name):
    t.setDaemon(True)
    # 启动
    t.start()
 if __name__ == '__main__':
    app = QApplication(sys.argv)
    app.setWindowIcon(QIcon("./images/eagle_2.ico"))

--- a/speech_synthesis.py
+++ b/speech_synthesis.py
@@ -22,7 +22,7 @@ from typing import Tuple
 import datetime
 import numpy as np
-from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason
+from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, ResultReason, AudioDataStream
 from azure.cognitiveservices.speech.audio import AudioOutputConfig
 import openpyxl
@@ -91,7 +91,9 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
    """
    audio_path = tmp_file
    speech_config = SpeechConfig(
-        subscription="db34d38d2d3447d482e0f977c66bd624", region="eastus")
+        subscription="db34d38d2d3447d482e0f977c66bd624",
+        region="eastus"
+    )
    speech_config.speech_synthesis_language = "zh-CN"
    speech_config.speech_synthesis_voice_name = speaker.speaker_code
@@ -101,25 +103,33 @@ def speech_synthesis(text: str, output_file: str, speaker: Speaker, speed: float
        print("output_file路径不存在，创建:", os.path.dirname(output_file))
        os.makedirs(os.path.dirname(output_file))
    audio_config = AudioOutputConfig(filename=audio_path)
-    synthesizer = SpeechSynthesizer(
+    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
-        speech_config=speech_config, audio_config=audio_config)
+    ssml_string = f"""
-    result = synthesizer.speak_text(text)
+    <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{speech_config.speech_synthesis_language}">
+        <voice name="{speaker.speaker_code}">
+            <prosody rate="{round((speed - 1.0) * 100, 2)}%">
+                {text}
+            </prosody>
+        </voice>
+    </speak>"""
+    result = synthesizer.speak_ssml_async(ssml_string).get()
+    stream = AudioDataStream(result)
+    stream.save_to_wav_file(output_file)
    print(result.reason)
    while result.reason == ResultReason.Canceled:
        cancellation_details = result.cancellation_details
-        print("取消的原因", cancellation_details.reason)
+        print("取消的原因", cancellation_details.reason, cancellation_details.error_details)
        time.sleep(1)
        synthesizer.stop_speaking()
        del synthesizer
-        synthesizer = SpeechSynthesizer(
+        synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
-            speech_config=speech_config, audio_config=audio_config)
+        result = synthesizer.speak_ssml_async(ssml_string).get()
-        result = synthesizer.speak_text(text)
+        stream = AudioDataStream(result)
+        stream.save_to_wav_file(output_file)
        print(result.reason)
-    change_speed_and_volume(output_file, speed)
+# detached
 def change_speed_and_volume(wav_path: str, speed: float = 1.0):
    """调整语速，顺便把音量调大，语音合成的声音太小了
@@ -248,40 +258,6 @@ def export_caption(sheet_content: dict, caption_file: str):
                f.write(x + "\n\n")
-def adjust_volume(origin: str, start_timestamp: list, end_timestamp: list):
-    """调整原音频中待插入旁白位置的音量
-    Args:
-        origin (str): 原音频存储位置
-        start_timestamp (list): 旁白开始时间
-        end_timestamp (list): 旁白结束时间
-    """
-    global adjusted_wav_path
-    adjusted_wav_path = os.path.join(os.path.dirname(origin), adjusted_wav_path)
-    n = len(start_timestamp)
-    groups = int(np.ceil(n / part_len))
-    start = 0
-    middle_wav, res_wav = origin, os.path.join(os.path.dirname(origin), "adjust0.wav")
-    for x in range(groups):
-        if x == groups - 1:
-            res_wav = adjusted_wav_path
-        st = start_timestamp[start: start + part_len] if start + part_len < n else start_timestamp[start: n]
-        et = end_timestamp[start: start + part_len] if start + part_len < n else start_timestamp[start: n]
-        command_line = "{} -i {} -af \"".format(ffmpeg_path, middle_wav)
-        for i in range(len(st)):
-            command_line += "volume=enable='between(t,{},{})':volume=0.3".format(st[i], et[i])
-            if i != len(st) - 1:
-                command_line += ","
-        command_line += "\" -y {}".format(res_wav)
-        print(command_line)
-        os.system(command_line)
-        if x != 0:
-            os.remove(middle_wav)
-        middle_wav = res_wav
-        res_wav = os.path.join(os.path.dirname(origin), "adjust{}.wav".format(x + 1))
-        start += part_len
 def mix_speech(origin: str, narratage_paths: list, start_timestamps: list):
    """将合成音频与原音频混合
@@ -372,26 +348,6 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
        if state is not None:
            state[0] = float((i + 1) / len(narratages)) * 0.97
-    # 合成总音频，并入原视频音频中
-    # 提取原音频
-    print("mix the final wav at time: ", datetime.datetime.now())
-    from split_wav import extract_audio
-    origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
-    start = 0
-    n = len(start_timestamp)
-    part_len = 50
-    x = int(np.ceil(n / part_len))
-    start_timestamp = list(reversed(start_timestamp))
-    end_timestamp = list(reversed(end_timestamp))
-    narratage_paths = list(reversed(narratage_paths))
-    # 调整原音频中旁白对应位置的音量
-    adjust_volume(origin_wav_path, start_timestamp, end_timestamp)
-    print("--------------------------ok-----------------------------------")
-    # 将旁白混入原音频
-    mix_speech(adjusted_wav_path, narratage_paths, start_timestamp)
-    # print(middle_wav)
-    start += part_len
    # 删除临时语音文件、提取出来的原视频音频以及调整后的视频音频
    if os.path.exists(tmp_file):
        time.sleep(1)
@@ -403,13 +359,17 @@ def ss_and_export(video_path: str, sheet_path: str, output_dir: str, speed: floa
 if __name__ == '__main__':
-    # video_path = r'D:/Downloads/zhanlang.rmvb'
+    video_path = r'D:/Downloads/zhanlang.rmvb'
-    # sheet_path = r'D:/Downloads/战狼.xlsx'
+    sheet_path = r'D:/Downloads/zhanlang/战狼.xlsx'
-    # output_dir = r'D:/AddCaption/last_version/accessibility_movie/zhanlang'
+    output_dir = r'D:/AddCaption/last_version/accessibility_movie/zhanlang'
-    # speed = 1.25
+    speed = 1.25
-    # caption_file = './zhanlang/zhanlang.srt'
+    caption_file = os.path.join(output_dir, os.path.basename(video_path) + ".srt")
-    # speaker_name = '晓秋'
+    speaker_name = '晓秋'
-    # ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, speaker_name)
+    ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, speaker_name)
-    import pprint
+    # import pprint
-    d = read_sheet("./test37second.xlsx")
+    # d = read_sheet("./test37second.xlsx")
-    pprint.pprint(d)
+    # pprint.pprint(d)
\ No newline at end of file
+    # init_speakers()
+    # speaker_name = "晓秋"
+    # speaker = choose_speaker(speaker_name)
+    # speech_synthesis("今天我们讲解的电影是何以笙箫默，它讲述了", r"D:\AddCaption\cur_version\accessibility_movie_2\test.wav", speaker, 0.5)
\ No newline at end of file
--- a/start.py
+++ b/start.py
@@ -6,7 +6,10 @@ from PyQt5.QtGui import *;
 from PyQt5.QtWidgets import *;
 from main_window import MainWindow
+from qt_material import apply_stylesheet
+import qdarkstyle
+import os
+os.environ['PYQTGRAPH_QT_LIB'] = 'PyQt5'
 if __name__ == '__main__':
    try:
@@ -14,6 +17,8 @@ if __name__ == '__main__':
        app = QApplication(sys.argv)
        app.setWindowIcon(QIcon("./images/eagle_2.ico"))
        mainWindow = MainWindow()
+        # apply_stylesheet(app, theme='dark_amber.xml')
+        # app.setStyleSheet(qdarkstyle.load_stylesheet(qt_api=os.environ['PYQTGRAPH_QT_LIB']))
        mainWindow.show()
        sys.exit(app.exec_())