Fix some bugs and adapt OCR to low-quality films

8142abd4 · 翟艳秋（20软） · ad04a182 · 8142abd4 · 8142abd4 · 8142abd4
Commit 8142abd4 authored Dec 01, 2022 by 翟艳秋（20软）
8 changed files
--- a/assemble_dialog.ui
+++ b/assemble_dialog.ui
@@ -13,155 +13,72 @@
  <property name="windowTitle">
   <string>Dialog</string>
  </property>
-  <widget class="QDialogButtonBox" name="buttonBox">
-   <property name="geometry">
-    <rect>
-     <x>220</x>
-     <y>250</y>
-     <width>341</width>
-     <height>32</height>
-    </rect>
-   </property>
-   <property name="toolTip">
-    <string/>
-   </property>
-   <property name="toolTipDuration">
-    <number>-1</number>
-   </property>
-   <property name="orientation">
-    <enum>Qt::Horizontal</enum>
-   </property>
-   <property name="standardButtons">
-    <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
-   </property>
-   <property name="centerButtons">
-    <bool>false</bool>
-   </property>
-  </widget>
-  <widget class="QLabel" name="label">
-   <property name="geometry">
-    <rect>
-     <x>40</x>
-     <y>40</y>
-     <width>72</width>
-     <height>15</height>
-    </rect>
-   </property>
-   <property name="text">
-    <string>视频路径:</string>
-   </property>
-  </widget>
-  <widget class="QLineEdit" name="lineEdit">
-   <property name="geometry">
-    <rect>
-     <x>130</x>
-     <y>40</y>
-     <width>231</width>
-     <height>21</height>
-    </rect>
-   </property>
-  </widget>
-  <widget class="QPushButton" name="pushButton">
-   <property name="geometry">
-    <rect>
-     <x>390</x>
-     <y>40</y>
-     <width>101</width>
-     <height>31</height>
-    </rect>
-   </property>
-   <property name="text">
-    <string>选择文件</string>
-   </property>
-  </widget>
-  <widget class="QLabel" name="label_2">
-   <property name="geometry">
-    <rect>
-     <x>20</x>
-     <y>90</y>
-     <width>111</width>
-     <height>16</height>
-    </rect>
-   </property>
-   <property name="text">
-    <string>旁白脚本表格：</string>
-   </property>
-  </widget>
-  <widget class="QLineEdit" name="lineEdit_2">
-   <property name="geometry">
-    <rect>
-     <x>130</x>
-     <y>90</y>
-     <width>231</width>
-     <height>21</height>
-    </rect>
-   </property>
-  </widget>
-  <widget class="QPushButton" name="pushButton_2">
-   <property name="geometry">
-    <rect>
-     <x>390</x>
-     <y>90</y>
-     <width>101</width>
-     <height>31</height>
-    </rect>
-   </property>
-   <property name="text">
-    <string>表格路径</string>
-   </property>
-  </widget>
-  <widget class="QLabel" name="label_3">
-   <property name="geometry">
-    <rect>
-     <x>30</x>
-     <y>140</y>
-     <width>111</width>
-     <height>16</height>
-    </rect>
-   </property>
-   <property name="text">
-    <string>旁白说话人：</string>
-   </property>
-  </widget>
-  <widget class="QLabel" name="label_4">
-   <property name="geometry">
-    <rect>
-     <x>30</x>
-     <y>200</y>
-     <width>111</width>
-     <height>16</height>
-    </rect>
-   </property>
-   <property name="text">
-    <string>旁白语速：</string>
-   </property>
-  </widget>
-  <widget class="QLineEdit" name="lineEdit_3">
-   <property name="enabled">
-    <bool>false</bool>
-   </property>
-   <property name="geometry">
-    <rect>
-     <x>130</x>
-     <y>140</y>
-     <width>231</width>
-     <height>21</height>
-    </rect>
-   </property>
-  </widget>
-  <widget class="QLineEdit" name="lineEdit_4">
-   <property name="enabled">
-    <bool>false</bool>
-   </property>
-   <property name="geometry">
-    <rect>
-     <x>130</x>
-     <y>200</y>
-     <width>231</width>
-     <height>21</height>
-    </rect>
-   </property>
-  </widget>
+  <layout class="QGridLayout" name="gridLayout">
+   <item row="0" column="2">
+    <widget class="QPushButton" name="pushButton_2">
+     <property name="text">
+      <string>表格路径</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="1">
+    <widget class="QLineEdit" name="lineEdit_3">
+     <property name="enabled">
+      <bool>false</bool>
+     </property>
+    </widget>
+   </item>
+   <item row="2" column="1">
+    <widget class="QLineEdit" name="lineEdit_4">
+     <property name="enabled">
+      <bool>false</bool>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="1">
+    <widget class="QLineEdit" name="lineEdit_2"/>
+   </item>
+   <item row="2" column="0">
+    <widget class="QLabel" name="label_4">
+     <property name="text">
+      <string>旁白语速：</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="1" alignment="Qt::AlignHCenter">
+    <widget class="QDialogButtonBox" name="buttonBox">
+     <property name="toolTip">
+      <string/>
+     </property>
+     <property name="toolTipDuration">
+      <number>-1</number>
+     </property>
+     <property name="orientation">
+      <enum>Qt::Horizontal</enum>
+     </property>
+     <property name="standardButtons">
+      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
+     </property>
+     <property name="centerButtons">
+      <bool>false</bool>
+     </property>
+    </widget>
+   </item>
+   <item row="0" column="0">
+    <widget class="QLabel" name="label_2">
+     <property name="text">
+      <string>旁白脚本表格：</string>
+     </property>
+    </widget>
+   </item>
+   <item row="1" column="0">
+    <widget class="QLabel" name="label_3">
+     <property name="text">
+      <string>旁白说话人：</string>
+     </property>
+    </widget>
+   </item>
+  </layout>
 </widget>
 <resources/>
 <connections>

--- a/assemble_dialog_ui.py
+++ b/assemble_dialog_ui.py
 # -*- coding: utf-8 -*-

-# Form implementation generated from reading ui file 'assemble_dialog.ui'
+# Form implementation generated from reading ui file 'd:\AddCaption\cur_version\accessibility_movie_2\assemble_dialog.ui'
 #
-# Created by: PyQt5 UI code generator 5.12
+# Created by: PyQt5 UI code generator 5.15.4
 #
-# WARNING! All changes made in this file will be lost!
+# WARNING: Any manual changes made to this file will be lost when pyuic5 is
+# run again.  Do not edit this file unless you know what you are doing.
+

 from PyQt5 import QtCore, QtGui, QtWidgets

@@ -13,46 +15,48 @@ class Ui_Dialog(object):
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
        Dialog.resize(585, 326)
-        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
-        self.buttonBox.setGeometry(QtCore.QRect(220, 250, 341, 32))
-        self.buttonBox.setToolTip("")
-        self.buttonBox.setToolTipDuration(-1)
-        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
-        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok)
-        self.buttonBox.setCenterButtons(False)
-        self.buttonBox.setObjectName("buttonBox")
-        self.label = QtWidgets.QLabel(Dialog)
-        self.label.setGeometry(QtCore.QRect(40, 40, 72, 15))
-        self.label.setObjectName("label")
-        self.lineEdit = QtWidgets.QLineEdit(Dialog)
-        self.lineEdit.setGeometry(QtCore.QRect(130, 40, 231, 21))
-        self.lineEdit.setObjectName("lineEdit")
+        self.gridLayout = QtWidgets.QGridLayout(Dialog)
+        self.gridLayout.setObjectName("gridLayout")
        self.pushButton = QtWidgets.QPushButton(Dialog)
-        self.pushButton.setGeometry(QtCore.QRect(390, 40, 101, 31))
        self.pushButton.setObjectName("pushButton")
-        self.label_2 = QtWidgets.QLabel(Dialog)
-        self.label_2.setGeometry(QtCore.QRect(20, 90, 111, 16))
-        self.label_2.setObjectName("label_2")
-        self.lineEdit_2 = QtWidgets.QLineEdit(Dialog)
-        self.lineEdit_2.setGeometry(QtCore.QRect(130, 90, 231, 21))
-        self.lineEdit_2.setObjectName("lineEdit_2")
-        self.pushButton_2 = QtWidgets.QPushButton(Dialog)
-        self.pushButton_2.setGeometry(QtCore.QRect(390, 90, 101, 31))
-        self.pushButton_2.setObjectName("pushButton_2")
-        self.label_3 = QtWidgets.QLabel(Dialog)
-        self.label_3.setGeometry(QtCore.QRect(30, 140, 111, 16))
-        self.label_3.setObjectName("label_3")
+        self.gridLayout.addWidget(self.pushButton, 0, 2, 1, 1)
        self.label_4 = QtWidgets.QLabel(Dialog)
-        self.label_4.setGeometry(QtCore.QRect(30, 200, 111, 16))
        self.label_4.setObjectName("label_4")
+        self.gridLayout.addWidget(self.label_4, 3, 0, 1, 1)
+        self.label_3 = QtWidgets.QLabel(Dialog)
+        self.label_3.setObjectName("label_3")
+        self.gridLayout.addWidget(self.label_3, 2, 0, 1, 1)
        self.lineEdit_3 = QtWidgets.QLineEdit(Dialog)
        self.lineEdit_3.setEnabled(False)
-        self.lineEdit_3.setGeometry(QtCore.QRect(130, 140, 231, 21))
        self.lineEdit_3.setObjectName("lineEdit_3")
+        self.gridLayout.addWidget(self.lineEdit_3, 2, 1, 1, 1)
        self.lineEdit_4 = QtWidgets.QLineEdit(Dialog)
        self.lineEdit_4.setEnabled(False)
-        self.lineEdit_4.setGeometry(QtCore.QRect(130, 200, 231, 21))
        self.lineEdit_4.setObjectName("lineEdit_4")
+        self.gridLayout.addWidget(self.lineEdit_4, 3, 1, 1, 1)
+        self.lineEdit = QtWidgets.QLineEdit(Dialog)
+        self.lineEdit.setObjectName("lineEdit")
+        self.gridLayout.addWidget(self.lineEdit, 0, 1, 1, 1)
+        self.label_2 = QtWidgets.QLabel(Dialog)
+        self.label_2.setObjectName("label_2")
+        self.gridLayout.addWidget(self.label_2, 1, 0, 1, 1)
+        self.lineEdit_2 = QtWidgets.QLineEdit(Dialog)
+        self.lineEdit_2.setObjectName("lineEdit_2")
+        self.gridLayout.addWidget(self.lineEdit_2, 1, 1, 1, 1)
+        self.label = QtWidgets.QLabel(Dialog)
+        self.label.setObjectName("label")
+        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
+        self.pushButton_2 = QtWidgets.QPushButton(Dialog)
+        self.pushButton_2.setObjectName("pushButton_2")
+        self.gridLayout.addWidget(self.pushButton_2, 1, 2, 1, 1)
+        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
+        self.buttonBox.setToolTip("")
+        self.buttonBox.setToolTipDuration(-1)
+        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
+        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Cancel|QtWidgets.QDialogButtonBox.Ok)
+        self.buttonBox.setCenterButtons(False)
+        self.buttonBox.setObjectName("buttonBox")
+        self.gridLayout.addWidget(self.buttonBox, 4, 1, 1, 1, QtCore.Qt.AlignHCenter)

        self.retranslateUi(Dialog)
        self.buttonBox.accepted.connect(Dialog.accept)
@@ -62,9 +66,9 @@ class Ui_Dialog(object):
    def retranslateUi(self, Dialog):
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Dialog"))
-        self.label.setText(_translate("Dialog", "视频路径:"))
        self.pushButton.setText(_translate("Dialog", "选择文件"))
+        self.label_4.setText(_translate("Dialog", "旁白语速："))
+        self.label_3.setText(_translate("Dialog", "旁白说话人："))
        self.label_2.setText(_translate("Dialog", "旁白脚本表格："))
+        self.label.setText(_translate("Dialog", "视频路径:"))
        self.pushButton_2.setText(_translate("Dialog", "表格路径"))
-        self.label_3.setText(_translate("Dialog", "旁白说话人："))
-        self.label_4.setText(_translate("Dialog", "旁白语速："))
\ No newline at end of file
--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
-"""基于OCR的旁白区间检测算法
-
-总体流程如下：
-    - 首先检测得到字幕的上下边界位置，框定大概的字幕检测范围；
-    - 视频每秒取3帧，并使用OCR获取字幕以及时间戳等信息；
-    - 根据字幕间的时间差判断能否插入旁白以及旁白的推荐字数。
-
-外部调用方式：
-
-.. code-block:: python
-
-
-
-    from detect_with_ocr import detect_with_ocr
-    detect_with_ocr(video_path, book_path, start_time, end_time, state)
-"""
-import os
-
-import cv2
-import numpy as np
-from paddleocr import PaddleOCR
-import sys
-print("PaddleOCR load path:", os.path.abspath(sys.modules[PaddleOCR.__module__].__file__))
-import difflib
-import re
-
-from typing import Tuple, Union
-
-from detect_with_asr import create_sheet, write_to_sheet
-from main_window import MainWindow, Element
-# 字幕的上下边界
-up_b, down_b = 0, 0
-
-# 初始化ocr工具
-ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False)
-
-# 正常语速为4字/秒
-normal_speed = 4
-
-
-def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
-    # return (885.0, 989.0)
-    """根据对视频中的画面进行分析，确定字幕的位置，以便后续的字幕识别
-
-    Args:
-        video_path (str): 视频存储路径
-        start_time (float): 视频开始时间
-
-    Returns:
-        Tuple[float, float]: 字幕在整个画面中的上下边界位置
-    """
-    print("video_path:", video_path)
-    video = cv2.VideoCapture(video_path)
-    # print("video:", video)
-    subtitle_position = {}
-    fps = video.get(cv2.CAP_PROP_FPS)
-    start = int(start_time * fps)
-    cnt = 0
-    txt_cnt = 0
-    pre_txt = None
-    video.set(cv2.CAP_PROP_POS_FRAMES, start)
-    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
-    while True:
-        _, img = video.read()
-        # print("img:", img)
-        # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # cv2.imshow('img', gray)
-        # cv2.imshow(img)
-        cnt += 1
-        if img is None or cnt > 10000:
-            break
-        if cnt % int(fps / 3) != 0:
-            continue
-        img = img[height:]
-        res = ocr.ocr(img, cls=True)
-        sorted(res, key=lambda text: text[0][0][1])
-        bottom_position = None
-        if len(res) == 0:
-            continue
-        log = []
-
-        print("cnt:", cnt, "rect_num:", len(res))
-        for x in res:
-            # print("x:", x)
-            rect, (txt, confidence) = x
-            # font_size = rect[2][1] - rect[0][1]
-            mid = (rect[0][0] + rect[1][0]) / 2
-            gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
-            # 可能是字幕的文本
-            if confidence > 0.9 and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
-                if bottom_position is None:
-                    bottom_position = rect[0][1]
-                # 判断是否与前一文本相同（是不是同一个字幕），非同一字幕的前提下，取对应上下边界，
-                keys = subtitle_position.keys()
-                if abs(rect[0][1] - bottom_position) < 10:
-                    if pre_txt is None or pre_txt != txt:
-                        txt_cnt += 1
-                        pre_txt = txt
-                        if (rect[0][0], rect[2][1]) in keys:
-                            subtitle_position[(rect[0][1], rect[2][1])] += 1
-                        else:
-                            replace = False
-                            for k in keys:
-                                # 更新键值为最宽的上下限
-                                if abs(rect[0][1] - k[0]) + abs(rect[2][1] - k[1]) < 10:
-                                    new_k = min(k[0], rect[0][1]), max(k[1], rect[2][1])
-                                    if new_k != k:
-                                        subtitle_position[new_k] = subtitle_position[k]
-                                        subtitle_position[new_k] += 1
-                                        subtitle_position.pop(k)
-                                    else:
-                                        subtitle_position[k] += 1
-                                    replace = True
-                                    break
-                            if not replace:
-                                subtitle_position[(rect[0][1], rect[2][1])] = 1
-        if txt_cnt == 3:
-            break
-    print(subtitle_position)
-    up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
-    return up_bounding + height, down_bounding + height
-
-
-def erasePunc(txt: str) -> str:
-    """去除字符串中的非中文字符
-
-    Args:
-        txt (str): 待处理字符串
-
-    Returns:
-        str: 处理后的字符串
-    """
-    pattern = re.compile(r'[^\u4e00-\u9fa5]')
-    txt = re.sub(pattern, '', txt)
-    return txt
-
-
-def string_similar(s1: str, s2: str) -> float:
-    """比较字符串s1和s2的相似度，主要用于减少输出文件中相似字幕的重复
-
-    Args:
-        s1 (str): 第一个字符串
-        s2 (str): 第二个字符串
-
-    Returns:
-        float: 字符串间的相似度
-    """
-    # 去除非中文字符后，再比较相似度
-    s1 = erasePunc(s1)
-    s2 = erasePunc(s2)
-    return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
-
-
-def normalize(text: str) -> str:
-    """规范化处理文本中的一些标点符号
-
-    Args:
-        text (str): 待处理字符串
-
-    Returns:
-        str: 处理后的字符串
-    """
-    # 将英文标点转换为中文标点
-    E_pun = u',.!?()[]:;'
-    C_pun = u'，。！？（）【】：；'
-    table = {ord(f): ord(t) for f, t in zip(E_pun, C_pun)}
-    text = text.translate(table)
-    text = text.strip(' ，。、【】_·：-@‘［；')
-    # 促成首尾匹配的（）
-    if text[-1] == '）' and text[0] != '（':
-        text = '（' + text
-    elif text[-1] != '）' and text[0] == '（':
-        text = text + '）'
-    return text
-
-
-def detect_subtitle(img: np.ndarray) -> Union[str, None]:
-    """检测当前画面得到字幕信息
-
-    Args:
-        img (np.ndarray): 当前画面
-
-    Returns:
-        Union[str, None]: 字幕信息(没有字幕时返回None)
-    """
-    subTitle = ''
-    height = down_b - up_b
-    img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
-    # img = cv2.resize(img, (int(img.shape[1] * 0.5), int(img.shape[0] * 0.5)))
-    res = ocr.ocr(img, cls=True)
-    sorted(res, key=lambda text: text[0][0][1])
-    if len(res) == 0:
-        return None
-    possible_txt = []
-    for x in res:
-        # cv2.imshow("cut", img)
-        # cv2.waitKey(0)
-        # cv2.destroyAllWindows()
-        rect, (txt, confidence) = x
-        font_size = rect[2][1] - rect[0][1]
-        mid = (rect[0][0] + rect[1][0]) / 2
-        gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
-        # log.append("文本：{}，置信度：{}，中心点：{}，斜率：{}，字体大小：{}".format(txt, confidence, mid / img.shape[1], gradient,
-        # font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
-        if confidence > 0.7 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6 and \
-                abs(rect[0][1] - 30) + abs(img.shape[0] - rect[2][1] - 30) <= 25:
-            subTitle += txt
-        # 如果字幕在一行中分为两个（或以上）对话文本
-        elif confidence > 0.85 and gradient < 0.1:
-            if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
-                possible_txt.append(txt)
-    if len(possible_txt) >= 2:
-        subTitle = ''.join(possible_txt)
-    if len(subTitle) > 0:
-        return subTitle
-    return None
-
-
-def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None):
-    """处理视频，主要完成对字幕的捕捉以及根据字幕分析得出旁白可能位置的任务
-
-    Args:
-        video_path (str): 待处理视频的路径
-        begin (float): 电影的实际开始位置（秒）
-        end (float): 电影除演职表外的实际结束位置（秒）
-        book_path (str): 输出表格地址
-        sheet_name (str): 输出表格中的表名
-        state (optional): 用于通信的状态关键字. Defaults to None.
-    """
-    global normal_speed
-    if mainWindow.projectContext.speaker_speed is not None:
-        normal_speed = float(mainWindow.projectContext.speaker_speed.split('(')[1].split('字')[0])
-    sz = len(mainWindow.projectContext.all_elements)
-    if sz == 0:
-        last_time = begin
-    else:
-        last_time = float(mainWindow.projectContext.all_elements[sz - 1].ed_time_sec) + 0.01
-    print("当前使用的语速为", normal_speed)
-    if state is None:
-        state = [None]
-    video = cv2.VideoCapture(video_path)
-    fps = video.get(cv2.CAP_PROP_FPS)
-    lastSubTitle = None
-    # res是在视频遍历过程中获取的字幕文件，不掺杂对旁白的分析
-    res = []
-    cnt = 0
-    start_time = 0
-    end_time = 0
-    video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
-    pre_state = state[0]
-    while True:
-        _, frame = video.read()
-        if frame is None:
-            break
-        cnt += 1
-        cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
-        # 判断当前帧是否已超限制
-        if cur_time > end:
-            if cur_time - end_time > 1:
-                print('--------------------------------------------------')
-                recommend_lens = int((cur_time - end_time) * normal_speed)
-                # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-
-                # 判断当前是否有字幕需要被保存下来
-            if end_time < start_time:
-                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
-                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
-            break
-        # 每秒取4帧画面左右
-        if cnt % int(fps / 4) == 0:
-            # 更新当前工程的检测进度
-            if pre_state is None:
-                state[0] = float(cur_time/ end)
-            else:
-                state[0] = min(0.9999, float(cur_time / end))
-            mainWindow.projectContext.nd_process = state[0]
-            mainWindow.projectContext.last_time = cur_time
-
-            subTitle = detect_subtitle(frame)
-            if subTitle is not None:
-                subTitle = normalize(subTitle)
-            # 第一次找到字幕
-            if lastSubTitle is None and subTitle is not None:
-                start_time = cur_time
-            # 字幕消失
-            elif lastSubTitle is not None and subTitle is None:
-                end_time = cur_time
-                res.append([start_time, end_time, lastSubTitle])
-                if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
-                    print('--------------------------------------------------')
-                    recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
-                        (res[-1][0] - res[-2][1]) * normal_speed)
-                    # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                    add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                print(start_time, end_time, lastSubTitle)
-
-                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
-                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
-            elif lastSubTitle is not None and subTitle is not None:
-                # 两句话连在一起，但是两句话不一样
-                if string_similar(lastSubTitle, subTitle) < 0.7:
-                    end_time = cur_time
-                    res.append([start_time, end_time, lastSubTitle])
-                    if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
-                        print('--------------------------------------------------')
-                        recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
-                            (res[-1][0] - res[-2][1]) * normal_speed)
-                        # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                        add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                    print(start_time, end_time, lastSubTitle)
-                    # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
-                    add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
-                    start_time = end_time
-                else:
-                    lastSubTitle = subTitle if len(subTitle) > len(lastSubTitle) else lastSubTitle
-                    continue
-            # 当前字幕与上一段字幕不一样
-            lastSubTitle = subTitle
-
-def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
-
-    st_time_sec, ed_time_sec, subtitle, suggest = li
-    st_time_sec, ed_time_sec = str(st_time_sec), str(ed_time_sec)
-    # 默认使用配置文件中的语速
-    speed = mainWindow.projectContext.speaker_speed
-    aside = ""
-    i = len(mainWindow.projectContext.all_elements)
-    if element_type == "字幕":
-        mainWindow.projectContext.subtitle_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed))
-        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
-    else:
-        if i == 0:
-            st_time_sec = "0.01"
-        else:
-            st_time_sec = "%.2f" % (float(mainWindow.projectContext.all_elements[i - 1].ed_time_sec) + 0.01)
-
-        # 因为暂时没有用到ed_time_sec，所以直接赋值空吧
-        ed_time_sec = ""
-
-        mainWindow.projectContext.aside_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside,speed))
-        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
-
-
-def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None):
-    """使用ocr检测视频获取字幕并输出旁白推荐
-
-    Args:
-        video_path (str): 待处理视频地址
-        book_path (str): 表格存储位置
-        start_time (float): 视频实际开始时间
-        end_time (float): 视频实际结束时间
-        state (optional): 用于通信的状态关键字. Defaults to None.
-    """
-    print("start and end time:", start_time, end_time)
-    if state is None:
-        state = [None]
-    # if os.path.exists(book_path):
-    #     os.remove(book_path)
-    book_name_xlsx = book_path
-    sheet_name_xlsx = "旁白插入位置建议"
-
-    context = mainWindow.projectContext
-    # 获取字幕在画面中的上下边界，方便在后续视频遍历过程中直接对字幕对应区域进行分析
-    global up_b, down_b
-    if context.detected:
-        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
-    else:
-        # 此处start_time + 300是为了节省用户调整视频开始时间的功夫（强行跳过前5分钟）
-        up_b, down_b = get_position(video_path, start_time +300)
-        context.caption_boundings = [up_b, down_b]
-
-    context.detected = True
-    # 获取并构建输出信息
-    # table_head = [["起始时间", "终止时间", "字幕", '建议', '解说脚本']]
-    # print("create sheet at time: ", datetime.datetime.now())
-    # create_sheet(book_name_xlsx, sheet_name_xlsx, table_head)
-    # print("process the total video at time: ", datetime.datetime.now())
-    process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)
-
-
-if __name__ == '__main__':
-    path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
-    print("get_pos:", get_position(path, 0))
+"""基于OCR的旁白区间检测算法
+
+总体流程如下：
+    - 首先检测得到字幕的上下边界位置，框定大概的字幕检测范围；
+    - 视频每秒取3帧，并使用OCR获取字幕以及时间戳等信息；
+    - 根据字幕间的时间差判断能否插入旁白以及旁白的推荐字数。
+
+外部调用方式：
+
+.. code-block:: python
+
+
+
+    from detect_with_ocr import detect_with_ocr
+    detect_with_ocr(video_path, book_path, start_time, end_time, state)
+"""
+import os
+
+import cv2
+import numpy as np
+from paddleocr import PaddleOCR
+import sys
+print("PaddleOCR load path:", os.path.abspath(sys.modules[PaddleOCR.__module__].__file__))
+import difflib
+import re
+
+from typing import Tuple, Union
+
+from detect_with_asr import create_sheet, write_to_sheet
+from main_window import MainWindow, Element
+# 字幕的上下边界
+up_b, down_b = 0, 0
+
+# 初始化ocr工具
+ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False)
+
+# 正常语速为4字/秒
+normal_speed = 4
+
+
+def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
+    """Locate the vertical band of the frame in which subtitles are drawn.
+
+    Frames are sampled (about three per second) starting at ``start_time``;
+    only the bottom 40% of each sampled frame is passed to OCR.
+    High-confidence, nearly horizontal, horizontally centred text boxes vote
+    for their (top, bottom) extent, near-identical extents are merged into
+    the widest one, and sampling stops once three distinct texts have been
+    counted (or after 10000 frames / end of video).
+
+    Args:
+        video_path (str): Path of the video file.
+        start_time (float): Position (seconds) at which sampling starts.
+
+    Returns:
+        Tuple[float, float]: Upper and lower subtitle boundaries in
+        full-frame pixel coordinates.
+
+    Raises:
+        ValueError: If no subtitle candidate was found in the sampled frames.
+    """
+    print("video_path:", video_path)
+    video = cv2.VideoCapture(video_path)
+    # Maps a (top, bottom) extent to the number of texts that voted for it.
+    subtitle_position = {}
+    fps = video.get(cv2.CAP_PROP_FPS)
+    # For fps < 3 (or an unreadable fps of 0) int(fps / 3) is 0 and the
+    # modulo below would raise ZeroDivisionError; fall back to every frame.
+    step = max(1, int(fps / 3))
+    cnt = 0
+    txt_cnt = 0
+    pre_txt = None
+    try:
+        video.set(cv2.CAP_PROP_POS_FRAMES, int(start_time * fps))
+        # Rows above this offset (top 60% of the frame) are not searched.
+        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
+        while True:
+            _, img = video.read()
+            cnt += 1
+            if img is None or cnt > 10000:
+                break
+            if cnt % step != 0:
+                continue
+            img = img[height:]
+            # The OCR result is used in the order returned. The former
+            # sorted(res, ...) call discarded its return value and therefore
+            # never had any effect, so it has been removed.
+            res = ocr.ocr(img, cls=True)
+            if not res:
+                continue
+            # Top edge of the first accepted box in this frame; later boxes
+            # must lie within 10 px of it to be counted.
+            bottom_position = None
+
+            print("cnt:", cnt, "rect_num:", len(res))
+            for rect, (txt, confidence) in res:
+                mid = (rect[0][0] + rect[1][0]) / 2
+                # arctan2 equals arctan(|dy / dx|) for dx != 0 and does not
+                # divide by zero for a zero-width box.
+                gradient = np.arctan2(abs(rect[1][1] - rect[0][1]),
+                                      abs(rect[1][0] - rect[0][0]))
+                # Candidate subtitle: confident, centred, nearly horizontal.
+                if not (confidence > 0.9
+                        and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1]
+                        and gradient < 0.1):
+                    continue
+                if bottom_position is None:
+                    bottom_position = rect[0][1]
+                if abs(rect[0][1] - bottom_position) >= 10:
+                    continue
+                # Same text as the previous candidate: same subtitle, no vote.
+                if pre_txt is not None and pre_txt == txt:
+                    continue
+                txt_cnt += 1
+                pre_txt = txt
+                span = (rect[0][1], rect[2][1])
+                # The lookup key must be the same (top, bottom) pair that is
+                # incremented; the previous test used (rect[0][0], rect[2][1]).
+                if span in subtitle_position:
+                    subtitle_position[span] += 1
+                    continue
+                for k in list(subtitle_position):
+                    # Merge with a near-identical extent, keeping the widest
+                    # upper/lower limits as the key.
+                    if abs(span[0] - k[0]) + abs(span[1] - k[1]) < 10:
+                        new_k = min(k[0], span[0]), max(k[1], span[1])
+                        subtitle_position[new_k] = subtitle_position.pop(k) + 1
+                        break
+                else:
+                    subtitle_position[span] = 1
+            # ">=" because one frame can add more than one text and step
+            # over exactly 3.
+            if txt_cnt >= 3:
+                break
+    finally:
+        video.release()
+    print(subtitle_position)
+    if not subtitle_position:
+        raise ValueError("no subtitle candidate found; cannot determine the subtitle position")
+    up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
+    return up_bounding + height, down_bounding + height
+
+
+def erasePunc(txt: str) -> str:
+    """Remove every character outside the range U+4E00-U+9FA5 from a string.
+
+    Args:
+        txt (str): String to process.
+
+    Returns:
+        str: The input with only the characters in that range kept.
+    """
+    return re.sub(r'[^\u4e00-\u9fa5]', '', txt)
+
+
+def string_similar(s1: str, s2: str) -> float:
+    """Return a similarity score for two subtitle strings.
+
+    Both strings are first reduced with ``erasePunc`` and the score is
+    ``difflib.SequenceMatcher.quick_ratio`` of the reduced strings.
+
+    Args:
+        s1 (str): First string.
+        s2 (str): Second string.
+
+    Returns:
+        float: Similarity of the two strings.
+    """
+    matcher = difflib.SequenceMatcher(None, erasePunc(s1), erasePunc(s2))
+    return matcher.quick_ratio()
+
+
+def normalize(text: str) -> str:
+    """Normalise the punctuation of a subtitle string.
+
+    ASCII punctuation is mapped to its full-width Chinese counterpart, a
+    fixed set of leading/trailing symbols is stripped, and an unmatched
+    full-width parenthesis at either end is completed.
+
+    Args:
+        text (str): String to process.
+
+    Returns:
+        str: Normalised string; empty when nothing is left after stripping.
+    """
+    # Map ASCII punctuation to the corresponding Chinese punctuation.
+    E_pun = u',.!?()[]:;'
+    C_pun = u'，。！？（）【】：；'
+    table = {ord(f): ord(t) for f, t in zip(E_pun, C_pun)}
+    text = text.translate(table)
+    text = text.strip(' ，。、【】_·：-@‘［；')
+    # Input consisting only of stripped characters leaves an empty string;
+    # return it as is instead of indexing into it (IndexError).
+    if not text:
+        return text
+    # Complete a one-sided full-width parenthesis.
+    if text[-1] == '）' and text[0] != '（':
+        text = '（' + text
+    elif text[-1] != '）' and text[0] == '（':
+        text = text + '）'
+    return text
+
+
+def detect_subtitle(img: np.ndarray) -> Union[str, None]:
+    """Run OCR on the subtitle band of a frame and return the subtitle text.
+
+    The frame is cropped to the module-level ``up_b``/``down_b`` band padded
+    by 70% of the band height above and below. Crops narrower than 1000 px
+    are enlarged 1.5x before OCR.
+
+    Args:
+        img (np.ndarray): Current video frame.
+
+    Returns:
+        Union[str, None]: Subtitle text, or None when no subtitle is found.
+    """
+    subTitle = ''
+    height = down_b - up_b
+    # Clamp the upper edge: a negative start index would be interpreted by
+    # numpy as counting from the bottom of the frame.
+    top = max(0, int(up_b - height * 0.7))
+    img = img[top:int(down_b + height * 0.7)]
+    # Enlarge low-resolution crops (narrower than 1000 px) before OCR.
+    # NOTE(review): the 30/25 px offsets in the test below are absolute
+    # values applied after this scaling - confirm they still fit scaled crops.
+    if img.shape[1] < 1000:
+        img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
+    # The OCR result is used in the order returned. The former
+    # sorted(res, ...) call discarded its return value and therefore never
+    # had any effect, so it has been removed.
+    res = ocr.ocr(img, cls=True)
+    if not res:
+        return None
+    possible_txt = []
+    for rect, (txt, confidence) in res:
+        mid = (rect[0][0] + rect[1][0]) / 2
+        # arctan2 equals arctan(|dy / dx|) for dx != 0 and does not divide
+        # by zero for a zero-width box.
+        gradient = np.arctan2(abs(rect[1][1] - rect[0][1]),
+                              abs(rect[1][0] - rect[0][0]))
+        # Main subtitle: confidence > 0.7, slope < 0.1, vertical offset <= 25
+        # and box centre within 0.4-0.6 of the crop width.
+        if confidence > 0.7 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6 and \
+                abs(rect[0][1] - 30) + abs(img.shape[0] - rect[2][1] - 30) <= 25:
+            subTitle += txt
+        # A subtitle line split into two (or more) dialogue texts, each
+        # centred left or right of the middle.
+        elif confidence > 0.85 and gradient < 0.1:
+            if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
+                possible_txt.append(txt)
+    # Two or more off-centre texts replace whatever was collected above.
+    if len(possible_txt) >= 2:
+        subTitle = ''.join(possible_txt)
+    if len(subTitle) > 0:
+        return subTitle
+    return None
+
+
+def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow=None):
+    """Scan a video for subtitles and record narration (aside) suggestions.
+
+    Roughly four frames per second are passed to ``detect_subtitle``. Every
+    finished subtitle is appended to the project through ``add_to_list``;
+    when the silent gap before a subtitle is at least one second, a narration
+    suggestion sized by the module-level ``normal_speed`` is added first.
+
+    Args:
+        video_path (str): Path of the video to process.
+        begin (float): Actual start of the film, in seconds.
+        end (float): Actual end of the film (credits excluded), in seconds.
+        book_path (str): Output workbook path (not used by this function).
+        sheet_name (str): Output sheet name (not used by this function).
+        state (optional): One-element list used to report progress.
+            Defaults to None, in which case a fresh list is created.
+        mainWindow (MainWindow): Window whose ``projectContext`` is read and
+            updated. It is dereferenced unconditionally, so a real instance
+            is required even though the default is None.
+    """
+    global normal_speed
+    if mainWindow.projectContext.speaker_speed is not None:
+        # Take the number located between '(' and '字' in the speed label.
+        normal_speed = float(mainWindow.projectContext.speaker_speed.split('(')[1].split('字')[0])
+    sz = len(mainWindow.projectContext.all_elements)
+    if sz == 0:
+        last_time = begin
+    else:
+        last_time = float(mainWindow.projectContext.all_elements[sz - 1].ed_time_sec) + 0.01
+    print("当前使用的语速为", normal_speed)
+    if state is None:
+        state = [None]
+
+    # Subtitles captured during this scan, without any narration analysis.
+    res = []
+
+    def record_subtitle(sub_start, sub_end, text):
+        """Store one finished subtitle, preceded by an aside hint if the gap allows."""
+        res.append([sub_start, sub_end, text])
+        # The gap is measured from the previous subtitle of this scan, or from
+        # last_time when this is the first subtitle found.
+        gap = sub_start - (last_time if len(res) == 1 else res[-2][1])
+        if gap >= 1:
+            print('--------------------------------------------------')
+            recommend_lens = int(gap * normal_speed)
+            add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
+        print(sub_start, sub_end, text)
+        add_to_list(mainWindow, "字幕", [round(sub_start, 3), round(sub_end, 3), text, ''])
+
+    video = cv2.VideoCapture(video_path)
+    try:
+        fps = video.get(cv2.CAP_PROP_FPS)
+        # Sample about 4 frames per second. Never let the step drop to 0
+        # (fps below 4, or 0 when the file could not be opened), which would
+        # make the modulo below raise ZeroDivisionError.
+        sample_interval = max(1, int(fps / 4))
+        lastSubTitle = None
+        cnt = 0
+        start_time = 0
+        end_time = 0
+        video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
+        pre_state = state[0]
+        while True:
+            _, frame = video.read()
+            if frame is None:
+                break
+            cnt += 1
+            cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
+            # Stop once the current frame is past the requested end.
+            if cur_time > end:
+                if cur_time - end_time > 1:
+                    print('--------------------------------------------------')
+                    recommend_lens = int((cur_time - end_time) * normal_speed)
+                    add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
+                # A subtitle is still on screen: save it, closing it at `end`.
+                # Testing lastSubTitle (rather than end_time < start_time) also
+                # covers a subtitle that began at a subtitle switch, where
+                # start_time equals end_time.
+                if lastSubTitle is not None:
+                    add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
+                break
+            if cnt % sample_interval != 0:
+                continue
+
+            # Publish the detection progress of the current project.
+            if pre_state is None:
+                state[0] = float(cur_time / end)
+            else:
+                state[0] = min(0.9999, float(cur_time / end))
+            mainWindow.projectContext.nd_process = state[0]
+            mainWindow.projectContext.last_time = cur_time
+
+            subTitle = detect_subtitle(frame)
+            if subTitle is not None:
+                # An empty normalized string carries no text, so treat it
+                # like a missing subtitle.
+                subTitle = normalize(subTitle) or None
+            if lastSubTitle is None and subTitle is not None:
+                # A subtitle appears.
+                start_time = cur_time
+            elif lastSubTitle is not None and subTitle is None:
+                # The subtitle disappears.
+                end_time = cur_time
+                record_subtitle(start_time, end_time, lastSubTitle)
+            elif lastSubTitle is not None and subTitle is not None:
+                if string_similar(lastSubTitle, subTitle) < 0.7:
+                    # Back-to-back subtitles with different text.
+                    end_time = cur_time
+                    record_subtitle(start_time, end_time, lastSubTitle)
+                    start_time = end_time
+                else:
+                    # Same subtitle still showing: keep the longer reading.
+                    lastSubTitle = subTitle if len(subTitle) > len(lastSubTitle) else lastSubTitle
+                    continue
+            lastSubTitle = subTitle
+    finally:
+        # Free the decoder/file handle on every exit path.
+        video.release()
+
+def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
+    """Append one subtitle or aside element to the project context.
+
+    Args:
+        mainWindow (MainWindow): Window whose ``projectContext`` lists are extended.
+        element_type (str): "字幕" stores a subtitle; any other value stores an aside.
+        li (list): Four items: start time, end time, subtitle text, suggestion.
+    """
+    raw_start, raw_end, subtitle, suggest = li
+    st_time_sec, ed_time_sec = str(raw_start), str(raw_end)
+    # The speaker speed configured on the project is used for every element.
+    speed = mainWindow.projectContext.speaker_speed
+    aside = ""
+    existing_count = len(mainWindow.projectContext.all_elements)
+
+    if element_type == "字幕":
+        target_list = mainWindow.projectContext.subtitle_list
+    else:
+        # An aside starts 0.01 s after the previous element ends
+        # (or at 0.01 when nothing has been recorded yet).
+        if existing_count == 0:
+            st_time_sec = "0.01"
+        else:
+            previous = mainWindow.projectContext.all_elements[existing_count - 1]
+            st_time_sec = "%.2f" % (float(previous.ed_time_sec) + 0.01)
+        # The end time of an aside is not used for now, so it is left empty.
+        ed_time_sec = ""
+        target_list = mainWindow.projectContext.aside_list
+
+    target_list.append(Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed))
+    mainWindow.projectContext.all_elements.append(target_list[-1])
+
+
+def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow=None):
+    """Detect subtitles in a video with OCR and produce narration suggestions.
+
+    Args:
+        video_path (str): Path of the video to process.
+        book_path (str): Where the workbook is stored.
+        start_time (float): Actual start time of the video.
+        end_time (float): Actual end time of the video.
+        state (optional): Status list used for communication. Defaults to None.
+        mainWindow (MainWindow): Window whose ``projectContext`` caches the
+            subtitle bounds and receives the results.
+    """
+    global up_b, down_b
+    print("start and end time:", start_time, end_time)
+    if state is None:
+        state = [None]
+
+    context = mainWindow.projectContext
+    # The vertical subtitle bounds are located once and cached on the context
+    # so that later frames can be cropped directly to that region.
+    if not context.detected:
+        # start_time + 300 skips the first five minutes, saving the user the
+        # trouble of adjusting the video start time.
+        up_b, down_b = get_position(video_path, start_time + 300)
+        context.caption_boundings = [up_b, down_b]
+    else:
+        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
+    context.detected = True
+
+    process_video(video_path, start_time, end_time, book_path, "旁白插入位置建议", state, mainWindow)
+
+
+if __name__ == '__main__':
+    # Manual check: print the subtitle bounds found in a local sample video.
+    path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
+    detected_bounds = get_position(path, 0)
+    print("get_pos:", detected_bounds)
--- a/main_window.py
+++ b/main_window.py
--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -21,6 +21,7 @@ class Ui_MainWindow(object):
 "    padding: 5px;\n"
 "}")
        self.centralwidget = QtWidgets.QWidget(MainWindow)
+        # self.centralwidget.setStyleSheet("font: 9pt \"黑体\";")
        self.centralwidget.setObjectName("centralwidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.centralwidget)
        self.horizontalLayout.setObjectName("horizontalLayout")
@@ -206,6 +207,7 @@ class Ui_MainWindow(object):
        self.horizontalLayout_4 = QtWidgets.QHBoxLayout(self.all_tab)
        self.horizontalLayout_4.setObjectName("horizontalLayout_4")
        self.all_tableWidget = QtWidgets.QTableWidget(self.all_tab)
+        # self.all_tableWidget.setStyleSheet("font: 9pt \"微软雅黑\";")
        self.all_tableWidget.setObjectName("all_tableWidget")
        self.all_tableWidget.setColumnCount(0)
        self.all_tableWidget.setRowCount(0)
@@ -240,6 +242,7 @@ class Ui_MainWindow(object):
        self.shuiping.setStretch(1, 5)
        self.verticalLayout.addLayout(self.shuiping)
        self.verticalWidget_2 = QtWidgets.QWidget(self.centralwidget)
+        # self.verticalWidget_2.setStyleSheet("font: 9pt \"微软雅黑\";")
        self.verticalWidget_2.setObjectName("verticalWidget_2")
        self.chuizhi = QtWidgets.QVBoxLayout(self.verticalWidget_2)
        self.chuizhi.setObjectName("chuizhi")
@@ -330,18 +333,26 @@ class Ui_MainWindow(object):
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.setting = QtWidgets.QAction(MainWindow)
+        # font = QtGui.QFont()
+        # font.setFamily("微软雅黑")
+        # self.setting.setFont(font)
        self.setting.setObjectName("setting")
        self.action_open_project = QtWidgets.QAction(MainWindow)
+        # self.action_open_project.setFont(font)
        self.action_open_project.setObjectName("action_open_project")
        self.import_movie = QtWidgets.QAction(MainWindow)
+        # self.import_movie.setFont(font)
        self.import_movie.setObjectName("import_movie")
        self.actions = QtWidgets.QAction(MainWindow)
        self.actions.setObjectName("actions")
        self.action_save = QtWidgets.QAction(MainWindow)
+        # self.action_save.setFont(font)
        self.action_save.setObjectName("action_save")
        self.action_undo = QtWidgets.QAction(MainWindow)
+        # self.action_undo.setFont(font)
        self.action_undo.setObjectName("action_undo")
        self.action_redo = QtWidgets.QAction(MainWindow)
+        # self.action_redo.setFont(font)
        self.action_redo.setObjectName("action_redo")
        self.action_3 = QtWidgets.QAction(MainWindow)
        self.action_3.setObjectName("action_3")
@@ -352,10 +363,12 @@ class Ui_MainWindow(object):
        self.action_operate = QtWidgets.QAction(MainWindow)
        self.action_operate.setObjectName("action_operate")
        self.action_export = QtWidgets.QAction(MainWindow)
+        # self.action_export.setFont(font)
        self.action_export.setObjectName("action_export")
        self.action_insert_aside_from_now = QtWidgets.QAction(MainWindow)
        self.action_insert_aside_from_now.setObjectName("action_insert_aside_from_now")
        self.action_create = QtWidgets.QAction(MainWindow)
+        # self.action_create.setFont(font)
        self.action_create.setObjectName("action_create")
        self.menu.addAction(self.action_create)
        self.menu.addAction(self.action_open_project)

--- a/management.py
+++ b/management.py
@@ -174,6 +174,7 @@ class ProjectContext:
            return 
        with open(self.conf_path, 'r', encoding='utf8') as f:
            info = json.load(f)
+            # print(json.dumps(info, ensure_ascii=False, indent=4))
            self.video_path = info["video_path"]
            self.excel_path = info["excel_path"]
            self.speaker_info = info["speaker_info"]["speaker_id"]
@@ -207,8 +208,7 @@ class ProjectContext:
                }
            }
            f.write(json.dumps(info))
-
-
+ 
    def setVideoPath(self, video_path):
        self.video_path = video_path


--- a/operation_dialog.py
+++ b/operation_dialog.py
@@ -141,7 +141,7 @@ class Operation_Dialog(QDialog, Ui_Dialog):
                    assert start_time_f < end_time_f
                    if row_number < rowCount:
                        # 要求结束时间在下一行的起始时间前面，对增加和修改都适用
-                        assert float(self.mainWindow.projectContext.all_elements[list_idx+1].st_time_sec) > end_time_f
+                        assert float(self.mainWindow.projectContext.all_elements[list_idx+1].st_time_sec) >= end_time_f

                # 要求起始时间在上一行后面，在下一行前面
                if self.comboBox_2.currentText() == "增加一行":

--- a/operation_dialog_ui.py
+++ b/operation_dialog_ui.py
 # -*- coding: utf-8 -*-

-# Form implementation generated from reading ui file 'operation_dialog.ui'
+# Form implementation generated from reading ui file 'd:\AddCaption\cur_version\accessibility_movie_2\operation_dialog.ui'
 #
-# Created by: PyQt5 UI code generator 5.12
+# Created by: PyQt5 UI code generator 5.15.4
 #
-# WARNING! All changes made in this file will be lost!
+# WARNING: Any manual changes made to this file will be lost when pyuic5 is
+# run again.  Do not edit this file unless you know what you are doing.
+

 from PyQt5 import QtCore, QtGui, QtWidgets

@@ -12,52 +14,20 @@ from PyQt5 import QtCore, QtGui, QtWidgets
 class Ui_Dialog(object):
    def setupUi(self, Dialog):
        Dialog.setObjectName("Dialog")
-        Dialog.resize(724, 437)
+        Dialog.resize(724, 502)
        self.gridLayout = QtWidgets.QGridLayout(Dialog)
        self.gridLayout.setObjectName("gridLayout")
-        self.label_14 = QtWidgets.QLabel(Dialog)
-        self.label_14.setObjectName("label_14")
-        self.gridLayout.addWidget(self.label_14, 5, 3, 1, 1)
+        self.label_11 = QtWidgets.QLabel(Dialog)
+        self.label_11.setObjectName("label_11")
+        self.gridLayout.addWidget(self.label_11, 6, 2, 1, 1)
        self.comboBox_3 = QtWidgets.QComboBox(Dialog)
        self.comboBox_3.setEnabled(True)
        self.comboBox_3.setEditable(False)
        self.comboBox_3.setObjectName("comboBox_3")
-        self.gridLayout.addWidget(self.comboBox_3, 5, 4, 1, 1)
-        self.label_11 = QtWidgets.QLabel(Dialog)
-        self.label_11.setObjectName("label_11")
-        self.gridLayout.addWidget(self.label_11, 5, 2, 1, 1)
+        self.gridLayout.addWidget(self.comboBox_3, 6, 4, 1, 1)
        self.label_9 = QtWidgets.QLabel(Dialog)
        self.label_9.setObjectName("label_9")
        self.gridLayout.addWidget(self.label_9, 3, 2, 1, 1)
-        self.lineEdit_6 = QtWidgets.QLineEdit(Dialog)
-        self.lineEdit_6.setEnabled(True)
-        self.lineEdit_6.setObjectName("lineEdit_6")
-        self.gridLayout.addWidget(self.lineEdit_6, 5, 1, 1, 1)
-        self.lineEdit_4 = QtWidgets.QLineEdit(Dialog)
-        self.lineEdit_4.setEnabled(True)
-        self.lineEdit_4.setObjectName("lineEdit_4")
-        self.gridLayout.addWidget(self.lineEdit_4, 4, 1, 1, 1)
-        self.pickEndPos = QtWidgets.QPushButton(Dialog)
-        self.pickEndPos.setObjectName("pickEndPos")
-        self.gridLayout.addWidget(self.pickEndPos, 3, 3, 1, 1)
-        self.pickStartPos = QtWidgets.QPushButton(Dialog)
-        self.pickStartPos.setObjectName("pickStartPos")
-        self.gridLayout.addWidget(self.pickStartPos, 2, 3, 1, 1)
-        self.label_4 = QtWidgets.QLabel(Dialog)
-        self.label_4.setObjectName("label_4")
-        self.gridLayout.addWidget(self.label_4, 3, 0, 1, 1)
-        self.label = QtWidgets.QLabel(Dialog)
-        self.label.setObjectName("label")
-        self.gridLayout.addWidget(self.label, 1, 0, 1, 1)
-        self.pushButton_3 = QtWidgets.QPushButton(Dialog)
-        self.pushButton_3.setObjectName("pushButton_3")
-        self.gridLayout.addWidget(self.pushButton_3, 1, 3, 1, 2)
-        self.label_3 = QtWidgets.QLabel(Dialog)
-        self.label_3.setObjectName("label_3")
-        self.gridLayout.addWidget(self.label_3, 2, 0, 1, 1)
-        self.label_8 = QtWidgets.QLabel(Dialog)
-        self.label_8.setObjectName("label_8")
-        self.gridLayout.addWidget(self.label_8, 2, 2, 1, 1)
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.label_2 = QtWidgets.QLabel(Dialog)
@@ -70,9 +40,41 @@ class Ui_Dialog(object):
        self.label_13.setObjectName("label_13")
        self.verticalLayout.addWidget(self.label_13)
        self.gridLayout.addLayout(self.verticalLayout, 1, 2, 1, 1)
+        self.label_4 = QtWidgets.QLabel(Dialog)
+        self.label_4.setObjectName("label_4")
+        self.gridLayout.addWidget(self.label_4, 3, 0, 1, 1)
+        self.pickStartPos = QtWidgets.QPushButton(Dialog)
+        self.pickStartPos.setObjectName("pickStartPos")
+        self.gridLayout.addWidget(self.pickStartPos, 2, 3, 1, 1)
+        self.label_10 = QtWidgets.QLabel(Dialog)
+        self.label_10.setObjectName("label_10")
+        self.gridLayout.addWidget(self.label_10, 4, 2, 1, 1)
+        self.label_14 = QtWidgets.QLabel(Dialog)
+        self.label_14.setObjectName("label_14")
+        self.gridLayout.addWidget(self.label_14, 6, 3, 1, 1)
        self.label_7 = QtWidgets.QLabel(Dialog)
        self.label_7.setObjectName("label_7")
-        self.gridLayout.addWidget(self.label_7, 5, 0, 1, 1)
+        self.gridLayout.addWidget(self.label_7, 6, 0, 1, 1)
+        self.label_3 = QtWidgets.QLabel(Dialog)
+        self.label_3.setObjectName("label_3")
+        self.gridLayout.addWidget(self.label_3, 2, 0, 1, 1)
+        self.pickEndPos = QtWidgets.QPushButton(Dialog)
+        self.pickEndPos.setObjectName("pickEndPos")
+        self.gridLayout.addWidget(self.pickEndPos, 3, 3, 1, 1)
+        self.lineEdit_6 = QtWidgets.QLineEdit(Dialog)
+        self.lineEdit_6.setEnabled(True)
+        self.lineEdit_6.setObjectName("lineEdit_6")
+        self.gridLayout.addWidget(self.lineEdit_6, 6, 1, 1, 1)
+        self.lineEdit_4 = QtWidgets.QLineEdit(Dialog)
+        self.lineEdit_4.setEnabled(True)
+        self.lineEdit_4.setObjectName("lineEdit_4")
+        self.gridLayout.addWidget(self.lineEdit_4, 4, 1, 1, 1)
+        self.label = QtWidgets.QLabel(Dialog)
+        self.label.setObjectName("label")
+        self.gridLayout.addWidget(self.label, 1, 0, 1, 1)
+        self.pushButton_3 = QtWidgets.QPushButton(Dialog)
+        self.pushButton_3.setObjectName("pushButton_3")
+        self.gridLayout.addWidget(self.pushButton_3, 1, 3, 1, 2)
        self.buttonBox = QtWidgets.QDialogButtonBox(Dialog)
        self.buttonBox.setEnabled(False)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
@@ -87,10 +89,31 @@ class Ui_Dialog(object):
        self.buttonBox.setStandardButtons(QtWidgets.QDialogButtonBox.Ok)
        self.buttonBox.setCenterButtons(False)
        self.buttonBox.setObjectName("buttonBox")
-        self.gridLayout.addWidget(self.buttonBox, 7, 4, 1, 1)
-        self.label_10 = QtWidgets.QLabel(Dialog)
-        self.label_10.setObjectName("label_10")
-        self.gridLayout.addWidget(self.label_10, 4, 2, 1, 1)
+        self.gridLayout.addWidget(self.buttonBox, 8, 4, 1, 1)
+        self.label_8 = QtWidgets.QLabel(Dialog)
+        self.label_8.setObjectName("label_8")
+        self.gridLayout.addWidget(self.label_8, 2, 2, 1, 1)
+        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
+        self.horizontalLayout_2.setContentsMargins(-1, 0, -1, -1)
+        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
+        self.pushButton_2 = QtWidgets.QPushButton(Dialog)
+        self.pushButton_2.setObjectName("pushButton_2")
+        self.horizontalLayout_2.addWidget(self.pushButton_2)
+        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
+        self.horizontalLayout_2.addItem(spacerItem)
+        self.pushButton = QtWidgets.QPushButton(Dialog)
+        self.pushButton.setObjectName("pushButton")
+        self.horizontalLayout_2.addWidget(self.pushButton)
+        self.horizontalLayout_2.setStretch(0, 1)
+        self.horizontalLayout_2.setStretch(1, 1)
+        self.horizontalLayout_2.setStretch(2, 1)
+        self.gridLayout.addLayout(self.horizontalLayout_2, 8, 1, 1, 2)
+        self.label_6 = QtWidgets.QLabel(Dialog)
+        self.label_6.setObjectName("label_6")
+        self.gridLayout.addWidget(self.label_6, 5, 0, 1, 1)
+        self.label_5 = QtWidgets.QLabel(Dialog)
+        self.label_5.setObjectName("label_5")
+        self.gridLayout.addWidget(self.label_5, 4, 0, 1, 1)
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.comboBox = QtWidgets.QComboBox(Dialog)
@@ -100,42 +123,29 @@ class Ui_Dialog(object):
        self.comboBox.addItem("")
        self.comboBox.addItem("")
        self.horizontalLayout.addWidget(self.comboBox)
-        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
-        self.horizontalLayout.addItem(spacerItem)
+        spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
+        self.horizontalLayout.addItem(spacerItem1)
        self.horizontalLayout.setStretch(0, 1)
        self.horizontalLayout.setStretch(1, 1)
        self.gridLayout.addLayout(self.horizontalLayout, 0, 1, 1, 1)
-        self.label_5 = QtWidgets.QLabel(Dialog)
-        self.label_5.setObjectName("label_5")
-        self.gridLayout.addWidget(self.label_5, 4, 0, 1, 1)
-        self.timeEdit = QtWidgets.QTimeEdit(Dialog)
-        self.timeEdit.setCurrentSection(QtWidgets.QDateTimeEdit.MinuteSection)
-        self.timeEdit.setCurrentSectionIndex(1)
-        self.timeEdit.setObjectName("timeEdit")
-        self.gridLayout.addWidget(self.timeEdit, 2, 1, 1, 1)
-        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
-        self.horizontalLayout_2.setContentsMargins(-1, 0, -1, -1)
-        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
-        self.pushButton_2 = QtWidgets.QPushButton(Dialog)
-        self.pushButton_2.setObjectName("pushButton_2")
-        self.horizontalLayout_2.addWidget(self.pushButton_2)
-        spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
-        self.horizontalLayout_2.addItem(spacerItem1)
-        self.pushButton = QtWidgets.QPushButton(Dialog)
-        self.pushButton.setObjectName("pushButton")
-        self.horizontalLayout_2.addWidget(self.pushButton)
-        self.horizontalLayout_2.setStretch(0, 1)
-        self.horizontalLayout_2.setStretch(1, 1)
-        self.gridLayout.addLayout(self.horizontalLayout_2, 7, 1, 1, 2)
        self.comboBox_2 = QtWidgets.QComboBox(Dialog)
        self.comboBox_2.setObjectName("comboBox_2")
        self.comboBox_2.addItem("")
        self.comboBox_2.addItem("")
        self.comboBox_2.addItem("")
        self.gridLayout.addWidget(self.comboBox_2, 0, 2, 1, 1)
+        self.lineEdit_5 = QtWidgets.QLineEdit(Dialog)
+        self.lineEdit_5.setEnabled(True)
+        self.lineEdit_5.setObjectName("lineEdit_5")
+        self.gridLayout.addWidget(self.lineEdit_5, 5, 1, 1, 1)
        self.lineEdit = QtWidgets.QLineEdit(Dialog)
        self.lineEdit.setObjectName("lineEdit")
        self.gridLayout.addWidget(self.lineEdit, 1, 1, 1, 1)
+        self.timeEdit = QtWidgets.QTimeEdit(Dialog)
+        self.timeEdit.setCurrentSection(QtWidgets.QDateTimeEdit.MinuteSection)
+        self.timeEdit.setCurrentSectionIndex(1)
+        self.timeEdit.setObjectName("timeEdit")
+        self.gridLayout.addWidget(self.timeEdit, 2, 1, 1, 1)
        self.timeEdit_2 = QtWidgets.QTimeEdit(Dialog)
        self.timeEdit_2.setCurrentSection(QtWidgets.QDateTimeEdit.SecondSection)
        self.timeEdit_2.setCurrentSectionIndex(2)
@@ -148,30 +158,29 @@ class Ui_Dialog(object):
    def retranslateUi(self, Dialog):
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Dialog"))
-        self.label_14.setText(_translate("Dialog", "语速："))
        self.label_11.setText(_translate("Dialog", "*请填文字"))
        self.label_9.setText(_translate("Dialog", "*请填数字，最多保留两位小数"))
-        self.pickEndPos.setText(_translate("Dialog", "取当前位置"))
-        self.pickStartPos.setText(_translate("Dialog", "取当前位置"))
+        self.label_2.setText(_translate("Dialog", "行（如果操作是增加，则在该行后面增加）"))
+        self.label_13.setText(_translate("Dialog", "*需要填在【字幕旁白】页面中的行数"))
        self.label_4.setText(_translate("Dialog", "结束时间："))
+        self.pickStartPos.setText(_translate("Dialog", "取当前位置"))
+        self.label_10.setText(_translate("Dialog", "*请填文字"))
+        self.label_14.setText(_translate("Dialog", "语速："))
+        self.label_7.setText(_translate("Dialog", "旁白："))
+        self.label_3.setText(_translate("Dialog", "起始时间："))
+        self.pickEndPos.setText(_translate("Dialog", "取当前位置"))
        self.label.setText(_translate("Dialog", "我想操作第"))
        self.pushButton_3.setText(_translate("Dialog", "填充\n"
 "行信息"))
-        self.label_3.setText(_translate("Dialog", "起始时间："))
        self.label_8.setText(_translate("Dialog", "*请填数字，最多保留两位小数"))
-        self.label_2.setText(_translate("Dialog", "行（如果操作是增加，则在该行后面增加）"))
-        self.label_13.setText(_translate("Dialog", "*需要填在【字幕旁白】页面中的行数"))
-        self.label_7.setText(_translate("Dialog", "旁白："))
-        self.label_10.setText(_translate("Dialog", "*请填文字"))
-        self.comboBox.setItemText(0, _translate("Dialog", "字幕"))
-        self.comboBox.setItemText(1, _translate("Dialog", "旁白"))
-        self.label_5.setText(_translate("Dialog", "字幕："))
-        self.timeEdit.setDisplayFormat(_translate("Dialog", "hh:mm:ss.zzz"))
        self.pushButton_2.setText(_translate("Dialog", "修改"))
        self.pushButton.setText(_translate("Dialog", "检测"))
+        self.label_6.setText(_translate("Dialog", "推荐字数："))
+        self.label_5.setText(_translate("Dialog", "字幕："))
+        self.comboBox.setItemText(0, _translate("Dialog", "字幕"))
+        self.comboBox.setItemText(1, _translate("Dialog", "旁白"))
        self.comboBox_2.setItemText(0, _translate("Dialog", "增加一行"))
        self.comboBox_2.setItemText(1, _translate("Dialog", "修改一行"))
        self.comboBox_2.setItemText(2, _translate("Dialog", "删除一行"))
+        self.timeEdit.setDisplayFormat(_translate("Dialog", "hh:mm:ss.zzz"))
        self.timeEdit_2.setDisplayFormat(_translate("Dialog", "hh:mm:ss.zzz"))
-
-