fix: OCR algorithm issue with repeated subtitles

9b46d4c7 · wux51 · 9b3754fc · 2c4cd5c0 · 9b46d4c7 · 9b46d4c7
Commit 9b46d4c7 authored Nov 05, 2023 by wux51
6 changed files
--- a/constant.py
+++ b/constant.py
@@ -12,10 +12,11 @@ import os

 class Content:
    StartTimeColumn = 0
+    SubtitleColumnNumber = 2
    AsideColumnNumber = 4
    SpeedColumnNumber = 5
    # ActivateColumns = [2, 3]
-    ActivateColumns = [4,5]
+    ActivateColumns = [2,4,5]
    # ColumnCount = 3
    ObjectName = "all_tableWidget"
    # TimeFormatColumns = [0]

--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -62,7 +62,38 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
    start = int(start_time * fps)
    video.set(cv2.CAP_PROP_POS_FRAMES, start)
    frame_num = video.get(cv2.CAP_PROP_FRAME_COUNT)
+
+    time1 = time.time()
+
+    # 一共有60*60*60*3 均匀取2000帧
+    gap = math.floor(frame_num/2000) # 前一帧与后一帧的跨度
+    # #读取方案1：跳转读写头到指定帧
+    # for i in range(2000):
+    #     frame_index = i*gap + 1
+    #     video.set(cv2.CAP_PROP_POS_FRAMES,frame_index)
+    #     _, frame = video.read()
+    #     cv2.imwrite(str.format('./extrated_imgs/{}.png',i),frame)
+    # 读写方案2：连续读取，读到指定帧保存
+    # i=1
+    # while i<frame_num:
+    #     _, frame = video.read()
+    #     if i%gap == 0:
+    #         print(i,gap)
+    #         cv2.imwrite(str.format('./extrated_imgs_2/{}.png',i),frame)
+    #     i = i + 1
+
+    # 主要工作：
+    paddle_dir = "res/.paddleocr/2.3.0.1/ocr/"
+    cur_cls_model_dir = paddle_dir + "cls/ch_ppocr_mobile_v2.0_cls_infer"
+    cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
+    cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
+    ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
+
    position_map = {}
+    R = []
+    G = []
+    B = []
+    #  统计位置（高度）
    # 分成五等份，1/5 2/5 3/5 4/5
    for i in range(4):
        frame_index = math.floor((i+1)*frame_num/5)
@@ -70,9 +101,10 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
        for j in range(10):#每个阶段取10帧
            for k in range(120):#隔120帧取一帧
                _,frame = video.read()
+            _,frame = video.read()
            if (j+1)*120+frame_index>frame_num:
                break
-            _,frame = video.read()
+            # cv2.imwrite(str.format('./imgs_2/{}_{}.png',i+1,j),frame)
            res = ocr.ocr(frame, cls=True)
            for result_item in res:
                [x1,y1],[x2,y2],[x3,y3],[x4,y4] = result_item[0]
@@ -80,22 +112,87 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
                english_text = ''.join(re.findall(r'[A-Za-z]',text))
                # 跳过英文字幕
                if(len(english_text)/len(text)<0.1):
+                    # 提取ocr区域像素点颜色
+                    subtitle_img = frame[int(y1):int(y3),int(x1):int(x2)]
+                    r = subtitle_img[:,:,0].copy()
+                    r = r.reshape(-1)
+                    g = subtitle_img[:,:,1].copy()
+                    g = g.reshape(-1)
+                    b = subtitle_img[:,:,2].copy()
+                    b = b.reshape(-1)
+                    R = np.append(R,r)
+                    G = np.append(G,g)
+                    B = np.append(B,b)
                    print(i+1,j,text)
+                    # 分析文字位置
                    if len(position_map) == 0:
                        position_map[(y1,y3)]=1
                    else:
+                        # 在列表中查找
                        keys = list(position_map.keys())
+                        flag = False
                        for key in keys:
-                            if abs(y1-key[0])<2 and abs(y3-key[1])<2:
+                            height = y3-y1
+                            threshold = int((y3-y1)*0.3)
+                            # print(threshold)
+                            if abs(y1-key[0])<threshold or abs(y3-key[1])<threshold:
                                position_map[key]+=1
+                                # # 绘制字幕横线到图片上：
+                                # frame1 = cv2.line(frame,(0,int(key[0])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[0])),(255,0,0),2)
+                                # frame1 = cv2.line(frame,(0,int(key[1])),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(key[1])),(255,0,0),2)
+                                # file_name = './mytest('+str(key[0])+','+str(key[1])+').png'
+                                # cv2.imwrite(file_name,frame1)
+                                flag = True# 如果能归为某一类则flag置为True
                                break
-                            else:
+                        if flag == False:# 如果找不到，则向列表添加
                            position_map[(y1,y3)]=1
-    print(sorted( position_map.items(), key = lambda kv:(kv[1], kv[0])))
-    y1,y2 = sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))[-1][0]    
+                            # # 绘制字幕横线到图片上：
+                            # frame1 = cv2.line(frame,(0,int(y1)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y1)),(255,0,0),2)
+                            # frame1 = cv2.line(frame,(0,int(y3)),(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),int(y3)),(255,0,0),2)
+                            # file_name = './mytest('+str(y1)+','+str(y3)+').png'
+                            # cv2.imwrite(file_name,frame1)
+    # 给出字幕行数
+    position_rank_list = sorted( position_map.items(), key = lambda kv:(kv[1], kv[0]))
+    print(position_rank_list)
+    line_num = 0
+    num = position_rank_list[-1][1]
+    y1,y2 = position_rank_list[-1][0]
+    height = abs(y2-y1)
+    result = []
+    item_result = []
+    for item in position_rank_list:
+        if item[1]>num*0.3 and item[0][0]>0.5*video.get(cv2.CAP_PROP_FRAME_HEIGHT) and item[0][1]>0.5*video.get(cv2.CAP_PROP_FRAME_HEIGHT):#出现次数达到rank1的30%以上，判定为新的一行字幕
+            line_num += 1 
+            y1,y2 = item[0]
            y1 = y1/video.get(cv2.CAP_PROP_FRAME_HEIGHT)
            y2 = y2/video.get(cv2.CAP_PROP_FRAME_HEIGHT)
-    return y1,y2
+            item_result.append([y1,y2])
+    print([line_num,height,item_result])
+    # 给出文字和背景的颜色
+    import matplotlib.pyplot as plt
+    from sklearn import cluster
+    plt.hist(R)
+    plt.savefig('R-hist-all')
+    plt.close()
+    # 尝试聚成两类(k-means算法)    
+    estimator = cluster.KMeans(n_clusters=2)
+    estimator.fit(R.reshape(-1,1))
+    # print(R[0].shape)
+    r_centers = estimator.cluster_centers_.reshape(-1)
+    estimator = cluster.KMeans(n_clusters=2)
+    estimator.fit(G.reshape(-1,1))
+    g_centers = estimator.cluster_centers_.reshape(-1)
+    estimator = cluster.KMeans(n_clusters=2)
+    estimator.fit(B.reshape(-1,1))
+    b_centers = estimator.cluster_centers_.reshape(-1)
+    r,g,b = max(r_centers),max(g_centers),max(b_centers)
+    print('颜色结果:',r,g,b)
+    # 返回值：字幕行数 字幕高度 字幕位置(比例形式)  字幕颜色（r,g,b）
+    # [2, 109.0, [[0.8637892376681614, 0.9243273542600897], [0.9304932735426009, 0.9915919282511211]],(r,g,b)]
+    return item_result[0][0],item_result[0][1]
+    return [line_num,height,item_result,(r,g,b)]
+
+    

 ocr_positions = []

@@ -257,6 +354,29 @@ def normalize(text: str) -> str:
    return text


+def resize_img(img, resize_height=152):
+    """Scale an image so its height is approximately ``resize_height`` pixels.
+
+    The same factor is applied to width and height, so the aspect ratio is
+    preserved.
+
+    Args:
+        img: Image array whose first two dimensions are (height, width).
+        resize_height: Target height in pixels. Defaults to 152.
+
+    Returns:
+        The resized image produced by ``cv2.resize``.
+    """
+    height, width = img.shape[:2]
+    # One ratio serves both directions: > 1 enlarges, < 1 shrinks.
+    scale_factor = float(resize_height / height)
+    if resize_height > height:
+        # Enlarging: compute the destination size explicitly.
+        new_width = int(width * scale_factor)
+        new_height = int(height * scale_factor)
+        return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
+    # Shrinking (or equal height): the previous code used
+    # height / resize_height here, which is >= 1 and therefore enlarged the
+    # image instead of shrinking it.
+    return cv2.resize(img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
+
 def extract_white_prior(img, threshold=200):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

@@ -267,7 +387,8 @@ def extract_white_prior(img, threshold=200):


 index = 0
-def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
+t = 140
+def detect_subtitle(org_img: np.ndarray, lastSubTitle, last_confidence) -> Tuple[Union[str, None], float]:
    """检测当前画面得到字幕信息

    Args:
@@ -276,23 +397,36 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
    Returns:
        Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
    """
-    subTitle = ''
+    
+    ocr_res = ""
    # up_b = 276
    # down_b = 297
    global ocr_positions
+    # ocr_positions.append([676, 712])
    h = None
+    global index
    for i in range(len(ocr_positions)):
        img = copy.deepcopy(org_img)
        up_b = ocr_positions[i][0]
        down_b = ocr_positions[i][1]
        height = down_b - up_b
-        # if len(ocr_positions) == 1:
-        #     img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
-        # else:
+        if len(ocr_positions) == 1:
+            img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
+        else:
+            # cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
            cropped_img = img[int(up_b):int(down_b)]
+            # cropped_img = resize_img(cropped_img)
+            # x = float(150 / height)
+            # img_h, img_w = cropped_img.shape[:2]
+            # img_h = int(img_h * x)
+            # img_w = int(img_w * x)
+            # cropped_img = cv2.resize(cropped_img, (img_w, img_h))
+            
            # 定义要添加的上下空白的高度
            padding_top = height*0.7
            padding_bottom = height*0.7
+            # padding_top = 150
+            # padding_bottom = 150

            # 计算新图像的高度
            new_height = cropped_img.shape[0] + padding_top + padding_bottom
@@ -304,6 +438,23 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
            start_y = int(padding_top)
            end_y = start_y + cropped_img.shape[0]
            img[start_y:end_y, :] = cropped_img
+            
+            # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
+            # global index
+            # cv2.imwrite(f'./cap/cap{index}.png', img)
+            
+            
+        # img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
+
+        # # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        # # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
+        
+        # t = 230
+        # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
+        # cv2.imwrite(f'./test2.png', img)
+        
+
+        # index = index + 1
        # img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
        # 针对低帧率的视频做图像放大处理
        print(height)
@@ -312,7 +463,8 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
        print(img.shape)
        if img.shape[1] < 1000:
            img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
-        global index
+
+        

        # img = extract_white_prior(img)

@@ -327,6 +479,7 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
            continue
            # return None, 0, None
        possible_txt = []
+        subTitle = ''
        conf = 0
        print('res --------->', res)
        res.sort(key=lambda rect: rect[0][0][0] + rect[0][1][0])   # 按照中心点排序
@@ -343,6 +496,11 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
            print("文本：{}，置信度：{}，中心点：{}，斜率：{}，字体大小：{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
            print("字体大小差距: {}", format(height - font_size))
            print("高度中心:{}".format((rect[0][1] + rect[1][1])/2/img.shape[0]))
+            # if confidence < 0.95:
+            #     # global t
+            #     print("小于0.95，重新检测，阈值为：" + str(t))
+            #     t = t + 20
+            #     return detect_subtitle(org_img, lastSubTitle, last_confidence)
            if h == None:
                h = font_size
            conf_thred1 = 0.7
@@ -372,8 +530,17 @@ def detect_subtitle(org_img: np.ndarray) -> Tuple[Union[str, None], float]:
            subTitle = ' '.join(possible_txt)
        print(subTitle, conf)
        if len(subTitle) > 0:
-        return subTitle, conf, h
-    return None, 0, None
+            ocr_res = ocr_res + subTitle
+    if (len(ocr_res)) >0:
+        print(">>>>>>>>>>>>cur subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
+        # if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
+        #     ocr_res = lastSubTitle
+        #     confidence = last_confidence
+        # print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
+        return ocr_res, confidence, conf, h
+    if check_have_ocr(img):
+        return "err", None, 0, None
+    return None, None, 0, None



@@ -402,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    lastSubTitle = None
+    last_confidence = None
    lastConf = 0
    # res是在视频遍历过程中获取的字幕文件，不掺杂对旁白的分析
    res = []
@@ -447,8 +615,9 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
            mainWindow.projectContext.nd_process = state[0]
            mainWindow.projectContext.last_time = cur_time

-            subTitle, conf, cur_ocr_h = detect_subtitle(frame)
-            
+            subTitle, confidence, conf, cur_ocr_h = detect_subtitle(frame, lastSubTitle, last_confidence)
+            if subTitle == "err":
+                continue
            
            if subTitle is not None:

@@ -490,20 +659,20 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                fl = False
                cur_time1 = cur_time
                continue
-                end_time = cur_time
-                res.append([start_time, end_time, lastSubTitle])
-                if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
-                    print('--------------------------------------------------')
-                    recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
-                        (res[-1][0] - res[-2][1]) * normal_speed)
-                    # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                    # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
-                    add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
-                print(start_time, end_time, lastSubTitle)
-
-                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
-                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
-                print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
+                # end_time = cur_time
+                # res.append([start_time, end_time, lastSubTitle])
+                # if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
+                #     print('--------------------------------------------------')
+                #     recommend_lens = int((res[-1][0] - last_time) * normal_speed) if len(res) == 1 else int(
+                #         (res[-1][0] - res[-2][1]) * normal_speed)
+                #     # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
+                #     # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
+                #     add_to_list(mainWindow, "旁白", ['', '', '', '%d' % recommend_lens],ocr_h)
+                # print(start_time, end_time, lastSubTitle)
+
+                # # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
+                # add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''],ocr_h)
+                # print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
            elif lastSubTitle is not None and subTitle is not None:
                # 两句话连在一起，但是两句话不一样
                if string_similar(lastSubTitle, subTitle) < 0.6:
@@ -527,10 +696,26 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                    lastSubTitle = subTitle if conf > lastConf else lastSubTitle
                    continue
            # 当前字幕与上一段字幕不一样
+            if subTitle != "err":
                lastSubTitle = subTitle
+                last_confidence = confidence
                lastConf = conf
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
    # print(ocr_h_map)
+    # mainWindow.refresh_tab_slot()
+
+    while(mainWindow.refresh_flag == True):
+        time.sleep(1)
+    mainWindow.detect_lock = True
+    try:
+        process_err_ocr(mainWindow)
+    finally:
+        mainWindow.detect_lock = False
+    # mainWindow.refresh_lock.acquire()
+    # try:
+    #     process_err_ocr(mainWindow)
+    # finally:
+    #     mainWindow.refresh_lock.release()
    
    

@@ -572,6 +757,7 @@ def process_err_ocr(mainWindow):
            print(">>>>>>>>>will rm" + str(table_indexs[rm_list[i]] + 1 - i) + ", subtitle:" + mainWindow.projectContext.all_elements[table_indexs[rm_list[i]] - i].subtitle)
            print(table_indexs[rm_list[i]])
            mainWindow.del_line_operation_slot(row = table_indexs[rm_list[i]] + 1 - i, show_msg_flag = False)
+            time.sleep(0.5)
        except Exception as e:
            print(">>>>>>>>>>>>>>>>>>>>>>del err")
            print(e)
@@ -695,9 +881,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
    # print("process the total video at time: ", datetime.datetime.now())
    process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)

+def check_have_ocr(img, threshold=230):
+    """Report whether the image contains at least one very bright pixel.
+
+    Args:
+        img: Image array; either already single-channel (2-D) or a colour
+            image that ``cv2.COLOR_BGR2GRAY`` can convert.
+        threshold: Gray level a pixel must exceed to count. Defaults to 230.
+
+    Returns:
+        True if any gray value is strictly greater than ``threshold``,
+        otherwise False.
+    """
+    # cvtColor(BGR2GRAY) rejects single-channel input, so 2-D images are
+    # used as they are instead of being converted again.
+    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    # Same result as THRESH_BINARY followed by "== 255": a pixel is mapped
+    # to 255 exactly when its value is greater than the threshold.
+    return bool(np.any(gray > threshold))
+

 if __name__ == '__main__':
    # path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
    path = "C:/Users/Smile/Desktop/accessibility-movie/"
    # print("get_pos:", get_position(path, 0))
-    evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
+    # evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
+    # img = cv2.imread("./cap/cap879.png")
+    img = cv2.imread("./cap/cap812.png", cv2.IMREAD_GRAYSCALE)
+    # img = cv2.equalizeHist(img)
+    t = 230
+    _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
+    # img = resize_img(img)
+    detect_subtitle(img, None, None)
+
+    # img = cv2.equalizeHist(img)
+    # t = 120
+    # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
+    
+    # # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    # # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
+    cv2.imwrite(f'./binary2.png', img)
+    if np.any(img == 255):
+        print("111111")
+    else:
+        print("222222")
--- a/main_window.py
+++ b/main_window.py
@@ -13,7 +13,7 @@ import os
 import cv2

 from PyQt5 import QtWidgets
-from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu
+from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QDialog, QVBoxLayout
 from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
 from PyQt5.QtMultimedia import *
 from PyQt5.QtGui import QIcon
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
 from ffmpeg_util import adjust_audio_volume

 from PyQt5.QtCore import QThread;
+import threading


 # from emit_import_event import emit_import_event
@@ -71,12 +72,56 @@ class WorkerThread(QThread):
        for t in self.main_window.threads:
            t.start()

+    def refresh(self):
+        self.main_window.import_process_sign.emit(self.elements)
+
+class ProcessErrThread(QThread):
+    """QThread whose run() hands ``process_err`` to a daemon ``RunThread``."""
+
+    def __init__(self, main_window):
+        super().__init__()
+        # Main window instance forwarded to process_err as its only argument.
+        self.main_window = main_window
+
+    def run(self):
+        """Create, daemonize and start the ``process_err`` worker."""
+        # Imported inside run(), exactly as the original does.
+        from narratage_detection import process_err
+        worker = RunThread(funcName=process_err,args=(self.main_window, ),name="process_err")
+        worker.setDaemon(True)
+        worker.start()

+class AutoCloseDialog(QDialog):
+    """Dialog with customized window flags that shows one fixed notice label.
+
+    NOTE(review): no closing logic is defined in this class; presumably the
+    caller closes it - confirm.
+    """
+
+    def __init__(self, parent = None):
+        super(AutoCloseDialog, self).__init__(parent)

+        self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)
+        # Single label inside a vertical layout.
+        notice = QLabel("字幕边界定位中，请稍后！")
+        box = QVBoxLayout()
+        box.addWidget(notice)
+        self.setLayout(box)

+class LocalOcrThread(QThread):
+    """QThread that starts ``main_window.auto_location_ocr`` on a daemon RunThread."""
+    def __init__(self, main_window,path):
+        super().__init__()
+        # Main window whose state/threads attributes run() resets and fills.
+        self.main_window = main_window
+        # Passed unchanged to auto_location_ocr as its only argument.
+        self.path = path

-    def refresh(self):
-        self.main_window.import_process_sign.emit(self.elements)
+    def run(self):
+        """Reset the window's progress state and start the location worker."""
+        # Run the time-consuming work in the background
+
+        # One-element list stored on the window and seeded with 0.1;
+        # presumably read as task progress elsewhere - confirm.
+        state = [None]
+        self.main_window.state = state
+        state[0] = 0.1
+        self.main_window.threads = []
+
+        t = RunThread(funcName=self.main_window.auto_location_ocr,
+                    args=(self.path, ),
+                    name="auto_location_ocr")
+        t.setDaemon(True)
+        # Registered in both the current-task list and the global thread list.
+        self.main_window.threads.append(t)
+        self.main_window.all_threads.append(t)
+
+        for t in self.main_window.threads:
+            t.start()
+
+    def finish(self):
+        """Emit the main window's ``location_ocr_sign`` signal."""
+        self.main_window.location_ocr_sign.emit()

 class CustomDelegate(QtWidgets.QStyledItemDelegate):
    def paint(self, painter, option, index):
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
    renew_signal = pyqtSignal(str)
    import_process_sign = pyqtSignal(list)
    refresh_sign = pyqtSignal()
+    location_ocr_sign = pyqtSignal()

    def __init__(self, project_path):
        super(MainWindow, self).__init__()
+        self.location_ocr_sign.connect(self.finish_location_ocr)
        self.last_aside_index = None
        self.setupUi(self)
        self.statusbar.showMessage("hello", 5000)
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        # 所有QTimer集中管理
        self.import_excel_timer = QTimer()
        self.import_excel_timer.timeout.connect(self.check_if_import_excel_over)
+        self.location_ocr_timer = QTimer()
+        self.location_ocr_timer.timeout.connect(self.check_if_location_ocr)
        self.detect_timer = QTimer()
        self.detect_timer.timeout.connect(self.check_if_detect_over_slot)
        self.synthesis_timer = QTimer()
@@ -416,6 +465,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.aside_head_time = None
        self.add_head_aside = False
        self.ocr_ranges = []
+        self.refresh_lock = threading.Lock()
+        self.detect_lock = False
+        self.refresh_flag = False

        # 打印到log文件中
        t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
@@ -425,13 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        get_focus_thread.setDaemon(True)
        get_focus_thread.start()

+    def finish_location_ocr(self):
+        """Show the "subtitle location finished, please verify" notice."""
+        done_msg = "字幕定位结束，请检查是否准确，并调整正确"
+        self.import_excel_dialog.show_with_msg(done_msg)
+    
    def show_confirmation_dialog(self):
        confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'确认已经校准OCR范围（只包含中文，不含英文）?',
                                               QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
        if confirm_box == QtWidgets.QMessageBox.Yes:
            self.show_detect_dialog()
        else:
-            print(1)
+            print(">>>>>>show_confirmation_dialog")

    def getFocus(self):
        while(True):
@@ -520,12 +575,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        """弹出旁白区间检测相关信息填写窗口

        """
-        # if self.rate == None:
+        if self.rate == None:
            # self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
-        #     return
-        # if self.rate_bottom == None:
+            h = self.widget.get_h()
+            video_h = self.wgt_video.height()
+            self.rate = float(h-10)/float(video_h)
+        if self.rate_bottom == None:
            # self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
-        #     return
+            h = self.widget_bottom.get_h()
+            video_h = self.wgt_video.height()
+            self.rate_bottom = float(h-6)/float(video_h)
        if not self.check_ocr_rate():
            self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
            return
@@ -631,12 +690,30 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.action_operate.setEnabled(True)
        self.action_insert_aside_from_now.setEnabled(True)
        self.insert_aside_from_now_btn.setEnabled(True)
-        # from detect_with_ocr import evaluate_position
-        # print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
-        # y1,y2 = evaluate_position(path.path()[1:], 0)
-        # self.widget.setY(y1)
-        # self.widget_bottom.setY(y2)
-        # print("y1:%d,y2:%d" %(y1,y2))
+        # self.import_excel_dialog.show_with_msg("正在自动定位字幕边界，请稍后！")
+        confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'是否需要自动定位字幕边界?',
+                                               QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
+        if confirm_box == QtWidgets.QMessageBox.Yes:
+            self.location_ocr_thread = LocalOcrThread(self, path)
+            self.location_ocr_thread.start()
+            self.location_ocr_timer.start(1000)
+            # self.auto_close_dialog = AutoCloseDialog()
+            # self.auto_close_dialog.exec_()
+        else:
+            print(">>>>>>>>>>>>>play_video")
+        
+        
+        
+
+    def auto_location_ocr(self, path):
+        """Locate the subtitle band with OCR and move both boundary widgets to it.
+
+        Args:
+            path: Object whose ``path()`` returns the video location; the
+                first character is stripped before use (presumably a QUrl
+                path with a leading "/" - confirm).
+        """
+        from detect_with_ocr import evaluate_position
+        video_path = path.path()[1:]
+        print(">>>>>>>>>>>>>>>>>>>>>v_path" + video_path)
+        # evaluate_position returns the top and bottom edges as fractions of
+        # the frame height, so they are scaled by the video widget height.
+        y1, y2 = evaluate_position(video_path, 0)
+        video_h = self.wgt_video.height()
+        # NOTE(review): offsets are +6 / +10 here but -10 / -6 in
+        # show_detect_dialog - confirm which pairing is intended.
+        self.widget.setY(int(video_h * y1) + 6)
+        time.sleep(1)
+        self.widget_bottom.setY(int(video_h * y2) + 10)
+        # %f instead of %d: the values are fractions, which %d would always
+        # truncate to 0 in the log.
+        print("y1:%f,y2:%f" % (y1, y2))

    def check_ocr_rate(self):
        if self.rate > self.rate_bottom:
@@ -969,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        """        
        self.check_if_over("旁白导入")

+    def check_if_location_ocr(self):
+        """Timer slot: poll the subtitle-location task and clean up once a thread has ended."""
+        self.check_if_over("字幕定位")
+        # Nothing to do while every registered thread is still running
+        # (also the case when the list is empty).
+        if all(worker.is_alive() for worker in self.threads):
+            return
+        self.location_ocr_timer.stop()
+        # self.auto_close_dialog.close()
+        self.threads = []
+
    # type = 检测 或 合成 或 导出
    def check_if_over(self, type: str):
        """确认传入的待检测任务是否完成
@@ -996,12 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            elif type == "检测":
                self.detect_timer.stop()
                self.refresh_tab_timer.stop()
+                # t = ProcessErrThread(self)
+                # t.start()
+                # from narratage_detection import process_err
+                # process_err(self)
                
-                from narratage_detection import process_err
-                process_err(self)
            elif type == "旁白导入":
                self.import_excel_timer.stop()
                # self.refresh_tab_timer.stop()
+            elif type == "字幕定位":
+                self.location_ocr_timer.stop()
            else:
                self.export_timer.stop()

@@ -1016,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            self.progressBar.setValue(100)
            self.progressLabel.setText(f"100%")
            self.projectContext.nd_process = 1
+            self.threads = []

    def deal_synthesis_callback_slot(self, threads, state):
        """实现旁白音频合成任务状态在界面中的实时显示，更新界面中的对应变量，每5s更新一次任务状态
@@ -1400,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
                item = QTableWidgetItem(text)
                item.setTextAlignment(Qt.AlignCenter)
                # 设置为不可编辑
-                if self.checkIfTableItemCanChange(table, idx, j) == False:
+                # if self.checkIfTableItemCanChange(table, idx, j) == False:
                    # item.setFlags(Qt.ItemIsEnabled)
-                    print(1)
+                    # print(">>>>>>>>>setElememtToTable")
                table.setItem(idx, j, item)

    # 只有Content页的字幕列和 Aside页的字幕列 可编辑
@@ -1871,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):

            pre_item = self.all_tableWidget.item(row, col - 1)
            suggest = pre_item.text()
-            if suggest != None and suggest != "":
+            print(">>>>>>>>suggest:" + suggest)
+            
+            if col == constant.Content.AsideColumnNumber and suggest != None and suggest != "":
                arrays = suggest.split("/")
                if len(arrays) == 2:
                    suggest =  str(len(text)) + "/" + arrays[1]
@@ -1902,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
                    # self.all_tableWidget.setItem(
                    #     int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
                    self.projectContext.refresh_speed(row, text)
+                elif col == constant.Content.SubtitleColumnNumber:
+                    self.projectContext.refresh_subtitle(row, text)
                # self.all_tableWidget_idx = int(row)
                # self.set_table_to_window(False)

@@ -2037,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        将表格内容更新至界面中，并保存当前工程内容

        """
+        if not self.detect_lock:
+            self.refresh_flag = True
+            try:
                self.set_table_to_window(need_refresh_all=False)
                self.projectContext.save_project(False)
+            finally:
+                self.refresh_flag = False

    def refresh_all_tab_slot(self):
        """刷新整个表格

--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -65,6 +65,8 @@ class MyWidget(QWidget):
        print(">>>>>cur_y2 : " + str(self.y()))
        self.move(0, h)

+    def get_h(self):
+        """Return the widget's current y coordinate as reported by ``self.y()``."""
+        current_y = self.y()
+        return current_y
        

    def down(self, mov_len):

--- a/management.py
+++ b/management.py
@@ -264,6 +264,11 @@ class ProjectContext:
        if not self.initial_ing: 
            save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)

+    def refresh_subtitle(self, row, subtitle: str):
+        """Store a new subtitle on the element at ``row`` and save the sheet.
+
+        Args:
+            row: Index of the element to update; converted with ``int()``.
+            subtitle: New subtitle text.
+        """
+        element = self.all_elements[int(row)]
+        element.subtitle = subtitle
+        # Saving is skipped while the project is still initialising.
+        if self.initial_ing:
+            return
+        save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)
+
    def refresh_speed(self, row, speed: str)->None:
        self.all_elements[int(row)].speed = speed
        if not self.initial_ing:
@@ -308,7 +313,7 @@ class ProjectContext:
                if d["终止时间"][i] is None:
                    # 如果是最后一条
                    if i == len(d["字幕"]) - 1:
-                        print(1)
+                        print(">>>>>>>>>load_excel_from_path")
                        # ed_time_sec = "360000" if self.duration == 0 else self.duration # todo 默认最大时长是100h
                    else:
                        ed_time_sec = "%.2f"%(float(d["起始时间"][i + 1]) - 0.01)
@@ -429,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
            backup_path = os.path.dirname(new_excel_path) + "/tmp_"+str(time.time())+".xlsx"
            # os.remove(new_excel_path)
            os.rename(new_excel_path, backup_path)
+        # print(">>>>>>new_excel_path:" + new_excel_path)
+        # print(">>>>>>>>>>backup_path:" + backup_path)
        try:
            create_sheet(new_excel_path, "旁白插入位置建议", [header])
            # for element in all_element:

--- a/narratage_detection.py
+++ b/narratage_detection.py
@@ -65,7 +65,11 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,

 def process_err(mainWindow: MainWindow=None):
+    """Run ``process_err_ocr`` on the main window; exceptions are printed, not raised."""
    from detect_with_ocr import process_err_ocr
+    try:
        process_err_ocr(mainWindow)
+    except Exception as e:
+        # Best-effort step: log the failure and return normally.
+        print("process_err err")
+        print(e)

 if __name__ == '__main__':
    # 定义参数