赵心治 / accessibility_movie_2 · Commits
Commit 2c4cd5c0 authored Nov 05, 2023 by smile2019
Merge remote-tracking branch 'refs/remotes/origin/feat_1' into feat_1
parents d3fcd34f 7296e8d4
Showing 8 changed files with 780 additions and 167 deletions (+780 −167)
.gitignore              +29  −2
constant.py             +2   −1
detect_with_ocr.py      +357 −102
main_window.py          +241 −31
main_window_ui.py       +22  −3
management.py           +10  −2
narratage_detection.py  +7   −0
ocr_metric.py           +112 −26
.gitignore
...
...
@@ -13,4 +13,31 @@ res/ffmpeg-4.3.1/bin/output.mp4
res/ffmpeg-4.3.1/bin/qiji_local.mp4
venv/
venv37/
shenming_test
\ No newline at end of file
shenming_test
cap.png
requirements3.8.txt
venv3.8-new/
webrtcvad-2.0.10-cp38-abi3-win_amd64.whl
xlsx-resource/
deal_ocr.csv
deal_srt.csv
new.srt
shenhai1.xlsx
shenhai2.xlsx
test,py
"深海短片2.xlsx"
"深海短片origin.xlsx"
11.py
222.py
cap/
cap1597.png
cap831.png
deal.py
deal_movie.py
movie_1.txt
movie_pro.txt
res/.paddleocr/2.3.0.1/ocr/paddleocr/
script1.py
test/
\ No newline at end of file
constant.py
...
...
@@ -12,10 +12,11 @@ import os
class Content:
    StartTimeColumn = 0
    SubtitleColumnNumber = 2
    AsideColumnNumber = 4
    SpeedColumnNumber = 5
    # ActivateColumns = [2, 3]
    ActivateColumns = [4, 5]
    ActivateColumns = [2, 4, 5]
    # ColumnCount = 3
    ObjectName = "all_tableWidget"
    # TimeFormatColumns = [0]
...
...
detect_with_ocr.py
...
...
@@ -30,6 +30,9 @@ from typing import Tuple, Union
from utils import reverse_time_to_seconds
from detect_with_asr import create_sheet, write_to_sheet
from main_window import MainWindow, Element
import time
import numpy as np
import copy
import math
# 字幕的上下边界
...
...
@@ -42,10 +45,18 @@ cur_det_model_dir = paddle_dir + "det/ch/ch_PP-OCRv2_det_infer"
cur_rec_model_dir = paddle_dir + "rec/ch/ch_PP-OCRv2_rec_infer"
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cls_model_dir=cur_cls_model_dir, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# paddle_dir = "res/.paddleocr/2.3.0.1/ocr/paddleocr/"
# cur_det_model_dir = paddle_dir + "ch_PP-OCRv4_det_infer"
# cur_rec_model_dir = paddle_dir + "ch_PP-OCRv4_rec_infer"
# ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, det_model_dir=cur_det_model_dir, rec_model_dir=cur_rec_model_dir)
# 正常语速为4字/秒
normal_speed = 4
table_index = 0
ocr_h_map = {}

def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]:
    print(">>>>>>video path:" + video_path)
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    start = int(start_time * fps)
...
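For orientation, the detect_subtitle() code further down indexes each OCR hit as rect[i][j]. A minimal sketch (illustrative only, not part of the commit) of the result layout it assumes — PaddleOCR 2.x returns a flat list of [box, (text, confidence)] per image, where box is four [x, y] corner points; newer releases wrap this in one more list — with made-up numbers:

# Illustration of the result shape detect_subtitle() expects (hypothetical values).
fake_result = [
    [[[120, 640], [520, 640], [520, 676], [120, 676]], ("示例字幕文本", 0.93)],
]
for rect, (txt, confidence) in fake_result:
    font_size = rect[2][1] - rect[0][1]   # box height, used as the glyph height
    mid = (rect[0][0] + rect[1][0]) / 2   # horizontal centre of the box
    print(txt, confidence, font_size, mid)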
@@ -183,7 +194,9 @@ def evaluate_position(video_path: str, start_time: float) -> Tuple[float, float]
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
ocr_positions = []
def get_position(video_path: str, start_time: float, ocr_ranges) -> Tuple[float, float]:
    # return (885.0, 989.0)
    """根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
...
@@ -194,34 +207,36 @@ def get_position(video_path: str, start_time: float, rate: float, rate_bottom: f
    Returns:
        Tuple[float, float]: 字幕在整个画面中的上下边界位置
    """
    print(">>>>>>>>>>open")
    print("video_path:", video_path)
    video = cv2.VideoCapture(video_path)
    # print("video:", video)
    subtitle_position = {}
    fps = video.get(cv2.CAP_PROP_FPS)
    start = int(start_time * fps)
    cnt = 0
    txt_cnt = 0
    pre_txt = None
    video.set(cv2.CAP_PROP_POS_FRAMES, start)
    # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
    print(cv2.CAP_PROP_FRAME_HEIGHT)
    print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
    down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
    print(">>>>get posti")
    print(ocr_ranges)
    for i in range(len(ocr_ranges)):
        rate = ocr_ranges[i][0]
        rate_bottom = ocr_ranges[i][1]
        print(">>>>>>>>>>open")
        print("video_path:", video_path)
        video = cv2.VideoCapture(video_path)
        # print("video:", video)
        subtitle_position = {}
        fps = video.get(cv2.CAP_PROP_FPS)
        start = int(start_time * fps)
        cnt = 0
        txt_cnt = 0
        pre_txt = None
        video.set(cv2.CAP_PROP_POS_FRAMES, start)
        # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
        print(cv2.CAP_PROP_FRAME_HEIGHT)
        print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
        down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
        global ocr_positions
        print("add positions")
        print(up)
        ocr_positions.insert(0, [up, down])
    # down = up + 20
    # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
    print(up)
    # print(down)
    up_rate, down_rate = evaluate_position(video_path, 0)
    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * up_rate)
    down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * down_rate)
    return int(up), int(down)
    # TODO 现阶段是主窗体设定字幕的开始位置和结束位置,传入该函数。现在希望做成自动检测的?
    # print(up)
    # # print(down)
    # return int(up), int(down)
    # while True:
    #     _, img = video.read()
    #     # print("img:", img)
...
@@ -339,7 +354,41 @@ def normalize(text: str) -> str:
    return text

def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
def resize_img(img):
    resize_height = 152
    height, width = img.shape[:2]
    if resize_height > height:
        # 定义放大倍数
        scale_factor = float(resize_height / height)
        # 计算新的宽度和高度
        new_width = int(width * scale_factor)
        new_height = int(height * scale_factor)
        # 使用插值方法进行图像放大
        enlarged_image = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
        return enlarged_image
    else:
        # 定义缩小倍数
        scale_factor = float(height / resize_height)
        # 0.5表示缩小为原来的一半大小
        # 使用插值方法进行图像缩小
        smaller_image = cv2.resize(img, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
        return smaller_image

def extract_white_prior(img, threshold=200):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # 设定阈值,将非白色部分二值化为黑色
    ret, binary_image = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    return binary_image

index = 0
t = 140

def detect_subtitle(org_img: np.ndarray, lastSubTitle, last_confidence) -> Tuple[Union[str, None], float]:
    """检测当前画面得到字幕信息
    Args:
...
@@ -348,71 +397,151 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
    Returns:
        Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
    """
    subTitle = ''
    ocr_res = ""
    # up_b = 276
    # down_b = 297
    height = down_b - up_b
    img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
    # 针对低帧率的视频做图像放大处理
    print(height)
    print(up_b)
    print(down_b)
    print(img.shape)
    if img.shape[1] < 1000:
        img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
    cv2.imwrite('./cap.png', img)
    res = ocr.ocr(img, cls=True)
    print('--------> res', res)
    sorted(res, key=lambda text: text[0][0][1])
    sorted(res, key=lambda text: text[0][0][0])
    if len(res) == 0:
        return None, 0
    possible_txt = []
    conf = 0
    print('res --------->', res)
    for x in res:
        # cv2.imshow("cut", img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        rect, (txt, confidence) = x
        font_size = rect[2][1] - rect[0][1]
        mid = (rect[0][0] + rect[1][0]) / 2
        gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
        # log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient,
        # font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
        print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
        print("字体大小差距: {}", format(height - font_size))
        print("高度中心:{}".format((rect[0][1] + rect[1][1]) / 2 / img.shape[0]))
        conf_thred1 = 0.7
        conf_thred2 = 0.85
        # conf_thred1 = 0.1
        # conf_thred2 = 0.4
        # conf_thred1 = 0.5
        # conf_thred2 = 0.7
        if (rect[0][1] + rect[1][1]) / 2 / img.shape[0] > 0.5 or (rect[0][1] + rect[1][1]) / 2 / img.shape[0] <= 0.1:
    global ocr_positions
    # ocr_positions.append([676, 712])
    h = None
    global index
    for i in range(len(ocr_positions)):
        img = copy.deepcopy(org_img)
        up_b = ocr_positions[i][0]
        down_b = ocr_positions[i][1]
        height = down_b - up_b
        if len(ocr_positions) == 1:
            img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
        else:
            # cropped_img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
            cropped_img = img[int(up_b):int(down_b)]
            # cropped_img = resize_img(cropped_img)
            # x = float(150 / height)
            # img_h, img_w = cropped_img.shape[:2]
            # img_h = int(img_h * x)
            # img_w = int(img_w * x)
            # cropped_img = cv2.resize(cropped_img, (img_w, img_h))
            # 定义要添加的上下空白的高度
            padding_top = height * 0.7
            padding_bottom = height * 0.7
            # padding_top = 150
            # padding_bottom = 150
            # 计算新图像的高度
            new_height = cropped_img.shape[0] + padding_top + padding_bottom
            # 创建一个新的空白图像
            img = np.zeros((int(new_height), cropped_img.shape[1], 3), dtype=np.uint8)
            # 将裁剪后的图像放置在新图像中间
            start_y = int(padding_top)
            end_y = start_y + cropped_img.shape[0]
            img[start_y:end_y, :] = cropped_img
        # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
        # global index
        # cv2.imwrite(f'./cap/cap{index}.png', img)
        # img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # # ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # # img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
        # t = 230
        # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
        # cv2.imwrite(f'./test2.png', img)
        # index = index + 1
        # img = img[int(up_b - height*0.2):int(down_b + height*0.2)]
        # 针对低帧率的视频做图像放大处理
        print(height)
        print(up_b)
        print(down_b)
        print(img.shape)
        if img.shape[1] < 1000:
            img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
        # img = extract_white_prior(img)
        cv2.imwrite(f'./cap/cap{index}.png', img)
        index = index + 1
        print(">>>>>>>>>>>>>>>>>>>>>>>>>>>new log" + str(index - 1))
        res = ocr.ocr(img, cls=True)
        print('--------> res', res)
        sorted(res, key=lambda text: text[0][0][1])
        sorted(res, key=lambda text: text[0][0][0])
        if len(res) == 0:
            continue
        # TODO 字幕去重算法改进
        if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
            subTitle += txt
            conf = max(conf, confidence)
            # possible_txt.append([txt, mid/img.shape[1]])
            possible_txt.append(txt)
        # 如果字幕在一行中分为两个(或以上)对话文本
        elif confidence > conf_thred2 and gradient < 0.1:
            if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
                # return None, 0, None
        possible_txt = []
        subTitle = ''
        conf = 0
        print('res --------->', res)
        res.sort(key=lambda rect: rect[0][0][0] + rect[0][1][0])
        # 按照中心点排序
        for x in res:
            # cv2.imshow("cut", img)
            # cv2.waitKey(0)
            # cv2.destroyAllWindows()
            rect, (txt, confidence) = x
            font_size = rect[2][1] - rect[0][1]
            mid = (rect[0][0] + rect[1][0]) / 2
            gradient = np.arctan(abs((rect[1][1] - rect[0][1]) / (rect[1][0] - rect[0][0])))
            # log.append("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient,
            # font_size)) 置信度>0.7 & 斜率<0.1 & 字幕偏移量<=25 & 字幕中心在画面宽的0.4-0.6之间
            print("文本:{},置信度:{},中心点:{},斜率:{},字体大小:{}".format(txt, confidence, mid / img.shape[1], gradient, font_size))
            print("字体大小差距: {}", format(height - font_size))
            print("高度中心:{}".format((rect[0][1] + rect[1][1]) / 2 / img.shape[0]))
            # if confidence < 0.95:
            #     # global t
            #     print("小于0.95,重新检测,阈值为:" + str(t))
            #     t = t + 20
            #     return detect_subtitle(org_img, lastSubTitle, last_confidence)
            if h == None:
                h = font_size
            conf_thred1 = 0.7
            conf_thred2 = 0.85
            # conf_thred1 = 0.1
            # conf_thred2 = 0.4
            # conf_thred1 = 0.5
            # conf_thred2 = 0.7
            if (rect[0][1] + rect[1][1]) / 2 / img.shape[0] > 0.5 or (rect[0][1] + rect[1][1]) / 2 / img.shape[0] <= 0.1:
                continue
            # TODO 字幕去重算法改进
            if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
                subTitle += txt
                conf = max(conf, confidence)
                # possible_txt.append([txt, mid/img.shape[1]])
                possible_txt.append(txt)
                conf = max(conf, confidence)
    # sorted(possible_txt, key=lambda pos : pos[1])
    # print(possible_txt)
    if len(possible_txt) >= 2:
        # subTitle = ' '.join([x[0] for x in possible_txt])
        subTitle = ' '.join(possible_txt)
    print(subTitle, conf)
    if len(subTitle) > 0:
        return subTitle, conf
    return None, 0
            # 如果字幕在一行中分为两个(或以上)对话文本
            elif confidence > conf_thred2 and gradient < 0.1:
                if 0.3 < mid / img.shape[1] < 0.4 or 0.6 < mid / img.shape[1] < 0.7:
                    # possible_txt.append([txt, mid/img.shape[1]])
                    possible_txt.append(txt)
                    conf = max(conf, confidence)
        # sorted(possible_txt, key=lambda pos : pos[1])
        # print(possible_txt)
        if len(possible_txt) >= 2:
            # subTitle = ' '.join([x[0] for x in possible_txt])
            subTitle = ' '.join(possible_txt)
        print(subTitle, conf)
        if len(subTitle) > 0:
            ocr_res = ocr_res + subTitle
    if (len(ocr_res)) > 0:
        print(">>>>>>>>>>>>cur subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
        # if len(ocr_positions) == 1 and last_confidence != None and confidence != None and confidence < last_confidence:
        #     ocr_res = lastSubTitle
        #     confidence = last_confidence
        # print(">>>>>>>>>>>>res subtitle:" + ocr_res + ",confidence: " + str(confidence) + ",last_confidence: " + str(last_confidence))
        return ocr_res, confidence, conf, h
    if check_have_ocr(img):
        return "err", None, 0, None
    return None, None, 0, None

def process_video(video_path: str, begin: float, end: float, book_path: str, sheet_name: str, state=None, mainWindow: MainWindow = None):
...
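Distilled from the conditions above, the acceptance rule for an OCR box is: confidence above 0.7 with near-zero slope and a horizontal centre in the middle 0.4–0.6 of the frame, or confidence above 0.85 for the off-centre halves of a two-speaker line. A standalone sketch (not part of the commit, written against the same rect layout):

# Hypothetical helper mirroring conf_thred1/conf_thred2 in detect_subtitle().
def keep_subtitle_box(rect, confidence, frame_w, slope) -> bool:
    mid = (rect[0][0] + rect[1][0]) / 2 / frame_w
    if confidence > 0.7 and slope < 0.1 and 0.4 < mid < 0.6:
        return True   # single centred subtitle line
    if confidence > 0.85 and slope < 0.1 and (0.3 < mid < 0.4 or 0.6 < mid < 0.7):
        return True   # one half of a line split between two speakers
    return False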
@@ -440,6 +569,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    lastSubTitle = None
    last_confidence = None
    lastConf = 0
    # res是在视频遍历过程中获取的字幕文件,不掺杂对旁白的分析
    res = []
...
@@ -448,13 +578,16 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
    end_time = 0
    video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
    pre_state = state[0]
    ocr_h = None
    while True:
        _, frame = video.read()
        if frame is None:
            break
        cnt += 1
        cur_time = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
        # 判断当前帧是否已超限制
        # end 主要用来判断是否越界
        if cur_time > end:
            if cur_time - end_time > 1:
                print('--------------------------------------------------')
...
@@ -466,7 +599,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
            # 判断当前是否有字幕需要被保存下来
            if end_time < start_time:
                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end, 2), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end, 3), lastSubTitle, ''], ocr_h=ocr_h)
                print(">>>>>>subtitle,ocr_h1:" + str(lastSubTitle) + ">>>" + str(ocr_h))
            break
        # 每秒取4帧画面左右
        # TODO 取帧算法优化
...
@@ -479,7 +613,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
        mainWindow.projectContext.nd_process = state[0]
        mainWindow.projectContext.last_time = cur_time
        subTitle, conf = detect_subtitle(frame)
        subTitle, confidence, conf, cur_ocr_h = detect_subtitle(frame, lastSubTitle, last_confidence)
        if subTitle == "err":
            continue
        if subTitle is not None:
            subTitle = normalize(subTitle)
...
@@ -488,6 +625,8 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
        # 第一次找到字幕
        if lastSubTitle is None and subTitle is not None:
            if cur_ocr_h != None:
                ocr_h = cur_ocr_h
            start_time = cur_time
        # 字幕消失
...
@@ -500,14 +639,17 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                                                 (res[-1][0] - res[-2][1]) * normal_speed)
                # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens], ocr_h)
            print(start_time, end_time, lastSubTitle)
            # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
            add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
            add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''], ocr_h)
            print(">>>>>>subtitle,ocr_h2:" + str(lastSubTitle) + ">>>" + str(ocr_h))
        elif lastSubTitle is not None and subTitle is not None:
            # 两句话连在一起,但是两句话不一样
            if string_similar(lastSubTitle, subTitle) < 0.6:
                if cur_ocr_h != None:
                    ocr_h = cur_ocr_h
                end_time = cur_time
                res.append([start_time, end_time, lastSubTitle])
                if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
...
@@ -516,24 +658,98 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                                                     (res[-1][0] - res[-2][1]) * normal_speed)
                    # write_to_sheet(book_path, sheet_name, ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                    # add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                    add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens])
                    add_to_list(mainWindow, "旁白", ['', '', '', '插入旁白,推荐字数为%d' % recommend_lens], ocr_h)
                print(start_time, end_time, lastSubTitle)
                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''], ocr_h)
                print(">>>>>>subtitle,ocr_h3:" + str(lastSubTitle) + ">>>" + str(ocr_h))
                start_time = end_time
            else:
                lastSubTitle = subTitle if conf > lastConf else lastSubTitle
                continue
            # 当前字幕与上一段字幕不一样
            lastSubTitle = subTitle
            lastConf = conf
            if subTitle != "err":
                lastSubTitle = subTitle
                last_confidence = confidence
                lastConf = conf
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>end 1")
    # print(ocr_h_map)
    # mainWindow.refresh_tab_slot()
    while (mainWindow.refresh_flag == True):
        time.sleep(1)
    mainWindow.detect_lock = True
    try:
        process_err_ocr(mainWindow)
    finally:
        mainWindow.detect_lock = False
    # mainWindow.refresh_lock.acquire()
    # try:
    #     process_err_ocr(mainWindow)
    # finally:
    #     mainWindow.refresh_lock.release()

def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
def process_err_ocr(mainWindow):
    # if 1==1:
    #     return
    for i in range(len(mainWindow.projectContext.all_elements)):
        if mainWindow.projectContext.all_elements[i].subtitle != None and mainWindow.projectContext.all_elements[i].subtitle != "":
            ocr_h_map[i] = mainWindow.projectContext.all_elements[i].ocr_h
    print(ocr_h_map)
    data = list(ocr_h_map.values())
    print(">>>>values")
    print(ocr_h_map.values())
    table_indexs = list(ocr_h_map.keys())
    print(table_indexs)
    # 计算均值和标准差
    mean = np.mean(data)
    std_dev = np.std(data)
    # 定义阈值(例如,可以选择 2 倍标准差作为阈值)
    threshold = 2.7 * std_dev
    print(mean)
    print(threshold)
    rm_list = []
    for i, x in enumerate(data):
        if abs(x - mean) > threshold:
            rm_list.append(i)
    print(rm_list)
    # while(mainWindow.refresh_tab_timer.isActive()):
    #     time.sleep(1)
    print(len(mainWindow.projectContext.all_elements))
    for i in range(len(rm_list)):
        try:
            print(">>>>>>>>>will rm" + str(table_indexs[rm_list[i]] + 1 - i) + ", subtitle:" + mainWindow.projectContext.all_elements[table_indexs[rm_list[i]] - i].subtitle)
            print(table_indexs[rm_list[i]])
            mainWindow.del_line_operation_slot(row=table_indexs[rm_list[i]] + 1 - i, show_msg_flag=False)
            time.sleep(0.5)
        except Exception as e:
            print(">>>>>>>>>>>>>>>>>>>>>>del err")
            print(e)
    # mainWindow.refresh_all_tab_slot()
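process_err_ocr() treats a recorded OCR text height as spurious when it deviates from the mean by more than 2.7 standard deviations, and deletes the corresponding table rows. A minimal standalone sketch of that rule on made-up heights (illustrative, not project code):

# Hypothetical ocr_h values; the outlier test matches the 2.7 * std threshold above.
import numpy as np

heights = np.array([36, 35, 37, 36, 35, 36, 37, 35, 36, 120])
mean, std_dev = heights.mean(), heights.std()
outliers = [i for i, h in enumerate(heights) if abs(h - mean) > 2.7 * std_dev]
print(outliers)   # -> [9], the row flagged for deletion from the subtitle table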
def add_to_list(mainWindow: MainWindow, element_type: str, li: list, ocr_h: int = None):
    # 默认使用配置文件中的语速
    speed = mainWindow.projectContext.speaker_speed
    aside_head_time = float(reverse_time_to_seconds(mainWindow.aside_head_time)) if mainWindow.aside_head_time != None else float(0)
    st_time_sec, ed_time_sec, subtitle, suggest = li
    # global table_index
    # if ocr_h != None and element_type == "字幕":
    #     index = table_index
    #     ocr_h_map[index] = ocr_h
    #     print(">>>>>>>>>>>>>>>>>>ocr H map:" + str(index) + ",subtitle:" + subtitle)
    #     print(ocr_h_map)
    print(">>>>>>>>start time:")
    print(aside_head_time)
    print(st_time_sec)
...
@@ -541,11 +757,12 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
        print(">>>>>>need del")
        print(st_time_sec)
        if not mainWindow.add_head_aside:
            new_element = Element('0.00', "", "", "0/100", "", speed)
            new_element = Element('0.00', "", "", "0/100", "", speed, ocr_h)
            mainWindow.projectContext.aside_list.append(new_element)
            mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
            mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1
            mainWindow.add_head_aside = True
            # table_index = table_index + 1
        return
    st_time_sec, ed_time_sec = str(st_time_sec), str(ed_time_sec)
...
@@ -553,7 +770,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
    aside = ""
    i = len(mainWindow.projectContext.all_elements)
    if element_type == "字幕":
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed)
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed, ocr_h)
        new_element.print_self()
        if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
            mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
...
@@ -561,9 +778,11 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
            # print(">>>>>>>>>>>remove short aside")
            mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            # table_index = table_index - 1
            mainWindow.last_aside_index = None
        mainWindow.projectContext.subtitle_list.append(new_element)
        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.subtitle_list[-1])
        # table_index = table_index + 1
    else:
        if i == 0:
            st_time_sec = "0.01"
...
@@ -574,7 +793,7 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
        # 因为暂时没有用到ed_time_sec,所以直接赋值空吧
        ed_time_sec = ""
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed)
        new_element = Element(st_time_sec, ed_time_sec, subtitle, suggest, aside, speed, ocr_h)
        new_element.print_self()
        if mainWindow.last_aside_index != None and mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec == "" and new_element.ed_time_sec != "":
            mainWindow.projectContext.all_elements[mainWindow.last_aside_index].ed_time_sec = new_element.st_time_sec
...
@@ -582,15 +801,18 @@ def add_to_list(mainWindow: MainWindow, element_type: str, li: list):
            # print(">>>>>>>>>>>remove short aside")
            mainWindow.projectContext.aside_list.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            mainWindow.projectContext.all_elements.remove(mainWindow.projectContext.all_elements[mainWindow.last_aside_index])
            # table_index = table_index - 1
            mainWindow.last_aside_index = None
        new_element.suggest = "0/" + new_element.suggest
        if (st_time_sec != None and st_time_sec != "" and aside_head_time > float(st_time_sec)):
            return
        mainWindow.projectContext.aside_list.append(new_element)
        mainWindow.projectContext.all_elements.append(mainWindow.projectContext.aside_list[-1])
        # table_index = table_index + 1
        mainWindow.last_aside_index = len(mainWindow.projectContext.all_elements) - 1

# end_time 主要用来判断是否越界
def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time: float, state=None, mainWindow: MainWindow = None):
    """使用ocr检测视频获取字幕并输出旁白推荐
...
@@ -616,7 +838,11 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
    else:
        # 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
        up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
        # up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
        get_position(video_path, 0, mainWindow.ocr_ranges)
        print(">>>>>positions:")
        global ocr_positions
        print(ocr_positions)
        context.caption_boundings = [up_b, down_b]
        context.detected = True
...
@@ -627,8 +853,37 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
    # print("process the total video at time: ", datetime.datetime.now())
    process_video(video_path, start_time, end_time, book_name_xlsx, sheet_name_xlsx, state, mainWindow)

def check_have_ocr(img):
    new_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    t = 230
    _, new_img = cv2.threshold(new_img, t, 255, cv2.THRESH_BINARY)
    if np.any(new_img == 255):
        return True
    else:
        return False

if __name__ == '__main__':
    # path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
    path = "C:/Users/Smile/Desktop/accessibility-movie/"
    # print("get_pos:", get_position(path, 0))
    # evaluate_position("C:/Users/AIA/Desktop/1/1.mp4", 0)
    # img = cv2.imread("./cap/cap879.png")
    img = cv2.imread("./cap/cap812.png", cv2.IMREAD_GRAYSCALE)
    # img = cv2.equalizeHist(img)
    t = 230
    _, img = cv2.threshold(img, t, 255, cv2.THRESH_BINARY)
    # img = resize_img(img)
    detect_subtitle(img, None, None)
    # img = cv2.equalizeHist(img)
    # t = 120
    # _, img = cv2.threshold(img, t,255, cv2.THRESH_BINARY)
    # # ret, binary_image = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # # binary_image = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
    cv2.imwrite(f'./binary2.png', img)
    if np.any(img == 255):
        print("111111")
    else:
        print("222222")
main_window.py
...
...
@@ -13,7 +13,7 @@ import os
import cv2
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QDialog, QVBoxLayout
from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
from PyQt5.QtMultimedia import *
from PyQt5.QtGui import QIcon
...
@@ -43,6 +43,7 @@ from excel_utils import read_xls,read_xlsx
from ffmpeg_util import adjust_audio_volume
from PyQt5.QtCore import QThread;
import threading
# from emit_import_event import emit_import_event
...
@@ -71,13 +72,57 @@ class WorkerThread(QThread):
        for t in self.main_window.threads:
            t.start()

    def refresh(self):
        self.main_window.import_process_sign.emit(self.elements)

class ProcessErrThread(QThread):
    def __init__(self, main_window):
        super().__init__()
        self.main_window = main_window

    def run(self):
        from narratage_detection import process_err
        t = RunThread(funcName=process_err, args=(self.main_window,), name="process_err")
        t.setDaemon(True)
        t.start()

class AutoCloseDialog(QDialog):
    def __init__(self, parent=None):
        super(AutoCloseDialog, self).__init__(parent)
        self.setWindowFlags(Qt.CustomizeWindowHint | Qt.NoDropShadowWindowHint)
        layout = QVBoxLayout()
        label = QLabel("字幕边界定位中,请稍后!")
        layout.addWidget(label)
        self.setLayout(layout)

class LocalOcrThread(QThread):
    def __init__(self, main_window, path):
        super().__init__()
        self.main_window = main_window
        self.path = path

    def run(self):
        # 在后台执行耗时操作
        state = [None]
        self.main_window.state = state
        state[0] = 0.1
        self.main_window.threads = []
        t = RunThread(funcName=self.main_window.auto_location_ocr, args=(self.path,), name="auto_location_ocr")
        t.setDaemon(True)
        self.main_window.threads.append(t)
        self.main_window.all_threads.append(t)
        for t in self.main_window.threads:
            t.start()

    def finish(self):
        self.main_window.location_ocr_sign.emit()

class CustomDelegate(QtWidgets.QStyledItemDelegate):
    def paint(self, painter, option, index):
        # Customize the painting behavior for the specific column
...
@@ -92,9 +137,11 @@ class MainWindow(QMainWindow, Ui_MainWindow):
    renew_signal = pyqtSignal(str)
    import_process_sign = pyqtSignal(list)
    refresh_sign = pyqtSignal()
    location_ocr_sign = pyqtSignal()

    def __init__(self, project_path):
        super(MainWindow, self).__init__()
        self.location_ocr_sign.connect(self.finish_location_ocr)
        self.last_aside_index = None
        self.setupUi(self)
        self.statusbar.showMessage("hello", 5000)
...
@@ -150,6 +197,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        # 所有QTimer集中管理
        self.import_excel_timer = QTimer()
        self.import_excel_timer.timeout.connect(self.check_if_import_excel_over)
        self.location_ocr_timer = QTimer()
        self.location_ocr_timer.timeout.connect(self.check_if_location_ocr)
        self.detect_timer = QTimer()
        self.detect_timer.timeout.connect(self.check_if_detect_over_slot)
        self.synthesis_timer = QTimer()
...
@@ -161,7 +210,14 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.video_timer.start(1000)
        # todo 作为参数配置
        self.refresh_tab_timer = QTimer()
        self.refresh_tab_timer.timeout.connect(self.refresh_tab_slot)
        self.up_ocr_timer = QTimer()
        self.down_ocr_timer = QTimer()
        self.up_ocr_timer.timeout.connect(self.up_ocr_timer_func)
        self.down_ocr_timer.timeout.connect(self.down_ocr_timer_func)
        self.up_ocr_bottom_timer = QTimer()
        self.down_ocr_bottom_timer = QTimer()
        self.up_ocr_bottom_timer.timeout.connect(self.up_ocr_bottom_timer_func)
        self.down_ocr_bottom_timer.timeout.connect(self.down_ocr_bottom_timer_func)
        """状态栏相关空间
        """
...
@@ -219,18 +275,28 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.insert_aside_from_now_btn.clicked.connect(self.insert_aside_from_now_slot)
        self.insert_aside_from_now_btn.setEnabled(False)
        self.up_ocr_btn.clicked.connect(self.up_ocr)
        self.up_ocr_btn.pressed.connect(self.up_ocr)
        self.up_ocr_btn.released.connect(self.up_ocr_stop)
        self.down_ocr_btn.clicked.connect(self.down_ocr)
        self.down_ocr_btn.pressed.connect(self.down_ocr)
        self.down_ocr_btn.released.connect(self.down_ocr_stop)
        self.up_ocr_bottom_btn.clicked.connect(self.up_ocr_bottom)
        self.up_ocr_bottom_btn.pressed.connect(self.up_ocr_bottom)
        self.up_ocr_bottom_btn.released.connect(self.up_ocr_bottom_stop)
        self.down_ocr_bottom_btn.clicked.connect(self.down_ocr_bottom)
        self.down_ocr_bottom_btn.pressed.connect(self.down_ocr_bottom)
        self.down_ocr_bottom_btn.released.connect(self.down_ocr_bottom_stop)
        self.confirm_ocr_btn.clicked.connect(self.confirm_ocr)
        self.confirm_head_aside_btn.clicked.connect(self.confirm_head_aside)
        self.detect_btn.clicked.connect(self.show_detect_dialog)
        self.detect_btn.clicked.connect(self.show_confirmation_dialog)
        """视频预览相关信息
...
@@ -398,6 +464,10 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.sld_video.setFocus()
        self.aside_head_time = None
        self.add_head_aside = False
        self.ocr_ranges = []
        self.refresh_lock = threading.Lock()
        self.detect_lock = False
        self.refresh_flag = False
        # 打印到log文件中
        t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
...
@@ -407,6 +477,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        get_focus_thread.setDaemon(True)
        get_focus_thread.start()

    def finish_location_ocr(self):
        self.import_excel_dialog.show_with_msg("字幕定位结束,请检查是否准确,并调整正确")

    def show_confirmation_dialog(self):
        confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'确认已经校准OCR范围(只包含中文,不含英文)?', QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
        if confirm_box == QtWidgets.QMessageBox.Yes:
            self.show_detect_dialog()
        else:
            print(">>>>>>show_confirmation_dialog")

    def getFocus(self):
        while (True):
...
@@ -496,11 +576,20 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
        if self.rate == None:
            self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
            return
            # self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
            h = self.widget.get_h()
            video_h = self.wgt_video.height()
            self.rate = float(h - 10) / float(video_h)
        if self.rate_bottom == None:
            self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
            # self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
            h = self.widget_bottom.get_h()
            video_h = self.wgt_video.height()
            self.rate_bottom = float(h - 6) / float(video_h)
        if not self.check_ocr_rate():
            self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
            return
        if len(self.ocr_ranges) == 0:
            self.ocr_ranges.append([self.rate, self.rate_bottom])
        self.detect_dialog.init_self()
        self.detect_dialog.show()
...
@@ -601,9 +690,39 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.action_operate.setEnabled(True)
        self.action_insert_aside_from_now.setEnabled(True)
        self.insert_aside_from_now_btn.setEnabled(True)
        # self.import_excel_dialog.show_with_msg("正在自动定位字幕边界,请稍后!")
        confirm_box = QtWidgets.QMessageBox.question(self, u'警告', u'是否需要自动定位字幕边界?', QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No)
        if confirm_box == QtWidgets.QMessageBox.Yes:
            self.location_ocr_thread = LocalOcrThread(self, path)
            self.location_ocr_thread.start()
            self.location_ocr_timer.start(1000)
            # self.auto_close_dialog = AutoCloseDialog()
            # self.auto_close_dialog.exec_()
        else:
            print(">>>>>>>>>>>>>play_video")

    def up_ocr(self):
        self.widget.change_painter_flag(True)

    def auto_location_ocr(self, path):
        from detect_with_ocr import evaluate_position
        print(">>>>>>>>>>>>>>>>>>>>>v_path" + path.path()[1:])
        y1, y2 = evaluate_position(path.path()[1:], 0)
        video_h = self.wgt_video.height()
        self.widget.setY(int(video_h * y1) + 6)
        time.sleep(1)
        self.widget_bottom.setY(int(video_h * y2) + 10)
        print("y1:%d,y2:%d" % (y1, y2))

    def check_ocr_rate(self):
        if self.rate > self.rate_bottom:
            return False
        else:
            return True

    def up_ocr_timer_func(self):
        # self.widget.change_painter_flag(True)
        h = self.widget.up(3)
        video_h = self.wgt_video.height()
        self.rate = float(h - 10) / float(video_h)
...
@@ -612,9 +731,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))

    def down_ocr(self):
        self.widget.change_painter_flag(True)

    def down_ocr_timer_func(self):
        # self.widget.change_painter_flag(True)
        h = self.widget.down(3)
        video_h = self.wgt_video.height()
        self.rate = float(h - 10) / float(video_h)
...
@@ -622,19 +740,70 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))

    def up_ocr_bottom(self):
    def up_ocr(self):
        self.user_editing_content = True
        self.up_ocr_timer.start(50)
        # self.widget.change_painter_flag(True)
        # h = self.widget.up(3)
        # video_h = self.wgt_video.height()
        # self.rate = float(h-10)/float(video_h)
        # print(">>>>>video_h: "+str(video_h))
        # print(">>>>>up h:" + str(h))
        # print(self.wgt_video.height())
        # print(">>>>>>>>>rate" + str(self.rate))

    def up_ocr_stop(self):
        self.user_editing_content = False
        self.up_ocr_timer.stop()

    def down_ocr_stop(self):
        self.user_editing_content = False
        self.down_ocr_timer.stop()

    def down_ocr(self):
        self.user_editing_content = True
        self.down_ocr_timer.start(50)
        # self.widget.change_painter_flag(True)
        # h = self.widget.down(3)
        # video_h = self.wgt_video.height()
        # self.rate = float(h-10)/float(video_h)
        # print(">>>>>down h:" + str(h))
        # print(self.wgt_video.height())
        # print(">>>>>>>>>rate" + str(self.rate))

    def up_ocr_bottom_timer_func(self):
        self.widget_bottom.change_painter_flag(True)
        h = self.widget_bottom.up(3)
        video_h = self.wgt_video.height()
        self.rate_bottom = float(h - 6) / float(video_h)

    def down_ocr_bottom(self):
    def down_ocr_bottom_timer_func(self):
        self.widget_bottom.change_painter_flag(True)
        h = self.widget_bottom.down(3)
        video_h = self.wgt_video.height()
        self.rate_bottom = float(h - 6) / float(video_h)

    def up_ocr_bottom_stop(self):
        self.user_editing_content = False
        self.up_ocr_bottom_timer.stop()

    def down_ocr_bottom_stop(self):
        self.user_editing_content = False
        self.down_ocr_bottom_timer.stop()

    def up_ocr_bottom(self):
        self.user_editing_content = True
        self.up_ocr_bottom_timer.start(50)
        # self.widget_bottom.change_painter_flag(True)
        # h = self.widget_bottom.up(3)
        # video_h = self.wgt_video.height()
        # self.rate_bottom = float(h-6)/float(video_h)

    def down_ocr_bottom(self):
        self.user_editing_content = True
        self.down_ocr_bottom_timer.start(50)
        # self.widget_bottom.change_painter_flag(True)
        # h = self.widget_bottom.down(3)
        # video_h = self.wgt_video.height()
        # self.rate_bottom = float(h-6)/float(video_h)

    def refresh_on_import(self):
        print(">>>>>>>refresh in")
        self.refresh_tab_slot()
...
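The commit replaces the single clicked handlers on the boundary buttons with a pressed/released pair driving a 50 ms QTimer, so holding a button keeps nudging the subtitle boundary in 3-pixel steps. A minimal, self-contained sketch of that press-and-hold pattern (button and variable names are illustrative, not the project's widgets):

import sys
from PyQt5.QtCore import QTimer
from PyQt5.QtWidgets import QApplication, QPushButton

app = QApplication(sys.argv)
offset = 0                         # stands in for the subtitle-boundary position

timer = QTimer()
timer.setInterval(50)              # fires every 50 ms while the button is held

def nudge():
    global offset
    offset += 3                    # same step size as the widget.up(3)/down(3) calls
    print("boundary offset:", offset)

timer.timeout.connect(nudge)

btn = QPushButton("move boundary down")
btn.pressed.connect(timer.start)   # start repeating when the button goes down
btn.released.connect(timer.stop)   # stop as soon as it is released
btn.show()
sys.exit(app.exec_())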
@@ -877,6 +1046,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
"""
        self.check_if_over("旁白导入")

    def check_if_location_ocr(self):
        self.check_if_over("字幕定位")
        alive = True
        for t in self.threads:
            alive = alive and t.is_alive()
        if not alive:
            self.location_ocr_timer.stop()
            # self.auto_close_dialog.close()
            self.threads = []

    # type = 检测 或 合成 或 导出
    def check_if_over(self, type: str):
        """确认传入的待检测任务是否完成
...
@@ -904,9 +1083,16 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        elif type == "检测":
            self.detect_timer.stop()
            self.refresh_tab_timer.stop()
            # t = ProcessErrThread(self)
            # t.start()
            # from narratage_detection import process_err
            # process_err(self)
        elif type == "旁白导入":
            self.import_excel_timer.stop()
            # self.refresh_tab_timer.stop()
        elif type == "字幕定位":
            self.location_ocr_timer.stop()
        else:
            self.export_timer.stop()
...
@@ -921,6 +1107,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            self.progressBar.setValue(100)
            self.progressLabel.setText(f"100%")
            self.projectContext.nd_process = 1
            self.threads = []

    def deal_synthesis_callback_slot(self, threads, state):
        """实现旁白音频合成任务状态在界面中的实时显示,更新界面中的对应变量,每5s更新一次任务状态
...
@@ -1305,9 +1492,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
                item = QTableWidgetItem(text)
                item.setTextAlignment(Qt.AlignCenter)
                # 设置为不可编辑
                if self.checkIfTableItemCanChange(table, idx, j) == False:
                # if self.checkIfTableItemCanChange(table, idx, j) == False:
                    # item.setFlags(Qt.ItemIsEnabled)
                    print(1)
                    # print(">>>>>>>>>setElememtToTable")
                table.setItem(idx, j, item)
                # 只有Content页的字幕列和 Aside页的字幕列 可编辑
...
@@ -1776,7 +1963,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        pre_item = self.all_tableWidget.item(row, col - 1)
        suggest = pre_item.text()
        if suggest != None and suggest != "":
            print(">>>>>>>>suggest:" + suggest)
        if col == constant.Content.AsideColumnNumber and suggest != None and suggest != "":
            arrays = suggest.split("/")
            if len(arrays) == 2:
                suggest = str(len(text)) + "/" + arrays[1]
...
@@ -1807,6 +1996,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
            # self.all_tableWidget.setItem(
            #     int(idx), constant.Content.SpeedColumnNumber, QTableWidgetItem(text))
            self.projectContext.refresh_speed(row, text)
        elif col == constant.Content.SubtitleColumnNumber:
            self.projectContext.refresh_subtitle(row, text)
        # self.all_tableWidget_idx = int(row)
        # self.set_table_to_window(False)
...
@@ -1942,8 +2133,13 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        将表格内容更新至界面中,并保存当前工程内容
        """
        self.set_table_to_window(need_refresh_all=False)
        self.projectContext.save_project(False)
        if not self.detect_lock:
            self.refresh_flag = True
            try:
                self.set_table_to_window(need_refresh_all=False)
                self.projectContext.save_project(False)
            finally:
                self.refresh_flag = False

    def refresh_all_tab_slot(self):
        """刷新整个表格
...
@@ -2274,7 +2470,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.prompt_dialog.show_with_msg("操作成功!!请查看变化")

    # 只有row起作用
    def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag=True):
    def del_line_operation_slot(self, row: int, start_time="0", end_time="0", subtitle="", suggest="", aside="", speed="", refresh_flag=True, show_msg_flag=True):
        """删除一行
        Args:
...
@@ -2313,7 +2509,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.projectContext.all_elements.pop(int(row) - 1)
        if refresh_flag:
            self.refresh_tab_slot()
        self.prompt_dialog.show_with_msg("操作成功!!请查看变化")
        if show_msg_flag:
            self.prompt_dialog.show_with_msg("操作成功!!请查看变化")

    def pb_item_changed_by_double_clicked_slot(self, item):
        """双击后修改旁白文本
...
@@ -2418,4 +2615,17 @@ class MainWindow(QMainWindow, Ui_MainWindow):
                                              str(round(video_position / 1000, 2)))
        self.import_excel_dialog.show_with_msg("定位成功:" + self.aside_head_time)
\ No newline at end of file

    def confirm_ocr(self):
        if self.rate == None:
            self.prompt_dialog.show_with_msg("请选择字幕上边界范围")
            return
        if self.rate_bottom == None:
            self.prompt_dialog.show_with_msg("请选择字幕下边界范围")
            return
        if not self.check_ocr_rate():
            self.prompt_dialog.show_with_msg("字幕上边界不能低于下边界")
            return
        self.ocr_ranges.append([self.rate, self.rate_bottom])
        self.prompt_dialog.show_with_msg(f"操作成功,如果电影存在多行字幕,请移动字幕上下边界,再次点击该按钮确认,目前已存在{len(self.ocr_ranges)}组字幕边界")
\ No newline at end of file
main_window_ui.py
...
...
@@ -18,6 +18,9 @@ class MyWidget(QWidget):
    # def __init__(self, parent=None):
    #     super(QWidget, self).__init__(parent)
    #     self.painter_flag = True
    def __init__(self, parent=None, color=Qt.red):
        super(QWidget, self).__init__(parent)
        self.color = color

    def paintEvent(self, event):
        # print(">>>>>>>>into paint")
...
@@ -26,7 +29,7 @@ class MyWidget(QWidget):
        lock.acquire()
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)  # Optional: Enable anti-aliasing
        painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
        painter.setPen(QPen(self.color, 2, Qt.SolidLine))
        painter.drawLine(0, 1, 800, 1)
        painter.end()
        lock.release()
...
@@ -55,8 +58,17 @@ class MyWidget(QWidget):
        # painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
        # painter.drawLine(0, 1, 800, 1)
        # painter.end()
        print(">>>>>cur_y : " + str(self.y()))
        return self.y()

    def setY(self, h):
        print(">>>>>cur_y2 : " + str(self.y()))
        self.move(0, h)

    def get_h(self):
        return self.y()

    def down(self, mov_len):
        print(">>>>>>>>>>>down" + str(mov_len))
        self.move(0, self.y() + mov_len)
...
@@ -314,6 +326,8 @@ class Ui_MainWindow(object):
        self.horizontalLayout_7.setObjectName("horizontalLayout_7")
        self.up_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
        self.up_ocr_btn.setObjectName("up_ocr_btn")
        # self.up_ocr_btn.setAutoRepeatDelay(False)
        # self.up_ocr_btn.setAutoRepeat
        self.horizontalLayout_7.addWidget(self.up_ocr_btn)
        self.down_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
        self.down_ocr_btn.setObjectName("down_ocr_btn")
...
@@ -324,9 +338,13 @@ class Ui_MainWindow(object):
        self.down_ocr_bottom_btn = QtWidgets.QPushButton(self.centralwidget)
        self.down_ocr_bottom_btn.setObjectName("down_ocr_bottom_btn")
        self.horizontalLayout_7.addWidget(self.down_ocr_bottom_btn)
        self.confirm_ocr_btn = QtWidgets.QPushButton(self.centralwidget)
        self.confirm_ocr_btn.setObjectName("confirm_ocr_btn")
        self.horizontalLayout_7.addWidget(self.confirm_ocr_btn)
        self.confirm_head_aside_btn = QtWidgets.QPushButton(self.centralwidget)
        self.confirm_head_aside_btn.setObjectName("confirm_head_aside_btn")
        self.horizontalLayout_7.addWidget(self.confirm_head_aside_btn)
        self.horizontalLayout_8 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_8.setObjectName("horizontalLayout_8")
...
@@ -523,7 +541,8 @@ class Ui_MainWindow(object):
        self.action_redo = QtWidgets.QAction(MainWindow)
        # self.action_redo.setFont(font)
        self.action_redo.setObjectName("action_redo")
        self.action_3 = QtWidgets.QAction("旁白区间检测", self, triggered=self.show_detect_dialog)
        # self.action_3 = QtWidgets.QAction("旁白区间检测",self,triggered=self.show_detect_dialog)
        self.action_3 = QtWidgets.QAction("旁白区间检测", self, triggered=self.show_confirmation_dialog)
        self.action_3.setEnabled(False)
        self.action_4 = QtWidgets.QAction("旁白音频合成", self, triggered=self.show_assemble_dialog)
        self.action_4.setEnabled(False)
...
@@ -539,7 +558,6 @@ class Ui_MainWindow(object):
        self.action_9.setEnabled(True)
        self.action_10 = QtWidgets.QAction("片头旁白定位", self, triggered=self.confirm_head_aside)
        self.action_10.setEnabled(True)
        # self.action_3.setObjectName("action_3")
        # self.action_4 = QtWidgets.QAction(MainWindow)
        # self.action_4.setObjectName("action_4")
...
@@ -604,6 +622,7 @@ class Ui_MainWindow(object):
        self.up_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界上移"))
        self.down_ocr_bottom_btn.setText(_translate("MainWindow", "字幕下边界下移"))
        self.confirm_head_aside_btn.setText(_translate("MainWindow", "片头旁白定位"))
        self.confirm_ocr_btn.setText(_translate("MainWindow", "字幕边界确认"))
        self.detect_btn.setText(_translate("MainWindow", "旁白区间检测"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.all_tab), _translate("MainWindow", "字幕旁白"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.zm_tab), _translate("MainWindow", "字幕"))
...
management.py
...
...
@@ -77,13 +77,14 @@ class OperateRecord:
# 每一行的具体信息,"起始时间", "终止时间", "字幕", '建议', '解说脚本'
class Element:
    def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed="1.00(4字/秒)"):
    def __init__(self, st_time_sec: str, ed_time_sec: str, subtitle, suggest, aside, speed="1.00(4字/秒)", ocr_h=None):
        self.st_time_sec = st_time_sec
        self.ed_time_sec = ed_time_sec
        self.subtitle = subtitle
        self.suggest = suggest
        self.aside = aside
        self.speed = speed
        self.ocr_h = ocr_h

    # 判断当前元素是否是字幕
    def is_subtitle(self):
...
@@ -263,6 +264,11 @@ class ProjectContext:
        if not self.initial_ing:
            save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)

    def refresh_subtitle(self, row, subtitle: str):
        self.all_elements[int(row)].subtitle = subtitle
        if not self.initial_ing:
            save_excel_to_path(self.all_elements, self.excel_path, self.write_header, self.excel_sheet_name)

    def refresh_speed(self, row, speed: str) -> None:
        self.all_elements[int(row)].speed = speed
        if not self.initial_ing:
...
@@ -307,7 +313,7 @@ class ProjectContext:
            if d["终止时间"][i] is None:
                # 如果是最后一条
                if i == len(d["字幕"]) - 1:
                    print(1)
                    print(">>>>>>>>>load_excel_from_path")
                    # ed_time_sec = "360000" if self.duration == 0 else self.duration  # todo 默认最大时长是100h
                else:
                    ed_time_sec = "%.2f" % (float(d["起始时间"][i + 1]) - 0.01)
...
@@ -428,6 +434,8 @@ def save_excel_to_path(all_element, new_excel_path, header, excel_sheet_name):
    backup_path = os.path.dirname(new_excel_path) + "/tmp_" + str(time.time()) + ".xlsx"
    # os.remove(new_excel_path)
    os.rename(new_excel_path, backup_path)
    # print(">>>>>>new_excel_path:" + new_excel_path)
    # print(">>>>>>>>>>backup_path:" + backup_path)
    try:
        create_sheet(new_excel_path, "旁白插入位置建议", [header])
        # for element in all_element:
...
narratage_detection.py
...
...
@@ -63,6 +63,13 @@ def detect(video_path: str, start_time: float, end_time: float, book_path: str,
        from detect_with_ocr import detect_with_ocr
        detect_with_ocr(video_path, book_path, start_time, end_time, state, mainWindow)

def process_err(mainWindow: MainWindow = None):
    from detect_with_ocr import process_err_ocr
    try:
        process_err_ocr(mainWindow)
    except Exception as e:
        print("process_err err")
        print(e)

if __name__ == '__main__':
    # 定义参数
...
ocr_metric.py
import re
import sys
import csv
import jieba
import argparse
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
title = ['起始时间(转换后)', '终止时间(转换后)', '字幕']
from tqdm import tqdm
# title = ['起始时间(转换后)', '终止时间(转换后)', '字幕']
title = ['起始时间', '终止时间', '字幕']

def init():
    # 获取中文停用词列表
...
@@ -22,13 +27,32 @@ def change_to_second(time_str):
              time_obj.second + time_obj.microsecond / 1000000
    return seconds

# 将中文句子划分,并且防止划分全部为停用词
def words_segment(str):
    tmp = ','.join(jieba.cut(str))
    # 将分割的句子差分成单词,也不进行划分
    if is_all_stopwords(tmp) or len(list(jieba.cut(str))) == len(str):
        return str
    return tmp

# 判断是否从中英文字幕中提取中文
def extract_info(str, has_english=False):
    if not has_english:
        return str
    chinese_text = re.findall(r'[\u4e00-\u9fff]+', str)
    return ' '.join(chinese_text)

# 计算字幕的相似度
def calculate_similarity(str1, str2, method='cosine'):
    if method == 'cosine':
        tfidf_vectorizer = TfidfVectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform([str1, str2])
        str1, str2 = words_segment(str1), words_segment(str2)
        tfidf_vectorizer = TfidfVectorizer(min_df=1)
        tfidf_matrix = tfidf_vectorizer.fit_transform([str1, str2])
        # shape=[2, N]
        # print(np.array(tfidf_matrix.toarray()).shape, type(tfidf_matrix), tfidf_matrix.toarray())
        similarity_matrix = cosine_similarity(tfidf_matrix)
        return similarity_matrix[0][1]
    elif method == 'distance':
        return -String_edit_distance(str1, str2)
    else:
        return SequenceMatcher(None, str1, str2).ratio()
...
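A quick standalone illustration of the cosine branch above (invented subtitle strings, not from the repository): segment both strings with jieba as words_segment() does, vectorize with TfidfVectorizer, and read the off-diagonal cosine similarity.

import jieba
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

a, b = "今天天气真好", "今天的天气很好"
a_seg, b_seg = ','.join(jieba.cut(a)), ','.join(jieba.cut(b))  # same segmentation as words_segment()
m = TfidfVectorizer(min_df=1).fit_transform([a_seg, b_seg])
print(cosine_similarity(m)[0][1])  # value in [0, 1]; higher means more similar subtitles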
@@ -37,23 +61,62 @@ def calculate_time_difference(time1, time2):
    return abs(time2 - time1)

def calculate_weight(x, y):
    # weight = e^(-alpha * time_diff)
    # 相差1s的系数为0.9
    alpha = 0.11
    return 1 / (alpha * (x + y) + 1)
    # # weight = e^(-alpha * time_diff)
    # # 相差1s的系数为0.9
    # alpha = 0.11
    # return 1 / (alpha * (x + y) + 1)
    return 1.0  # 目前不考虑时间系数

# 检查句子中的每个单词是否都是停用词
def is_all_stopwords(sentence):
    sentence = sentence.replace(' ', '')
    return all(word in stop_words for word in sentence)

# 编辑距离算法 有问题!!!!!!
def String_edit_distance(str1, str2):
    n, m = len(str1), len(str2)
    dp = [[0 for _ in range(m + 1)] for _ in range(n + 1)]
    for i in range(n + 1):
        dp[i][0] = i
    for i in range(m + 1):
        dp[0][i] = i
    dp[0][0] = 0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            if str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = min(dp[i - 1][j - 1], min(dp[i][j - 1], dp[i - 1][j])) + 1
    # print(dp[n][m], n, m)
    return 1.0 * dp[n][m] / max(n, m)

### 如果其中有-符号,可能在用excel打开时自动添加=变成公式,读取的时候没问题
def read_srt_to_csv(path_srt, path_output):
    with open(path_srt, 'r', encoding='utf-8-sig') as f:
        srt_content = f.read()  # str
    try:
        with open(path_srt, 'r', encoding='utf-8-sig') as f:
            srt_content = f.read()  # str
    except UnicodeDecodeError:
        print(f"编码错误,已经切换到utf-16编码")
        try:
            with open(path_srt, 'r', encoding='utf-16') as f:
                srt_content = f.read()  # str
        except:
            print(f"请选择utf-8或utf-16编码形式的srt文件")
            sys.exit(1)
    # 使用正则表达式匹配时间码和字幕内容
    pattern = re.compile(r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)', re.DOTALL)
    matches = pattern.findall(srt_content)
    has_english = []
    for i in range(5):
        idx = np.random.randint(len(matches))
        pattern = re.compile(r'[a-zA-Z]')
        has_english.append(bool(pattern.search(matches[idx][3])))
    has_english = all(has_english)
    print('!' * 20, has_english)
    # 写入 csv 文件
    with open(path_output, 'w', newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)
...
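For reference, a tiny standalone run of the same SRT-cue regex used in read_srt_to_csv(), on an invented two-cue snippet (not from the repository):

import re

srt = "1\n00:00:01,000 --> 00:00:02,500\n你好\n\n2\n00:00:03,000 --> 00:00:04,000\n再见"
pattern = re.compile(r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.+?)(?=\n\d+\n|$)', re.DOTALL)
for idx, start, end, text in pattern.findall(srt):
    print(idx, start, end, text.strip())  # e.g. "1 00:00:01,000 00:00:02,500 你好"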
@@ -61,7 +124,7 @@ def read_srt_to_csv(path_srt, path_output):
        for _, start, end, subtitle in matches:
            # 都是str格式
            subtitle = re.sub(r'\{[^}]*\}', '', subtitle)  # 将srt文件前的加粗等格式去掉
            csv_writer.writerow([start, end, subtitle.strip()])
            csv_writer.writerow([start, end, extract_info(subtitle.strip(), has_english)])

def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
    data = pd.read_excel(path_xlsx)
...
@@ -70,20 +133,19 @@ def read_from_xlsx(path_xlsx='output.xlsx', path_output='deal.csv'):
        csv_writer.writerow(title)
        for _, data1 in data.iterrows():
            start, end, subtitle = data1[1], data1[3], data1[4]
            # print(data1[1])
            start, end, subtitle = data1[0], data1[1], data1[2]
            if isinstance(subtitle, float) and np.isnan(subtitle):
                continue
            # 与srt文件格式同步
            start = start.replace('.', ',')
            end = end.replace('.', ',')
            # print(start, end, subtitle,)
            # print(type(start), type(end), type(subtitle))
            csv_writer.writerow([start, end, subtitle.strip()])

### 对于srt中的字幕计算相似性度。从ocr中找到时间戳满足<=time_t的字幕,
### 然后计算字幕间的相似度,取一个最大的。字幕从start和end都匹配一遍
# time_threshold设置阈值,用于判断时间差是否可接受
def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
def measure_score(path_srt, path_ocr, time_threshold=5.0, time_threshold_re=False, method='cosine'):
    data_srt, data_ocr = [], []
    with open(path_srt, 'r', encoding='utf-8') as file:
        csv_reader = csv.reader(file)
...
@@ -103,22 +165,36 @@ def measure_score(path_srt, path_ocr, time_threshold=5.0, method='cosine'):
    # 计算相似度
    total_similarity = 0.0
    total_weight = 0.0
    for sub in data_srt:
        max_similarity = 0.0
    txt1 = []
    for sub in tqdm(data_srt, desc="Processing", ncols=100):
        max_similarity = 0.0 if method != 'distance' else -1.0
        # 去除srt中的停用词
        if is_all_stopwords(sub[2]):
            continue
        subb = ""
        for sub1 in data_ocr:
            x, y = abs(sub[0] - sub1[0]), abs(sub[1] - sub1[1])
            if min(x, y) <= time_threshold:
                # print(sub[2], sub1[2])
                score = calculate_similarity(sub[2], sub1[2], 'cosine')
            if time_threshold_re:
                time_threshold_tmp = time_threshold
            else:
                time_threshold_tmp = (sub[1] - sub[0]) * 0.3  # 10s允许3s的误差
            if min(x, y) <= time_threshold_tmp:
                score = calculate_similarity(sub[2], sub1[2], method)
                if max_similarity <= score * calculate_weight(x, y):
                    subb = sub1[2]
                max_similarity = max(max_similarity, score * calculate_weight(x, y))
        if max_similarity <= -0.5:
            # print(max_similarity, sub[2], subb, sub[0])
            txt1.append(' !!! '.join([str(max_similarity), sub[2], subb, str(sub[0])]))
        total_similarity += max_similarity
        total_weight += 1
    if method == 'distance':
        total_similarity = total_weight + total_similarity
    # print(total_similarity, total_similarity / len(data_srt), total_similarity / total_weight)
    with open('movie_pro.txt', 'w', encoding='utf-8') as f:
        for i in txt1:
            f.write(i + '\n')
    return total_similarity / len(data_srt), total_similarity / total_weight

if __name__ == '__main__':
...
@@ -128,13 +204,23 @@ if __name__ == '__main__':
    # 添加命令行参数
    parser.add_argument("--path_srt", required=True, type=str, help="Path of srt file, format is srt")
    parser.add_argument("--path_ocr", required=True, type=str, help="Path of ocr file, format is xlsx")
    parser.add_argument("--method", type=str, default='cosine', help="Select evaluation method")
    parser.add_argument("--time_threshold", type=float, default=5.0, help="Allowable time frame")
    parser.add_argument("--time_threshold", type=float, default=5.0, help="Allowable time frame")
    parser.add_argument("--method", type=str, default='distance', choices=['cosine', 'distance', 'sequence'], help="Select evaluation method")
    parser.add_argument("--time_threshold_re", type=bool, default=True, help="Specify whether \
                        time threshold is required")
    args = parser.parse_args()
    output_file_srt = 'deal_srt.csv'
    output_file_ocr = 'deal_ocr.csv'
    read_srt_to_csv(args.path_srt, output_file_srt)
    read_from_xlsx(args.path_ocr, output_file_ocr)
    score = measure_score(output_file_srt, output_file_ocr, args.time_threshold, args.method)
    print(f'该评估算法得分: {score[1]:.5f}')
\ No newline at end of file
    score = measure_score(output_file_srt, output_file_ocr, args.time_threshold, \
                          args.time_threshold_re, args.method)
    print(f'该评估算法得分: {100 * score[1]:.3f}')
    # python ocr_metric.py --path_srt test/new/movie_1.srt --path_ocr ../测试/the-swan-v3/The.Swan-zimu.xlsx --time_threshold 10
\ No newline at end of file