dev ocr

eeb2d83a · cxy · cdc541fc · eeb2d83a · eeb2d83a · eeb2d83a
Commit eeb2d83a authored Aug 10, 2023 by cxy
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 8 deletions

detect_with_ocr.py +23 -4

main_window.py +5 -3

main_window_ui.py +1 -1

No files found.
--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -68,12 +68,13 @@ def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float
    # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
    print(">>>>>>>>>>>>video height")
    print(cv2.CAP_PROP_FRAME_HEIGHT)
+    print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
    # down = up + 20
    # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
    print(up)
    # print(down)
-    return int(up), int(up + 20)
+    return int(up), int(up + video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.2)
    # while True:
    #     _, img = video.read()
    #     # print("img:", img)
@@ -201,8 +202,11 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
        Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
    """
    subTitle = ''
+    # up_b = 276
+    # down_b = 297
    height = down_b - up_b
-    img = img[int(up_b - height * 0.7):int(down_b)]
+    img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
+   
    # 针对低帧率的视频做图像放大处理
    print(">>>>>>>>>>>>>>>>>>>>>img shape")
    print(height)
@@ -211,14 +215,16 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
    print(img.shape)
    if img.shape[1] < 1000:
        img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
+    cv2.imwrite('./cap.png', img)
    res = ocr.ocr(img, cls=True)
+    print('--------> res', res)
    sorted(res, key=lambda text: text[0][0][1])
    sorted(res, key=lambda text: text[0][0][0])
    if len(res) == 0:
        return None, 0
    possible_txt = []
    conf = 0
-    print(res)
+    print('res --------->', res)
    for x in res:
        # cv2.imshow("cut", img)
        # cv2.waitKey(0)
@@ -238,7 +244,8 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
        # conf_thred2 = 0.4
        # conf_thred1 = 0.5
        # conf_thred2 = 0.7
-        if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
+        if (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
+            print('drop  res ', x)
            continue
        if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
            subTitle += txt
@@ -325,15 +332,24 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
            mainWindow.projectContext.last_time = cur_time

            subTitle, conf = detect_subtitle(frame)
+            print(">>>>>>>>>>>>111111111")
            if subTitle is not None:
+                print(">>>>>>>>>>>>111111111  2222222")
+
                subTitle = normalize(subTitle)
                if len(subTitle) == 0:
+                    print(">>>>>>>>>>>>111111111  3333333")
                    subTitle = None
+            print(">>>>>>>>>>>>222222222")
+
            # 第一次找到字幕
            if lastSubTitle is None and subTitle is not None:
                start_time = cur_time
+                print(">>>>>>>>>>>>333333333")
+
            # 字幕消失
            elif lastSubTitle is not None and subTitle is None:
+                print(">>>>>>>>>>>>4444444444")
                end_time = cur_time
                res.append([start_time, end_time, lastSubTitle])
                if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
@@ -347,8 +363,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                # write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
                add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
            elif lastSubTitle is not None and subTitle is not None:
+                print(">>>>>>>>>>>>5555555555")
                # 两句话连在一起，但是两句话不一样
                if string_similar(lastSubTitle, subTitle) < 0.7:
+                    print(">>>>>>>>>>>66666666666")
                    end_time = cur_time
                    res.append([start_time, end_time, lastSubTitle])
                    if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
@@ -362,6 +380,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
                    add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
                    start_time = end_time
                else:
+                    print(">>>>>>>>>>>>777777777")
                    lastSubTitle = subTitle if conf > lastConf else lastSubTitle
                    continue
            # 当前字幕与上一段字幕不一样

--- a/main_window.py
+++ b/main_window.py
@@ -299,6 +299,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.projectContext.Init(project_path)
        self.update_ui()

+        self.rate = 0
+
        # 打印到log文件中
        t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
        print(t)
@@ -457,7 +459,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.insert_aside_from_now_btn.setEnabled(True)

    def up_ocr(self):
-        h = self.widget.up(5)
+        h = self.widget.up(3)
        video_h = self.wgt_video.height()
        self.rate = float(h)/float(video_h)
        print(">>>>>up h:" + str(h))
@@ -466,9 +468,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):


    def down_ocr(self):
-        h = self.widget.down(5)
+        h = self.widget.down(3)
        video_h = self.wgt_video.height()
-        self.rate = float(h)/float(video_h)
+        self.rate = float(h - 6)/float(video_h)
        print(">>>>>down h:" + str(h))
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))

--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -61,7 +61,7 @@ class Ui_MainWindow(object):
        self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
        self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
        self.widget = MyWidget(self.centralwidget)
-        self.widget.setGeometry(0,150,800,3)
+        self.widget.setGeometry(0,250,800,3)
        palette = QtGui.QPalette()
        brush = QtGui.QBrush(QtGui.QColor(0, 0, 0))
        brush.setStyle(QtCore.Qt.SolidPattern)