dev ocr

b229c891 · cxy · eeb2d83a · b229c891 · b229c891 · b229c891
Commit b229c891 authored Aug 11, 2023 by cxy
Hide whitespace changes
Inline Side-by-side

Showing with 36 additions and 13 deletions

detect_with_ocr.py detect_with_ocr.py +7 -6

main_window.py main_window.py +13 -2

main_window_ui.py main_window_ui.py +13 -5

myVideoWidget.py myVideoWidget.py +3 -0

No files found.
--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -44,7 +44,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
 normal_speed = 4


-def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float, float]:
+def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
    # return (885.0, 989.0)
    """根据对视频中的画面进行分析，确定字幕的位置，以便后续的字幕识别

@@ -70,11 +70,13 @@ def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float
    print(cv2.CAP_PROP_FRAME_HEIGHT)
    print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
+    down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
+
    # down = up + 20
    # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
    print(up)
    # print(down)
-    return int(up), int(up + video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.2)
+    return int(up), int(down)
    # while True:
    #     _, img = video.read()
    #     # print("img:", img)
@@ -205,7 +207,7 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
    # up_b = 276
    # down_b = 297
    height = down_b - up_b
-    img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
+    img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
   
    # 针对低帧率的视频做图像放大处理
    print(">>>>>>>>>>>>>>>>>>>>>img shape")
@@ -244,8 +246,7 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
        # conf_thred2 = 0.4
        # conf_thred1 = 0.5
        # conf_thred2 = 0.7
-        if (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
-            print('drop  res ', x)
+        if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
            continue
        if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
            subTitle += txt
@@ -436,7 +437,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
    else:
        # 此处start_time + 300是为了节省用户调整视频开始时间的功夫（强行跳过前5分钟）
-        up_b, down_b = get_position(video_path, 0, mainWindow.rate)
+        up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
        context.caption_boundings = [up_b, down_b]

    context.detected = True

--- a/main_window.py
+++ b/main_window.py
@@ -461,7 +461,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
    def up_ocr(self):
        h = self.widget.up(3)
        video_h = self.wgt_video.height()
-        self.rate = float(h)/float(video_h)
+        self.rate = float(h-10)/float(video_h)
        print(">>>>>up h:" + str(h))
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))
@@ -470,11 +470,22 @@ class MainWindow(QMainWindow, Ui_MainWindow):
    def down_ocr(self):
        h = self.widget.down(3)
        video_h = self.wgt_video.height()
-        self.rate = float(h - 6)/float(video_h)
+        self.rate = float(h-10)/float(video_h)
        print(">>>>>down h:" + str(h))
        print(self.wgt_video.height())
        print(">>>>>>>>>rate" + str(self.rate))

+    def up_ocr_bottom(self):
+        h = self.widget_bottom.up(3)
+        video_h = self.wgt_video.height()
+        self.rate_bottom = float(h-6)/float(video_h)
+
+
+    def down_ocr_bottom(self):
+        h = self.widget_bottom.down(3)
+        video_h = self.wgt_video.height()
+        self.rate_bottom = float(h-6)/float(video_h)
+


    #导入旁白excel

--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -58,10 +58,12 @@ class Ui_MainWindow(object):
        self.verticalLayout_2 = QtWidgets.QVBoxLayout()
        self.verticalLayout_2.setObjectName("verticalLayout_2")
        self.wgt_video = myVideoWidget(self.centralwidget)
-        self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
-        self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
+        # self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
+        # self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
        self.widget = MyWidget(self.centralwidget)
-        self.widget.setGeometry(0,250,800,3)
+        self.widget.setGeometry(0,350,800,3)
+        self.widget_bottom = MyWidget(self.centralwidget)
+        self.widget_bottom.setGeometry(0,360,800,3)
        palette = QtGui.QPalette()
        brush = QtGui.QBrush(QtGui.QColor(0, 0, 0))
        brush.setStyle(QtCore.Qt.SolidPattern)
@@ -395,10 +397,14 @@ class Ui_MainWindow(object):
        self.action_4.setEnabled(False)
        self.action_5 = QtWidgets.QAction("旁白导入",self,triggered=self.import_excel)
        self.action_5.setEnabled(False)
-        self.action_6 = QtWidgets.QAction("OCR++",self,triggered=self.up_ocr)
+        self.action_6 = QtWidgets.QAction("字幕上边界++",self,triggered=self.up_ocr)
        self.action_6.setEnabled(True)
-        self.action_7 = QtWidgets.QAction("OCR--",self,triggered=self.down_ocr)
+        self.action_7 = QtWidgets.QAction("字幕上边界--",self,triggered=self.down_ocr)
        self.action_7.setEnabled(True)
+        self.action_8 = QtWidgets.QAction("字幕下边界++",self,triggered=self.up_ocr_bottom)
+        self.action_8.setEnabled(True)
+        self.action_9 = QtWidgets.QAction("字幕下边界--",self,triggered=self.down_ocr_bottom)
+        self.action_9.setEnabled(True)

        # self.action_3.setObjectName("action_3")
        # self.action_4 = QtWidgets.QAction(MainWindow)
@@ -439,6 +445,8 @@ class Ui_MainWindow(object):
        self.menubar.addAction(self.action_5)
        self.menubar.addAction(self.action_6)
        self.menubar.addAction(self.action_7)
+        self.menubar.addAction(self.action_8)
+        self.menubar.addAction(self.action_9)
        # self.menubar.addAction(self.menu_5.menuAction())
        # self.menubar.addAction(self.menu_6.menuAction())
        # self.menubar.addAction(self.menu_3.menuAction())

--- a/myVideoWidget.py
+++ b/myVideoWidget.py
 from PyQt5.QtMultimediaWidgets import QVideoWidget
 from PyQt5.QtCore import *
+from PyQt5.QtMultimedia import QMediaPlayer


 class myVideoWidget(QVideoWidget):
@@ -7,6 +8,8 @@ class myVideoWidget(QVideoWidget):

    def __init__(self, parent=None):
        super(QVideoWidget, self).__init__(parent)
+        self.setAspectRatioMode(Qt.IgnoreAspectRatio)
+


    def mouseDoubleClickEvent(self, QMouseEvent):     #双击事件