Commit b229c891 authored by cxy

dev ocr

parent eeb2d83a
......@@ -44,7 +44,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
normal_speed = 4
def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float, float]:
def get_position(video_path: str, start_time: float, rate: float, rate_bottom: float) -> Tuple[float, float]:
# return (885.0, 989.0)
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
......@@ -70,11 +70,13 @@ def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float
print(cv2.CAP_PROP_FRAME_HEIGHT)
print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
down = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate_bottom))
# down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print(up)
# print(down)
return int(up), int(up + video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.2)
return int(up), int(down)
# while True:
# _, img = video.read()
# # print("img:", img)
......@@ -205,7 +207,7 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
# up_b = 276
# down_b = 297
height = down_b - up_b
img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
img = img[int(up_b - height*0.7):int(down_b + height*0.7)]
# 针对低帧率的视频做图像放大处理
print(">>>>>>>>>>>>>>>>>>>>>img shape")
......@@ -244,8 +246,7 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
# conf_thred2 = 0.4
# conf_thred1 = 0.5
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
print('drop res ', x)
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
continue
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
......@@ -436,7 +437,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
else:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b, down_b = get_position(video_path, 0, mainWindow.rate)
up_b, down_b = get_position(video_path, 0, mainWindow.rate, mainWindow.rate_bottom)
context.caption_boundings = [up_b, down_b]
context.detected = True
......
......@@ -461,7 +461,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
def up_ocr(self):
    """Handle the "subtitle upper boundary ++" action and refresh ``self.rate``.

    Calls ``self.widget.up(3)`` and stores the returned position, minus a
    fixed offset of 10, as a fraction of the video widget's height in
    ``self.rate``.  That fraction is later multiplied by the frame height
    to locate the top edge of the subtitle region, so it is clamped to
    ``[0.0, 1.0]``.

    NOTE(review): the offset of 10 presumably compensates for the layout
    position of the marker relative to the video widget -- confirm.
    """
    h = self.widget.up(3)
    video_h = self.wgt_video.height()
    if video_h <= 0:
        # The video widget has no usable height (e.g. not laid out yet);
        # keep the previous rate instead of raising ZeroDivisionError.
        return
    # Only the offset-corrected value is stored; an earlier assignment
    # without the offset was immediately overwritten and has been dropped.
    self.rate = min(max(float(h - 10) / float(video_h), 0.0), 1.0)
    print(">>>>>up h:" + str(h))
    print(self.wgt_video.height())
    print(">>>>>>>>>rate" + str(self.rate))
......@@ -470,11 +470,22 @@ class MainWindow(QMainWindow, Ui_MainWindow):
def down_ocr(self):
    """Handle the "subtitle upper boundary --" action and refresh ``self.rate``.

    Calls ``self.widget.down(3)`` and stores the returned position, minus a
    fixed offset of 10, as a fraction of the video widget's height in
    ``self.rate``.  That fraction is later multiplied by the frame height
    to locate the top edge of the subtitle region, so it is clamped to
    ``[0.0, 1.0]``.

    NOTE(review): the offset of 10 presumably compensates for the layout
    position of the marker relative to the video widget -- confirm.
    """
    h = self.widget.down(3)
    video_h = self.wgt_video.height()
    if video_h <= 0:
        # The video widget has no usable height (e.g. not laid out yet);
        # keep the previous rate instead of raising ZeroDivisionError.
        return
    # Only the 10 offset is applied; the earlier ``h - 6`` assignment was
    # immediately overwritten and has been dropped.
    self.rate = min(max(float(h - 10) / float(video_h), 0.0), 1.0)
    print(">>>>>down h:" + str(h))
    print(self.wgt_video.height())
    print(">>>>>>>>>rate" + str(self.rate))
def up_ocr_bottom(self):
    """Handle the "subtitle lower boundary ++" action and refresh ``self.rate_bottom``.

    Calls ``self.widget_bottom.up(3)`` and stores the returned position,
    minus a fixed offset of 6, as a fraction of the video widget's height
    in ``self.rate_bottom``.  That fraction is later multiplied by the
    frame height to locate the bottom edge of the subtitle region, so it is
    clamped to ``[0.0, 1.0]``.

    NOTE(review): the offset of 6 differs from the 10 used for the upper
    boundary; presumably a layout calibration -- confirm.
    """
    h = self.widget_bottom.up(3)
    video_h = self.wgt_video.height()
    if video_h <= 0:
        # The video widget has no usable height (e.g. not laid out yet);
        # keep the previous rate instead of raising ZeroDivisionError.
        return
    self.rate_bottom = min(max(float(h - 6) / float(video_h), 0.0), 1.0)
def down_ocr_bottom(self):
    """Handle the "subtitle lower boundary --" action and refresh ``self.rate_bottom``.

    Calls ``self.widget_bottom.down(3)`` and stores the returned position,
    minus a fixed offset of 6, as a fraction of the video widget's height
    in ``self.rate_bottom``.  That fraction is later multiplied by the
    frame height to locate the bottom edge of the subtitle region, so it is
    clamped to ``[0.0, 1.0]``.

    NOTE(review): the offset of 6 differs from the 10 used for the upper
    boundary; presumably a layout calibration -- confirm.
    """
    h = self.widget_bottom.down(3)
    video_h = self.wgt_video.height()
    if video_h <= 0:
        # The video widget has no usable height (e.g. not laid out yet);
        # keep the previous rate instead of raising ZeroDivisionError.
        return
    self.rate_bottom = min(max(float(h - 6) / float(video_h), 0.0), 1.0)
#导入旁白excel
......
......@@ -58,10 +58,12 @@ class Ui_MainWindow(object):
self.verticalLayout_2 = QtWidgets.QVBoxLayout()
self.verticalLayout_2.setObjectName("verticalLayout_2")
self.wgt_video = myVideoWidget(self.centralwidget)
self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
# self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
# self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
self.widget = MyWidget(self.centralwidget)
self.widget.setGeometry(0,250,800,3)
self.widget.setGeometry(0,350,800,3)
self.widget_bottom = MyWidget(self.centralwidget)
self.widget_bottom.setGeometry(0,360,800,3)
palette = QtGui.QPalette()
brush = QtGui.QBrush(QtGui.QColor(0, 0, 0))
brush.setStyle(QtCore.Qt.SolidPattern)
......@@ -395,10 +397,14 @@ class Ui_MainWindow(object):
self.action_4.setEnabled(False)
self.action_5 = QtWidgets.QAction("旁白导入",self,triggered=self.import_excel)
self.action_5.setEnabled(False)
self.action_6 = QtWidgets.QAction("OCR++",self,triggered=self.up_ocr)
self.action_6 = QtWidgets.QAction("字幕上边界++",self,triggered=self.up_ocr)
self.action_6.setEnabled(True)
self.action_7 = QtWidgets.QAction("OCR--",self,triggered=self.down_ocr)
self.action_7 = QtWidgets.QAction("字幕上边界--",self,triggered=self.down_ocr)
self.action_7.setEnabled(True)
self.action_8 = QtWidgets.QAction("字幕下边界++",self,triggered=self.up_ocr_bottom)
self.action_8.setEnabled(True)
self.action_9 = QtWidgets.QAction("字幕下边界--",self,triggered=self.down_ocr_bottom)
self.action_9.setEnabled(True)
# self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow)
......@@ -439,6 +445,8 @@ class Ui_MainWindow(object):
self.menubar.addAction(self.action_5)
self.menubar.addAction(self.action_6)
self.menubar.addAction(self.action_7)
self.menubar.addAction(self.action_8)
self.menubar.addAction(self.action_9)
# self.menubar.addAction(self.menu_5.menuAction())
# self.menubar.addAction(self.menu_6.menuAction())
# self.menubar.addAction(self.menu_3.menuAction())
......
from PyQt5.QtMultimediaWidgets import QVideoWidget
from PyQt5.QtCore import *
from PyQt5.QtMultimedia import QMediaPlayer
class myVideoWidget(QVideoWidget):
......@@ -7,6 +8,8 @@ class myVideoWidget(QVideoWidget):
def __init__(self, parent=None):
super(QVideoWidget, self).__init__(parent)
self.setAspectRatioMode(Qt.IgnoreAspectRatio)
def mouseDoubleClickEvent(self, QMouseEvent): #双击事件
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment