Commit eeb2d83a authored by cxy

dev ocr

parent cdc541fc
......@@ -68,12 +68,13 @@ def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float
# height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
print(">>>>>>>>>>>>video height")
print(cv2.CAP_PROP_FRAME_HEIGHT)
print(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
# down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print(up)
# print(down)
return int(up), int(up + 20)
return int(up), int(up + video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.2)
# while True:
# _, img = video.read()
# # print("img:", img)
......@@ -201,8 +202,11 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
Tuple[Union[str, None]]: 字幕信息(没有字幕时返回None)和置信度
"""
subTitle = ''
# up_b = 276
# down_b = 297
height = down_b - up_b
img = img[int(up_b - height * 0.7):int(down_b)]
img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
# 针对低帧率的视频做图像放大处理
print(">>>>>>>>>>>>>>>>>>>>>img shape")
print(height)
......@@ -211,14 +215,16 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
cv2.imwrite('./cap.png', img)
res = ocr.ocr(img, cls=True)
print('--------> res', res)
sorted(res, key=lambda text: text[0][0][1])
sorted(res, key=lambda text: text[0][0][0])
if len(res) == 0:
return None, 0
possible_txt = []
conf = 0
print(res)
print('res --------->', res)
for x in res:
# cv2.imshow("cut", img)
# cv2.waitKey(0)
......@@ -238,7 +244,8 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
# conf_thred2 = 0.4
# conf_thred1 = 0.5
# conf_thred2 = 0.7
if (rect[0][1] + rect[1][1])/2/img.shape[0] > 0.5 or (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
if (rect[0][1] + rect[1][1])/2/img.shape[0] <= 0.1:
print('drop res ', x)
continue
if confidence > conf_thred1 and gradient < 0.1 and 0.4 < mid / img.shape[1] < 0.6:
subTitle += txt
......@@ -325,15 +332,24 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
mainWindow.projectContext.last_time = cur_time
subTitle, conf = detect_subtitle(frame)
print(">>>>>>>>>>>>111111111")
if subTitle is not None:
print(">>>>>>>>>>>>111111111 2222222")
subTitle = normalize(subTitle)
if len(subTitle) == 0:
print(">>>>>>>>>>>>111111111 3333333")
subTitle = None
print(">>>>>>>>>>>>222222222")
# 第一次找到字幕
if lastSubTitle is None and subTitle is not None:
start_time = cur_time
print(">>>>>>>>>>>>333333333")
# 字幕消失
elif lastSubTitle is not None and subTitle is None:
print(">>>>>>>>>>>>4444444444")
end_time = cur_time
res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
......@@ -347,8 +363,10 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
# write_to_sheet(book_path, sheet_name, [round(start_time, 2), round(end_time, 2), lastSubTitle, ''])
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
elif lastSubTitle is not None and subTitle is not None:
print(">>>>>>>>>>>>5555555555")
# 两句话连在一起,但是两句话不一样
if string_similar(lastSubTitle, subTitle) < 0.7:
print(">>>>>>>>>>>66666666666")
end_time = cur_time
res.append([start_time, end_time, lastSubTitle])
if (len(res) == 1 and res[-1][0] - last_time >= 1) or (len(res) > 1 and res[-1][0] - res[-2][1]) >= 1:
......@@ -362,6 +380,7 @@ def process_video(video_path: str, begin: float, end: float, book_path: str, she
add_to_list(mainWindow, "字幕", [round(start_time, 3), round(end_time, 3), lastSubTitle, ''])
start_time = end_time
else:
print(">>>>>>>>>>>>777777777")
lastSubTitle = subTitle if conf > lastConf else lastSubTitle
continue
# 当前字幕与上一段字幕不一样
......
......@@ -299,6 +299,8 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.projectContext.Init(project_path)
self.update_ui()
self.rate = 0
# 打印到log文件中
t = RunThread(funcName=make_print_to_file, args=os.path.join(os.getcwd(), 'log'), name="logging")
print(t)
......@@ -457,7 +459,7 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.insert_aside_from_now_btn.setEnabled(True)
def up_ocr(self):
h = self.widget.up(5)
h = self.widget.up(3)
video_h = self.wgt_video.height()
self.rate = float(h)/float(video_h)
print(">>>>>up h:" + str(h))
......@@ -466,9 +468,9 @@ class MainWindow(QMainWindow, Ui_MainWindow):
def down_ocr(self):
h = self.widget.down(5)
h = self.widget.down(3)
video_h = self.wgt_video.height()
self.rate = float(h)/float(video_h)
self.rate = float(h - 6)/float(video_h)
print(">>>>>down h:" + str(h))
print(self.wgt_video.height())
print(">>>>>>>>>rate" + str(self.rate))
......
......@@ -61,7 +61,7 @@ class Ui_MainWindow(object):
self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
self.widget = MyWidget(self.centralwidget)
self.widget.setGeometry(0,150,800,3)
self.widget.setGeometry(0,250,800,3)
palette = QtGui.QPalette()
brush = QtGui.QBrush(QtGui.QColor(0, 0, 0))
brush.setStyle(QtCore.Qt.SolidPattern)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment